In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.datasets import load_breast_cancer
import numpy as np

In [2]:
# Step 2: Load scikit-learn's bundled breast cancer dataset into a single DataFrame.
# The label column is appended as 'target' (0 = malignant, 1 = benign).
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

In [3]:
# Step 3: Preprocessing -- nothing is applied here; normalization or feature
# selection could be slotted in at this point if needed.
# Step 4: Separate the label from the features, then hold out 20% of rows for testing.
y = df['target']                   # target variable
X = df.drop(columns=['target'])    # features: every column except the label
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is repeatable
)

In [6]:
# Step 5: Fit a 100-tree random forest on the training split (seeded for repeatability).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 6: Predict on the held-out split and report accuracy, per-class metrics
# and the confusion matrix.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred),
)
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_test, y_pred),
)

Model Accuracy: 0.9649122807017544

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

Confusion Matrix:
 [[40  3]
 [ 1 70]]


In [7]:
# Step 7: Making recommendations based on prediction outcomes
def prognosis_recommendation(features, estimator=None):
    """
    Function to provide prognosis recommendation based on model predictions.

    :param features: 1-D array/sequence of feature values for ONE patient, in the
        same column order the classifier was trained on
    :param estimator: Optional fitted classifier exposing ``predict``. When omitted,
        the notebook-level ``model`` is used (the original behaviour).
    :return: String recommendation (predicted label 0 -> malignant warning,
        any other label -> benign message)
    :raises ValueError: if ``features`` is not one-dimensional
    """
    clf = model if estimator is None else estimator

    row = np.asarray(features)
    if row.ndim != 1:
        raise ValueError(
            f"features must be a 1-D vector for a single patient, got shape {row.shape}"
        )
    row = row.reshape(1, -1)  # predict() expects a 2-D (n_samples, n_features) input

    # An estimator fitted on a DataFrame remembers its column names and warns
    # ("X does not have valid feature names") when handed unnamed data, so
    # rebuild a one-row DataFrame with those names when they are available.
    names = getattr(clf, "feature_names_in_", None)
    if names is not None:
        row = pd.DataFrame(row, columns=names)

    prediction = clf.predict(row)
    if prediction[0] == 0:
        return "High risk of malignant cancer. Immediate consultation and further tests recommended."
    return "Benign results. Routine monitoring suggested, but follow up with a healthcare provider."

In [8]:
# Example usage: take the first row of the held-out split as a sample patient
# and print the recommendation the model produces for it.
example_patient = X_test.iloc[0].to_numpy()
recommendation = prognosis_recommendation(example_patient)
print("Recommendation:", recommendation)

Recommendation: Benign results. Routine monitoring suggested, but follow up with a healthcare provider.




In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.datasets import load_breast_cancer
import numpy as np

# Step 1: Load the Breast Cancer dataset
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target  # target: 0 = malignant, 1 = benign

# Step 2: Data Preprocessing (optional but can include normalization or feature selection)
# No transformation is applied here; the summary below is a quick check of column
# dtypes and non-null counts.
# NOTE: DataFrame.info() writes its summary directly to stdout and returns None, so it
# must be called as a statement. Interpolating it into an f-string prints the summary
# first and then the literal text "None" under the heading.
print("Data Info:")
df.info()
print()

# Step 3: Separate the label from the predictors
y = df['target']                   # Target variable (0 = malignant, 1 = benign)
X = df.drop(columns=['target'])    # Features (all columns except the target)

# Step 4: Hold out 20% of the rows for testing; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Fit a 100-tree RandomForestClassifier on the training split (seeded)
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 6: Predict on the held-out split
y_pred = model.predict(X_test)

# Overall accuracy, reported as a percentage with two decimals
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")

# Per-class precision/recall/F1 and the confusion matrix for a closer look
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred),
)
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_test, y_pred),
)

# Step 7: Making recommendations based on model predictions
def prognosis_recommendation(features, estimator=None):
    """
    Function to provide prognosis recommendations based on model predictions.

    :param features: 1-D array/sequence with the feature values of ONE patient, in
        the same column order the classifier was trained on
    :param estimator: Optional fitted classifier exposing ``predict``. When omitted,
        the notebook-level ``model`` is used (the original behaviour).
    :return: String recommendation based on predicted outcome (label 0 -> malignant
        warning, any other label -> benign message)
    :raises ValueError: if ``features`` is not one-dimensional
    """
    clf = model if estimator is None else estimator

    row = np.asarray(features)
    if row.ndim != 1:
        raise ValueError(
            f"features must be a 1-D vector for a single patient, got shape {row.shape}"
        )
    row = row.reshape(1, -1)  # predict() expects a 2-D (n_samples, n_features) input

    # An estimator fitted on a DataFrame remembers its column names and warns
    # ("X does not have valid feature names") when handed unnamed data, so
    # rebuild a one-row DataFrame with those names when they are available.
    names = getattr(clf, "feature_names_in_", None)
    if names is not None:
        row = pd.DataFrame(row, columns=names)

    prediction = clf.predict(row)  # Making prediction for the given features
    if prediction[0] == 0:
        return "High risk of malignant cancer. Immediate consultation and further tests recommended."
    return "Benign results. Routine monitoring suggested, but follow up with a healthcare provider."

# Step 8: Example Usage for Patient Recommendation
# Row 0 of the held-out split is used as the example patient (a fixed pick, not a
# random one: the split is seeded, so this is the same row on every run).
example_patient = X_test.iloc[0].to_numpy()
recommendation = prognosis_recommendation(example_patient)  # Get prognosis recommendation for the patient
print("\nRecommendation for the Example Patient:")
print(recommendation)

# Optional: Show a few more example recommendations from the test set
print("\nAdditional Patient Recommendations:")
# Positions 1-3 of the test set, i.e. the three rows AFTER the example patient at
# position 0; the printed number is the positional index within X_test.
for i in range(1, 4):
    patient_features = X_test.iloc[i].to_numpy()
    rec = prognosis_recommendation(patient_features)
    print(f"Patient {i}: {rec}")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

