In [1]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import accuracy_score, classification_report
from datetime import datetime

# Load the dataset
data = pd.read_csv("/content/breast_cancer_survival.csv") # Replace "your_dataset.csv" with the path to your dataset file

# Dropping rows with missing values
data.dropna(inplace=True)

# Separate features and target variable
X = data.drop(columns=["Patient_Status"])
y = data["Patient_Status"]

# Encoding categorical variables
encoder = LabelEncoder()
X_encoded = X.copy()
for col in X.columns:
    if X[col].dtype == 'object':
            X_encoded[col] = encoder.fit_transform(X[col])

# One-hot encoding categorical variables
categorical_cols = ['Tumour_Stage', 'Histology', 'ER status', 'PR status', 'HER2 status', 'Surgery_type']
X_encoded = pd.get_dummies(X_encoded, columns=categorical_cols)

            # Convert date variables to numerical representation
date_columns = ['Date_of_Surgery', 'Date_of_Last_Visit']
for col in date_columns:
        X_encoded[col] = pd.to_datetime(X_encoded[col], errors='coerce')
        X_encoded[col] = (X_encoded[col] - datetime(1970, 1, 1)).dt.total_seconds()

                    # Splitting the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)



In [2]:
from sklearn.ensemble import RandomForestClassifier

# Initializing Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)

# Training the Random Forest model
rf_classifier.fit(X_train, y_train)

# Predicting on the test set
y_pred_rf = rf_classifier.predict(X_test)

# Evaluating the Random Forest model
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", accuracy_rf)
print("\nRandom Forest Classification Report:\n", classification_report(y_test, y_pred_rf))


Random Forest Accuracy: 0.796875

Random Forest Classification Report:
               precision    recall  f1-score   support

       Alive       0.80      1.00      0.89        51
        Dead       0.00      0.00      0.00        13

    accuracy                           0.80        64
   macro avg       0.40      0.50      0.44        64
weighted avg       0.64      0.80      0.71        64



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
from sklearn.ensemble import AdaBoostClassifier

# Initializing AdaBoost classifier
adaboost_classifier = AdaBoostClassifier(n_estimators=50, random_state=42)

# Training the AdaBoost model
adaboost_classifier.fit(X_train, y_train)

# Predicting on the test set
y_pred_adaboost = adaboost_classifier.predict(X_test)

# Evaluating the AdaBoost model
accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
print("AdaBoost Accuracy:", accuracy_adaboost)
print("\nAdaBoost Classification Report:\n", classification_report(y_test, y_pred_adaboost))


AdaBoost Accuracy: 0.765625

AdaBoost Classification Report:
               precision    recall  f1-score   support

       Alive       0.82      0.90      0.86        51
        Dead       0.38      0.23      0.29        13

    accuracy                           0.77        64
   macro avg       0.60      0.57      0.57        64
weighted avg       0.73      0.77      0.74        64

