In [29]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier 
from sklearn.ensemble import BaggingClassifier 
from sklearn.ensemble import VotingClassifier 
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.ensemble import GradientBoostingClassifier 
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import joblib

In [3]:
# Load the preprocessed breast-cancer table; the path is relative to the
# directory the notebook is run from.
csv_path = "Data/preprocessed_breast_cancer_data.csv"
df = pd.read_csv(csv_path)

In [4]:
# Separate the label column from the feature columns.
target_column = 'Classification'
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [5]:
# Seed shared by every estimator that draws random numbers, so that a
# Restart & Run All reproduces the same metrics.
MODEL_SEED = 42

# Candidate classifiers keyed by the display name used in the evaluation
# report. Every estimator is otherwise left at its scikit-learn defaults.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=MODEL_SEED),
    'Random Forest': RandomForestClassifier(random_state=MODEL_SEED),
    'Support Vector Machine': SVC(),
    # No `voting` argument is given, so the ensemble uses the library default.
    'Voting Classifier': VotingClassifier(estimators=[
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier(random_state=MODEL_SEED)),
        ('rf', RandomForestClassifier(random_state=MODEL_SEED)),
        ('svm', SVC())
    ]),
    # The base estimator is passed positionally on purpose: the keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
    # old name was removed in 1.4, while the first positional slot is the base
    # estimator in every release.
    'Bagging Classifier': BaggingClassifier(
        DecisionTreeClassifier(random_state=MODEL_SEED),
        n_estimators=10,
        random_state=MODEL_SEED,
    ),
    'AdaBoost Classifier': AdaBoostClassifier(n_estimators=50, random_state=MODEL_SEED),
    'Gradient Boosting Classifier': GradientBoostingClassifier(
        n_estimators=100, random_state=MODEL_SEED
    )
}


In [9]:
def evaluate_model(true, predicted):
    """Score a set of predictions against the ground-truth labels.

    Parameters
    ----------
    true
        Ground-truth class labels.
    predicted
        Labels predicted by a model, aligned element-wise with ``true``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)``, each computed by the
        corresponding scikit-learn scorer called with its default arguments.
    """
    # Order matters: callers unpack the result positionally.
    scorers = (accuracy_score, precision_score, recall_score, f1_score)
    return tuple(scorer(true, predicted) for scorer in scorers)




In [27]:
# Parallel result lists: position i in each list belongs to model_list[i].
# Only test-set metrics are stored; training metrics are printed only.
model_list, accuracy_list, precision_list, recall_list, f1_list = [], [], [], [], []

# Report line templates, in the same order evaluate_model returns its metrics.
metric_templates = (
    "- Accuracy: {:.4f}",
    "- Precision: {:.4f}",
    "- Recall: {:.4f}",
    "- F1 Score: {:.4f}",
)

for name, model in models.items():
    # Fit on the training split, then predict both splits so the report can
    # show training and test performance side by side.
    model.fit(X_train, y_train)
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    train_scores = evaluate_model(y_train, y_train_pred)
    test_scores = evaluate_model(y_test, y_test_pred)

    print(name)
    model_list.append(name)

    print('Model performance for Training set')
    for template, score in zip(metric_templates, train_scores):
        print(template.format(score))
    print('----------------------------------')

    print('Model performance for Test set')
    for template, score in zip(metric_templates, test_scores):
        print(template.format(score))

    # Record the test-set metrics, one per result list.
    result_lists = (accuracy_list, precision_list, recall_list, f1_list)
    for bucket, score in zip(result_lists, test_scores):
        bucket.append(score)

    print('='*35)
    print('\n')

Logistic Regression
Model performance for Training set
- Accuracy: 0.7609
- Precision: 0.7045
- Recall: 0.7750
- F1 Score: 0.7381
----------------------------------
Model performance for Test set
- Accuracy: 0.8750
- Precision: 0.8462
- Recall: 0.9167
- F1 Score: 0.8800


Decision Tree
Model performance for Training set
- Accuracy: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
----------------------------------
Model performance for Test set
- Accuracy: 0.7917
- Precision: 0.8182
- Recall: 0.7500
- F1 Score: 0.7826


Random Forest
Model performance for Training set
- Accuracy: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
----------------------------------
Model performance for Test set
- Accuracy: 0.8333
- Precision: 0.9000
- Recall: 0.7500
- F1 Score: 0.8182


Support Vector Machine
Model performance for Training set
- Accuracy: 0.8804
- Precision: 0.8718
- Recall: 0.8500
- F1 Score: 0.8608
----------------------------------
Model performance for Tes



AdaBoost Classifier
Model performance for Training set
- Accuracy: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
----------------------------------
Model performance for Test set
- Accuracy: 0.7500
- Precision: 0.6875
- Recall: 0.9167
- F1 Score: 0.7857


Gradient Boosting Classifier
Model performance for Training set
- Accuracy: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- F1 Score: 1.0000
----------------------------------
Model performance for Test set
- Accuracy: 0.8750
- Precision: 1.0000
- Recall: 0.7500
- F1 Score: 0.8571




In [30]:
# Fit a default-configured support vector classifier on the training split.
model = SVC()
model.fit(X_train, y_train)

# Persist the fitted model to the working directory. The filename previously
# carried a stray "n " prefix ('n svm_model.pkl'), which did not match the
# 'svm_model.pkl' name shown in this cell's recorded output.
joblib.dump(model, 'svm_model.pkl')

['svm_model.pkl']