# Logistic Regression, Random Forest, Decision Tree, KNN, N-Bayes, SVM

In [32]:
# Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Data Collection and Data Analysis
heart_ds = pd.read_csv('heart.csv')

# Data Pre - Processing
X = heart_ds.drop(columns='target', axis=1)
Y = heart_ds['target']

# Splitting the data into Training and Testing data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(random_state=2),
    'Decision Tree': DecisionTreeClassifier(random_state=2),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'SVM': SVC()
}

# Training and Evaluation
for name, model in models.items():
    print("Training", name)
    model.fit(X_train, Y_train)
    
    # Training Accuracy
    train_pred = model.predict(X_train)
    train_acc = accuracy_score(train_pred, Y_train)
    print('Training Accuracy:', train_acc)
    
    # Testing Accuracy
    test_pred = model.predict(X_test)
    test_acc = accuracy_score(test_pred, Y_test)
    print('Testing Accuracy:', test_acc)
    
    # Confusion Matrix
    conf_matrix = confusion_matrix(Y_test, test_pred)
    print("Confusion Matrix =\n", conf_matrix)
    
    # Classification Report
    class_report = classification_report(Y_test, test_pred)
    print("Classification Report =\n", class_report)
    
    print()


Training Logistic Regression
Training Accuracy: 0.8512396694214877
Testing Accuracy: 0.819672131147541
Confusion Matrix =
 [[23  5]
 [ 6 27]]
Classification Report =
               precision    recall  f1-score   support

           0       0.79      0.82      0.81        28
           1       0.84      0.82      0.83        33

    accuracy                           0.82        61
   macro avg       0.82      0.82      0.82        61
weighted avg       0.82      0.82      0.82        61


Training Random Forest
Training Accuracy: 1.0
Testing Accuracy: 0.7868852459016393
Confusion Matrix =
 [[22  6]
 [ 7 26]]
Classification Report =
               precision    recall  f1-score   support

           0       0.76      0.79      0.77        28
           1       0.81      0.79      0.80        33

    accuracy                           0.79        61
   macro avg       0.79      0.79      0.79        61
weighted avg       0.79      0.79      0.79        61


Training Decision Tree
Trainin

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Logistic Regression, Random Forest, Decision Tree, KNN, N-Bayes, SVM (Changed Version of the Code)

In this code, I added feature scaling using StandardScaler to standardize the features, which can improve the performance of some models. For SVM, I performed hyperparameter tuning using GridSearchCV to find the best parameters for the model. This process helps in optimizing the model's performance. You can further experiment with different hyperparameters or preprocessing techniques to achieve even better accuracy.

In [30]:
# Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Data Collection and Data Analysis
heart_ds = pd.read_csv('heart.csv')

# Data Pre - Processing
X = heart_ds.drop(columns='target', axis=1)
Y = heart_ds['target']

# Splitting the data into Training and Testing data
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(random_state=2),
    'Decision Tree': DecisionTreeClassifier(random_state=2),
    'KNN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB(),
    'SVM': SVC()
}

# Hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear', 'poly', 'sigmoid']
}

# Training and Evaluation
for name, model in models.items():
    print("Training", name)
    if name == 'SVM':
        grid_search = GridSearchCV(model, param_grid, cv=5, scoring='accuracy')
        grid_search.fit(X_train_scaled, Y_train)
        best_model = grid_search.best_estimator_
        best_model.fit(X_train_scaled, Y_train)
    else:
        model.fit(X_train_scaled, Y_train)
        best_model = model
    
    # Training Accuracy
    train_pred = best_model.predict(X_train_scaled)
    train_acc = accuracy_score(train_pred, Y_train)
    print('Training Accuracy:', train_acc)
    
    # Testing Accuracy
    test_pred = best_model.predict(X_test_scaled)
    test_acc = accuracy_score(test_pred, Y_test)
    print('Testing Accuracy:', test_acc)
    
    # Confusion Matrix
    conf_matrix = confusion_matrix(Y_test, test_pred)
    print("Confusion Matrix =\n", conf_matrix)
    
    # Classification Report
    class_report = classification_report(Y_test, test_pred)
    print("Classification Report =\n", class_report)
    
    print()


Training Logistic Regression
Training Accuracy: 0.8471074380165289
Testing Accuracy: 0.7868852459016393
Confusion Matrix =
 [[22  6]
 [ 7 26]]
Classification Report =
               precision    recall  f1-score   support

           0       0.76      0.79      0.77        28
           1       0.81      0.79      0.80        33

    accuracy                           0.79        61
   macro avg       0.79      0.79      0.79        61
weighted avg       0.79      0.79      0.79        61


Training Random Forest
Training Accuracy: 1.0
Testing Accuracy: 0.7704918032786885
Confusion Matrix =
 [[22  6]
 [ 8 25]]
Classification Report =
               precision    recall  f1-score   support

           0       0.73      0.79      0.76        28
           1       0.81      0.76      0.78        33

    accuracy                           0.77        61
   macro avg       0.77      0.77      0.77        61
weighted avg       0.77      0.77      0.77        61


Training Decision Tree
Traini