In [41]:
# Here we build several machine learning models and compare them to see which one performs best.

# First we evaluate each model with cross_val_score, without tuning any model-specific hyperparameters.
# Then we repeat the process with a hyperparameter grid for each model, using GridSearchCV instead of cross_val_score.

# The best model found here is RandomForestClassifier(random_state=0) with parameters {'n_estimators': 100}.

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Load the heart-disease dataset. read_csv already returns a DataFrame, so it
# is bound to `heart` directly instead of being re-wrapped in pd.DataFrame(...).
heart = pd.read_csv('/content/drive/MyDrive/ML Datasets/heart_disease_data.csv')

In [4]:
heart

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [5]:
# Feature matrix: every column except the 'target' label, as a NumPy array.
X = heart.drop(columns='target').to_numpy()

In [8]:
# Label vector: the 'target' column as a NumPy array.
Y = heart['target'].to_numpy()

## **Cross Validation without hyper parameters**

In [9]:
# Baseline estimators scored without any hyperparameter search.
# The random forest is seeded (as it is in the grid-search section) so that its
# cross-validation score is reproducible across kernel restarts.
models = [
    LogisticRegression(max_iter=1000),
    SVC(kernel='linear'),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=0),
]

In [12]:
def cross_val_model():
  """Print the mean 5-fold cross-validation score of every baseline model.

  Iterates over the notebook-level ``models`` list and scores each estimator
  on the notebook-level ``X`` / ``Y`` arrays. The mean score is reported as a
  percentage rounded to two decimals. Nothing is returned.
  """
  for estimator in models:
    fold_scores = cross_val_score(estimator, X, Y, cv=5)
    # Mean of the per-fold scores, scaled to percent, kept to two decimals.
    accuracy_pct = round(sum(fold_scores) / len(fold_scores) * 100, 2)
    print("The accuracy score of the ", estimator, "is = ", accuracy_pct)

In [13]:
cross_val_model()

The accuracy score of the  LogisticRegression(max_iter=1000) is =  82.83
The accuracy score of the  SVC(kernel='linear') is =  82.83
The accuracy score of the  KNeighborsClassifier() is =  64.39
The accuracy score of the  RandomForestClassifier() is =  81.49


## **Now GridSearchCV with Hyperparameters**

In [29]:
# Estimators to tune with GridSearchCV, one per line.
models_list = [
    LogisticRegression(max_iter=10000),
    SVC(),
    KNeighborsClassifier(),
    RandomForestClassifier(random_state=0),
]

In [30]:
# Parameter grid for each estimator, keyed by a descriptive label.
# Entries are listed in the same order as the estimators in models_list.
models_hyperparameter = {
    'log_reg_hyper': {
        'C': [1, 5, 10, 20],
    },
    'SVM_hyper': {
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
        'C': [1, 5, 10, 20],
    },
    'knn_hyper': {
        'n_neighbors': [3, 5, 10],
    },
    'random_forest_hyper': {
        'n_estimators': [10, 20, 50, 100],
    },
}

In [31]:
type(models_hyperparameter)

dict

In [32]:
models_hyperparameter.keys()

dict_keys(['log_reg_hyper', 'SVM_hyper', 'knn_hyper', 'random_forest_hyper'])

In [33]:
models_hyperparameter.values()

dict_values([{'C': [1, 5, 10, 20]}, {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [1, 5, 10, 20]}, {'n_neighbors': [3, 5, 10]}, {'n_estimators': [10, 20, 50, 100]}])

In [34]:
# Grid labels in insertion order (iterating a dict yields its keys).
model_keys = list(models_hyperparameter)
print(model_keys)

['log_reg_hyper', 'SVM_hyper', 'knn_hyper', 'random_forest_hyper']


In [35]:
models_hyperparameter[model_keys[0]]

{'C': [1, 5, 10, 20]}

In [36]:
# GridSearchCV

In [39]:
def model_selection(models, hyperparameters):
  """Grid-search each model over its parameter grid and tabulate the best results.

  Models and grids are paired by position: the i-th estimator in ``models``
  is tuned with the i-th value of ``hyperparameters`` (dict insertion order).
  Every search uses 5-fold cross-validation and is fitted on the
  notebook-level ``X`` and ``Y`` arrays.

  Parameters
  ----------
  models : iterable
      Estimators to tune.
  hyperparameters : dict
      Maps a label to the parameter grid passed to GridSearchCV for the
      estimator at the same position in ``models``.

  Returns
  -------
  pandas.DataFrame
      One row per model with columns "Model", "Best Parameters", "Best Score".

  Raises
  ------
  ValueError
      If there are more models than parameter grids.
  """
  models = list(models)
  # Take the grids from the dict that was actually passed in rather than from
  # the notebook-global `model_keys`, so the function works with any grid dict.
  param_grids = list(hyperparameters.values())
  if len(models) > len(param_grids):
    raise ValueError(
        f"got {len(models)} models but only {len(param_grids)} hyperparameter grids"
    )

  results = []
  for model, params in zip(models, param_grids):
    print(model)
    print(params)

    classifier = GridSearchCV(model, params, cv=5)
    classifier.fit(X, Y)

    results.append({
        "Model": model,
        "Best Parameters": classifier.best_params_,
        "Best Score": classifier.best_score_,
    })

  return pd.DataFrame(results, columns=["Model", "Best Parameters", "Best Score"])

In [40]:
model_selection(models_list, models_hyperparameter)

LogisticRegression(max_iter=10000)
{'C': [1, 5, 10, 20]}
SVC()
{'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [1, 5, 10, 20]}
KNeighborsClassifier()
{'n_neighbors': [3, 5, 10]}
RandomForestClassifier(random_state=0)
{'n_estimators': [10, 20, 50, 100]}


Unnamed: 0,Model,Best Parameters,Best Score
0,LogisticRegression(max_iter=10000),{'C': 5},0.831585
1,SVC(),"{'C': 1, 'kernel': 'linear'}",0.828306
2,KNeighborsClassifier(),{'n_neighbors': 5},0.64388
3,RandomForestClassifier(random_state=0),{'n_estimators': 100},0.838087


In [38]:
# After completing the steps above, we select

# RandomForestClassifier(random_state=0) with parameters {'n_estimators': 100}, then build and train that model and use it to make predictions.
