In [144]:
### Author : Prasad Meesala
# Importing the necessary modules

import numpy as np
import pandas as pd
from sklearn import ensemble, linear_model, svm, model_selection, datasets

In [145]:
# Collecting the data

data = datasets.load_iris()

df = pd.DataFrame(data.data, columns = data.feature_names)
df['target'] = data.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [146]:
X = df.drop('target', axis = 1).values
y = df['target'].values

X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size = 0.2)

# Using different classifiers to evaluate the model performance

lg = linear_model.LogisticRegression(max_iter = 10000)
svmc = svm.SVC(kernel = "linear")
rf = ensemble.RandomForestClassifier(n_estimators = 50)

def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)

print(get_score(lg, X_train, X_test, y_train, y_test))
print(get_score(svmc, X_train, X_test, y_train, y_test))
print(get_score(rf, X_train, X_test, y_train, y_test))

0.9333333333333333
0.9666666666666667
0.9333333333333333


In [147]:
# Evaluating the model performance by creating folds and calculating score for each fold

def avg(x):
    return sum(x) / len(x)

folds = model_selection.StratifiedKFold(n_splits = 5)

lr_scores, svm_scores = [], []
rf_scores = lr_scores.copy()

for train_index, test_index in folds.split(X, y):
    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index]
    lr_scores.append(get_score(lr, X_train, X_test, y_train, y_test)) 
    svm_scores.append(get_score(svmc, X_train, X_test, y_train, y_test))
    rf_scores.append(get_score(rf, X_train, X_test, y_train, y_test))

print(avg(lr_scores), avg(svm_scores), avg(rf_scores))

0.9733333333333334 0.9800000000000001 0.96


In [148]:
# Using pre-defined function cross_val_score() to calculate the scores for each fold

print(avg(model_selection.cross_val_score(lr, X, y)))
print(avg(model_selection.cross_val_score(svmc, X, y)))
print(avg(model_selection.cross_val_score(rf, X, y)))

0.9733333333333334
0.9800000000000001
0.96
