In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.model_selection import cross_val_score

In [2]:
# load dataset
ds = pd.read_csv("pima-indians-diabetes.csv")

In [3]:
ds.head()

Unnamed: 0,num_preg,glucose_conc,diastolic_bp,thickness,insulin,bmi,diab_pred,age,diabetes
0,1,85,66,29,0,26.6,0.351,31,0
1,8,183,64,0,0,23.3,0.672,32,1
2,1,89,66,23,94,28.1,0.167,21,0
3,0,137,40,35,168,43.1,2.288,33,1
4,5,116,74,0,0,25.6,0.201,30,0


In [4]:
feature_cols = ['num_preg', 'insulin', 'bmi', 'age','glucose_conc','diastolic_bp','diab_pred']
X = ds[feature_cols] # Features
y = ds.diabetes # Target variable

In [5]:
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
pca.fit(X)

PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [6]:
pca_X = pca.transform(X)

In [7]:
# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(pca_X, y, test_size=0.3, random_state=1) # 70% training and 30% test

In [8]:
from sklearn.svm import SVC

# Create SVM classifer object
clf = SVC(gamma='scale',C=2)

# Train SVM Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [9]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))
# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.7574626865671642
Test Accuracy: 0.7445887445887446


In [10]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.70909091 0.78181818 0.75925926 0.77777778 0.77358491 0.75471698
 0.69811321 0.69811321 0.71698113 0.75471698]
0.7424172543040468


In [11]:
from sklearn.tree import DecisionTreeClassifier

# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [12]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))
# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 1.0
Test Accuracy: 0.6536796536796536


In [13]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.65454545 0.61818182 0.61111111 0.66666667 0.64150943 0.77358491
 0.67924528 0.62264151 0.60377358 0.75471698]
0.6625976748618256


In [14]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier(criterion="gini", max_depth=3)

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [15]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))

# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.7593283582089553
Test Accuracy: 0.7575757575757576


In [16]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.70909091 0.72727273 0.75925926 0.7962963  0.71698113 0.75471698
 0.71698113 0.64150943 0.71698113 0.71698113]
0.7256070135315419


In [17]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier(criterion="entropy", max_depth=3)

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [18]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))

# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.75
Test Accuracy: 0.7186147186147186


In [19]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.67272727 0.76363636 0.72222222 0.7037037  0.71698113 0.73584906
 0.71698113 0.64150943 0.67924528 0.71698113]
0.7069836732100884


In [21]:
from sklearn.ensemble import RandomForestClassifier

# Create RF classifer object
clf = RandomForestClassifier(n_estimators=5)

# Train RF Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [22]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))

# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.9552238805970149
Test Accuracy: 0.696969696969697


In [23]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.58181818 0.69090909 0.7037037  0.74074074 0.69811321 0.67924528
 0.60377358 0.67924528 0.71698113 0.81132075]
0.6905850962454736


In [24]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=2)  
X_train = lda.fit_transform(X_train, y_train)  
X_test = lda.transform(X_test)

In [25]:
# Create SVM classifer object
clf = SVC(gamma='scale',C=2)

# Train SVM Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [26]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))
# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.7406716417910447
Test Accuracy: 0.7575757575757576


In [27]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.70909091 0.76363636 0.77777778 0.74074074 0.77358491 0.73584906
 0.71698113 0.73584906 0.73584906 0.75471698]
0.7444075979925036


In [28]:
from sklearn.tree import DecisionTreeClassifier

# Create Decision Tree classifer object
clf = DecisionTreeClassifier()

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [29]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))
# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 1.0
Test Accuracy: 0.6926406926406926


In [30]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.61818182 0.63636364 0.7037037  0.64814815 0.73584906 0.67924528
 0.60377358 0.62264151 0.64150943 0.66037736]
0.65497935328124


In [31]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier(criterion="gini", max_depth=3)

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [32]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))

# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.7518656716417911
Test Accuracy: 0.7402597402597403


In [33]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.70909091 0.76363636 0.62962963 0.7037037  0.77358491 0.77358491
 0.75471698 0.71698113 0.67924528 0.75471698]
0.7258890794739851


In [34]:
# Create Decision Tree classifer object
clf = DecisionTreeClassifier(criterion="entropy", max_depth=3)

# Train Decision Tree Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [35]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))

# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.7481343283582089
Test Accuracy: 0.7532467532467533


In [36]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.69090909 0.76363636 0.68518519 0.68518519 0.77358491 0.75471698
 0.75471698 0.71698113 0.69811321 0.75471698]
0.727774601359507


In [37]:
from sklearn.ensemble import RandomForestClassifier

# Create RF classifer object
clf = RandomForestClassifier(n_estimators=5)

# Train RF Classifer
clf = clf.fit(X_train,y_train)

#Predict the response for test dataset
y_pred = clf.predict(X_test)

In [38]:
# Train Accuracy
print("Train Accuracy:",clf.score(X_train, y_train))

# Test Accuracy
print("Test Accuracy:",metrics.accuracy_score(y_test, y_pred))

Train Accuracy: 0.9328358208955224
Test Accuracy: 0.70995670995671


In [39]:
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)
print(scores.mean())

[0.61818182 0.58181818 0.68518519 0.68518519 0.73584906 0.66037736
 0.62264151 0.62264151 0.62264151 0.66037736]
0.6494898672257162
