## Try to build your model using [pipelines](http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html)



In [1]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import classification_report
from sklearn.model_selection import cross_val_score

In [2]:
df = pd.read_csv("../datasets/pima-indians-diabetes.data.csv")

In [3]:
df.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure,Triceps skinfold thickness,2-Hour serum insulin,Body mass index,Diabetes pedigree function,Age,Class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [26]:
features = df.iloc[:, [1,5,7]]
goal = df.iloc[:, -1]

scaler = StandardScaler()
features = scaler.fit_transform(features)

pca = PCA(n_components=2)
logistic = LogisticRegression(random_state=0)
svm = SVC(kernel="poly")

x_train, x_test, y_train, y_test = train_test_split(features, goal, test_size = 0.2, random_state = 0)

pipline = Pipeline(steps=[('pca', pca), ('logistic', logistic)])
pipline.fit(x_train, y_train)
y_pred = pipline.predict(x_test)
pipline.score(x_test, y_test)

0.77922077922077926

In [27]:
print(classification_report(y_test, y_pred))

             precision    recall  f1-score   support

          0       0.81      0.89      0.85       107
          1       0.68      0.53      0.60        47

avg / total       0.77      0.78      0.77       154



In [28]:
accuracies = cross_val_score(estimator = pipline, X = x_train, y = y_train, cv = 10)
accuracies

array([ 0.71428571,  0.74193548,  0.82258065,  0.7704918 ,  0.72131148,
        0.81967213,  0.73770492,  0.68852459,  0.78688525,  0.75409836])

In [29]:
print("mean of acc:", accuracies.mean())
print("Standard diviation of acc:", accuracies.std())

mean of acc: 0.755749036791
Standard diviation of acc: 0.0420509770637


In [30]:
print("model acc : {:.2f} (+/- {:.2f})%".format(accuracies.mean(), accuracies.std()))

model acc : 0.76 (+/- 0.04)%
