In [1]:
import pandas as pd

In [2]:
diabetes_data = pd.read_csv('data/diabetes.csv')

In [3]:
diabetes_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
diabetes_data.count()[0]

768

In [5]:
diabetes_data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [6]:
diabetes_data.keys()

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

## Data Description
Data Source: 
<br />
Smith, J. W., Everhart, J. E., Dickson, W. C., Knowler, W. C., & Johannes, R. S. (1988, November). Using the ADAP learning algorithm to forecast the onset of diabetes mellitus. In Proceedings of the annual symposium on computer application in medical care (p. 261). American Medical Informatics Association.
<br />
<br />
Source: 
https://www.kaggle.com/datasets/mathchi/diabetes-data-set
<br /> 
<br /> 
Several constraints were placed on the selection of these instances from a larger database. In particular, all patients here are females at least 21 years old of Pima Indian heritage.


- Pregnancies: Number of times pregnant
- Glucose: Plasma glucose concentration a 2 hours in an oral glucose tolerance test
- BloodPressure: Diastolic blood pressure (mm Hg)
- SkinThickness: Triceps skin fold thickness (mm)
- Insulin: 2-Hour serum insulin (mu U/ml)
- BMI: Body mass index (weight in kg/(height in m)^2)
- DiabetesPedigreeFunction: Diabetes pedigree function
- Age: Age (years)
- Outcome: Class variable (0 or 1)

### data augmentation

In [7]:
diabetes_data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [8]:
#converting Outcome from integer to category
diabetes_data['Outcome'] = pd.Categorical(diabetes_data.Outcome)

In [9]:
diabetes_data.Outcome

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: category
Categories (2, int64): [0, 1]

### Train-Test Split

In [10]:
import math
total_data = diabetes_data.count()[0]
total_training_data = math.floor(total_data*0.7)
total_testing_data = total_data - total_training_data

print('total_data', total_data)
print('total_training_data', total_training_data, total_training_data/total_data)
print('total_testing_data', total_testing_data, total_testing_data/total_data)

total_data 768
total_training_data 537 0.69921875
total_testing_data 231 0.30078125


In [11]:
x_train = diabetes_data[:total_training_data]
x_test = diabetes_data[total_testing_data:]

In [12]:
x_train.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [13]:
x_test.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
231,6,134,80,37,370,46.2,0.238,46,1
232,1,79,80,25,37,25.4,0.583,22,0
233,4,122,68,0,0,35.0,0.394,29,0
234,3,74,68,28,45,29.7,0.293,23,0
235,4,171,72,0,0,43.6,0.479,26,1


In [14]:
y_train = x_train['Outcome']

In [15]:
x_train.drop(columns=['Outcome'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_train.drop(columns=['Outcome'], inplace=True)


In [16]:
x_train

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
532,1,86,66,52,65,41.3,0.917,29
533,6,91,0,0,0,29.8,0.501,31
534,1,77,56,30,56,33.3,1.251,24
535,4,132,0,0,0,32.9,0.302,23


In [17]:
y_test = x_test['Outcome']
x_test.drop(columns=['Outcome'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_test.drop(columns=['Outcome'], inplace=True)


### Simple linear regreesion

In [18]:
from sklearn import linear_model



In [19]:
#loading the model
linear_regression = linear_model.LinearRegression()

In [20]:
#fitting the model
linear_regression.fit(x_train, y_train)

In [21]:
y_pred = linear_regression.predict(x_test)

In [22]:
import numpy as np

In [23]:
y_pred = np.int0(np.abs(np.round(y_pred)))

In [24]:
np.array(y_test)

array([1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

In [25]:
y_pred

array([1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

In [26]:
linear_regression.coef_

array([ 2.22119288e-02,  5.35424490e-03, -2.01407833e-03,  5.57572778e-05,
       -1.74283797e-04,  1.38714128e-02,  1.44911747e-01,  7.93531239e-04])

In [27]:
from sklearn.metrics import mean_squared_error, confusion_matrix, accuracy_score

In [28]:
mean_squared_error(y_test, y_pred)

0.21042830540037244

In [29]:
confusion_matrix(y_test, y_pred)

array([[324,  33],
       [ 80, 100]])

In [30]:
accuracy_score(y_pred, y_test)

0.7895716945996276

### Naive Bayes

In [31]:
from sklearn.naive_bayes import GaussianNB

In [32]:
naive_bayes = GaussianNB()

In [33]:
naive_bayes.fit(x_train, y_train)

In [34]:
y_pred_nb = naive_bayes.predict(x_test)

In [35]:
y_pred

array([1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,

In [36]:
mean_squared_error(y_test, y_pred_nb)

0.2271880819366853

In [37]:
confusion_matrix(y_test, y_pred_nb)

array([[304,  53],
       [ 69, 111]])

In [38]:
accuracy_score(y_pred_nb, y_test)

0.7728119180633147

### Decision Tree

In [39]:
from sklearn import neural_network

In [40]:
nn = neural_network.MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)

In [41]:
nn.fit(x_train, y_train)

In [42]:
y_pred_nn = nn.predict(x_test)

In [43]:
print('accuracy_score', accuracy_score(y_test, y_pred_nn))
print('mean_squared_error', mean_squared_error(y_test, y_pred_nn))
print('confusion_matrix', confusion_matrix(y_test, y_pred_nn))

accuracy_score 0.6834264432029795
mean_squared_error 0.3165735567970205
confusion_matrix [[319  38]
 [132  48]]


In [44]:
from sklearn.svm import SVC

In [45]:
svm = SVC(probability=True)

In [46]:
svm.fit(x_train, y_train)

In [47]:
y_pred_svm = svm.predict(x_test)

In [48]:
print('accuracy_score', accuracy_score(y_test, y_pred_svm))
print('mean_squared_error', mean_squared_error(y_test, y_pred_svm))
print('confusion_matrix', confusion_matrix(y_test, y_pred_svm))

accuracy_score 0.776536312849162
mean_squared_error 0.22346368715083798
confusion_matrix [[322  35]
 [ 85  95]]


### All together

In [49]:
print('accuracy_score lr', accuracy_score(y_test, y_pred))
print('accuracy_score nb', accuracy_score(y_test, y_pred_nb))
print('accuracy_score nn', accuracy_score(y_test, y_pred_nn))
print('accuracy_score svm', accuracy_score(y_test, y_pred_svm))


accuracy_score lr 0.7895716945996276
accuracy_score nb 0.7728119180633147
accuracy_score nn 0.6834264432029795
accuracy_score svm 0.776536312849162


### simple average ensemble

In [50]:
average_prediction = (y_pred+y_pred_nb+y_pred_nn)//3
average_prediction

array([1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [51]:
print('accuracy_score', accuracy_score(y_test, average_prediction))
print('mean_squared_error', mean_squared_error(y_test, average_prediction))
print('confusion_matrix', confusion_matrix(y_test, average_prediction))

accuracy_score 0.7150837988826816
mean_squared_error 0.2849162011173184
confusion_matrix [[348   9]
 [144  36]]


### other ensembles
- Voting
- Bagging
- Boosting

In [52]:
from sklearn.ensemble import VotingClassifier

In [53]:
voting = VotingClassifier([
    ('nb', naive_bayes),
    ('nn', nn),
    ('svm', svm),
], voting='soft')

In [54]:
np_x_train = np.array(x_train)
np_y_train = np.array(y_train)

In [55]:
voting.fit(x_train, y_train)

In [56]:
y_pred_voting = voting.predict(x_test)

In [57]:
print('accuracy_score', accuracy_score(y_test, y_pred_voting))
print('mean_squared_error', mean_squared_error(y_test, y_pred_voting))
print('confusion_matrix\n', confusion_matrix(y_test, y_pred_voting))

accuracy_score 0.7821229050279329
mean_squared_error 0.21787709497206703
confusion_matrix
 [[321  36]
 [ 81  99]]


### Saving the machine learning file

In [58]:
import pickle

In [59]:
pickle.dump(voting, open('static/saved_voting.pkl', 'wb'))

### Loading model and using it

In [60]:
# x_test.reset_index(inplace=True)

In [61]:
x_test

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
231,6,134,80,37,370,46.2,0.238,46
232,1,79,80,25,37,25.4,0.583,22
233,4,122,68,0,0,35.0,0.394,29
234,3,74,68,28,45,29.7,0.293,23
235,4,171,72,0,0,43.6,0.479,26
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [62]:
x_test.values[0]

array([6.00e+00, 1.34e+02, 8.00e+01, 3.70e+01, 3.70e+02, 4.62e+01,
       2.38e-01, 4.60e+01])

In [63]:
loaded_model = pickle.load(open('static/saved_voting.pkl', 'rb'))
loaded_model.predict([x_test.values[4]])



array([1])

In [64]:
y_pred_voting[4]

1

### Bootstrap Aggregation (Bagging)

In [66]:
from sklearn.ensemble import RandomForestClassifier

In [67]:
random_forest = RandomForestClassifier()

In [68]:
random_forest.fit(x_train, y_train)

In [70]:
y_pred_bagging = random_forest.predict(x_test)

In [71]:
print('accuracy_score', accuracy_score(y_test, y_pred_bagging))
print('mean_squared_error', mean_squared_error(y_test, y_pred_bagging))
print('confusion_matrix\n', confusion_matrix(y_test, y_pred_bagging))

accuracy_score 0.9068901303538175
mean_squared_error 0.0931098696461825
confusion_matrix
 [[341  16]
 [ 34 146]]


### Boosting

In [72]:
from sklearn.ensemble import AdaBoostClassifier

In [73]:
ada_boost = AdaBoostClassifier()

In [74]:
ada_boost.fit(x_train, y_train)

In [75]:
y_pred_boosting = ada_boost.predict(x_test)

In [76]:
print('accuracy_score', accuracy_score(y_test, y_pred_boosting))
print('mean_squared_error', mean_squared_error(y_test, y_pred_boosting))
print('confusion_matrix\n', confusion_matrix(y_test, y_pred_boosting))

accuracy_score 0.8286778398510242
mean_squared_error 0.1713221601489758
confusion_matrix
 [[320  37]
 [ 55 125]]


### Stacking

In [77]:
from sklearn.ensemble import StackingClassifier

In [79]:
stacking = StackingClassifier(estimators=[
    ('nb', naive_bayes),
    ('nn', nn),
    ('svm', svm)
])

In [80]:
stacking.fit(x_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [81]:
y_pred_stacking = stacking.predict(x_test)

In [82]:
print('accuracy_score', accuracy_score(y_test, y_pred_stacking))
print('mean_squared_error', mean_squared_error(y_test, y_pred_stacking))
print('confusion_matrix\n', confusion_matrix(y_test, y_pred_stacking))

accuracy_score 0.7858472998137802
mean_squared_error 0.21415270018621974
confusion_matrix
 [[320  37]
 [ 78 102]]


### Saving Pickle file

In [83]:
import pickle

In [85]:
with open('static/saved_random_forest.pkl', 'wb') as f:
    pickle.dump(random_forest, f)

In [86]:
loaded_model = pickle.load(open('static/saved_random_forest.pkl', 'rb'))

In [87]:
loaded_predict = loaded_model.predict(x_test)

In [88]:
print('accuracy_score', accuracy_score(y_test, loaded_predict))
print('mean_squared_error', mean_squared_error(y_test, loaded_predict))
print('confusion_matrix\n', confusion_matrix(y_test, loaded_predict))

accuracy_score 0.9068901303538175
mean_squared_error 0.0931098696461825
confusion_matrix
 [[341  16]
 [ 34 146]]


In [91]:
x_test.head(1)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
231,6,134,80,37,370,46.2,0.238,46


In [90]:
loaded_predict

array([1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,