In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import matplotlib.pyplot as plt

In [None]:
# Load the passage dataset from disk and preview up to its first 30 rows.
data = pd.read_csv('passage.csv')
data.head(30)

Unnamed: 0,Passage,Score,Time,D
0,3,4.0,10,Mild
1,6,0.0,10,Yes
2,4,10.0,10,No
3,5,10.0,1,No
4,6,6.0,2,Mild
5,7,0.0,6,Yes
6,4,8.0,2,No
7,7,2.0,1,Yes
8,3,10.0,8,No
9,4,4.0,4,Mild


In [None]:
# Count the rows per label in the target column `D` (class-balance check).
data['D'].value_counts()

Yes     84
No      84
Mild    78
Name: D, dtype: int64

In [None]:
# Shuffle all rows and renumber the index from 0.
# The shuffle is seeded so the row order, and therefore the later seeded
# train/test split, is the same on every Restart & Run All.
data = data.sample(frac=1, random_state=123).reset_index(drop=True)

In [None]:
# Preview the first rows of the current `data` frame.
data.head()

Unnamed: 0,Passage,Score,Time,D
0,4,4.0,10,Mild
1,6,4.0,10,Mild
2,7,6.0,4,Mild
3,1,2.5,10,Yes
4,7,6.0,6,Mild


In [None]:
# Imputing Missing Values
from sklearn.base import TransformerMixin

class DataFrameImputer(TransformerMixin):
    """Impute missing values column by column.

    Columns of dtype object are imputed with the most frequent value
    in the column.

    Columns of other types are imputed with the mean of the column.

    A column that has no observed values at all offers nothing to impute
    from; its fill value is NaN and the column is left unchanged.
    """

    def __init__(self):
        """Create an unfitted imputer; `fit` stores the fill values."""

    def fit(self, X, y=None):
        """Learn one fill value per column of the DataFrame `X`.

        `y` is accepted for scikit-learn API compatibility and ignored.
        Returns `self`, with the fill values stored in `self.fill`
        (a Series indexed by the column labels of `X`).
        """
        fill_values = []
        for column in X:
            series = X[column]
            if series.dtype == np.dtype('O'):
                counts = series.value_counts()
                # value_counts() skips missing entries, so an all-missing
                # column gives an empty result; taking index[0] of it would
                # raise IndexError. Fall back to NaN (nothing to fill with).
                fill_values.append(counts.index[0] if len(counts) else np.nan)
            else:
                fill_values.append(series.mean())

        self.fill = pd.Series(fill_values, index=X.columns)

        return self

    def transform(self, X, y=None):
        """Return a copy of `X` with missing entries replaced per column.

        Requires a prior call to `fit`; `y` is ignored.
        """
        return X.fillna(self.fill)

In [None]:
# Wrap the current frame, learn one fill value per column, then apply those
# values; `data` is rebound to the gap-free result.
X = pd.DataFrame(data)
imputer = DataFrameImputer()
data = imputer.fit(X).transform(X)

In [None]:
# Count the missing values left in each column.
data.isna().sum()

Passage    0
Score      0
Time       0
D          0
dtype: int64

In [None]:
# Shared encoder instance used for the label column.
LE = LabelEncoder()
# Names of every column whose dtype is anything other than int64.
non_int_columns = data.select_dtypes(exclude=["int64"])
CateList = non_int_columns.columns
print(CateList)

Index(['Score', 'D'], dtype='object')


In [None]:
# Replace the string labels in `D` with integer codes; this fits the shared
# encoder, so its `classes_` afterwards describe the code -> label mapping.
target_column = 'D'
data[target_column] = LE.fit_transform(data[target_column])

In [None]:
# Preview the frame; `D` now holds integer codes instead of strings.
data.head()

Unnamed: 0,Passage,Score,Time,D
0,4,4.0,10,0
1,6,4.0,10,0
2,7,6.0,4,0
3,1,2.5,10,2
4,7,6.0,6,0


In [None]:
# Labels known to the encoder; a label's position here is its integer code.
LE.classes_

array(['Mild', 'No', 'Yes'], dtype=object)

In [None]:
# Every column except the last one (the label `D`) is a feature.
feature_frame = data.iloc[:, :-1]
mm = MinMaxScaler()
# Build a new float frame from the scaled values. Assigning them back with
# `df[:] = ...` would write floats into int64 columns of a slice of `data`,
# which triggers SettingWithCopy / incompatible-dtype problems in pandas.
df = pd.DataFrame(
    mm.fit_transform(feature_frame),
    columns=feature_frame.columns,
    index=feature_frame.index,
)
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split, so test-set ranges leak into preprocessing. Fitting on the training
# rows only would need the split to happen before this cell.

In [None]:
# Preview the scaled feature columns.
df.head()

Unnamed: 0,Passage,Score,Time
0,0.5,0.4,1.0
1,0.833333,0.4,1.0
2,1.0,0.6,0.333333
3,0.0,0.25,1.0
4,1.0,0.6,0.555556


In [None]:
# Feature matrix from the scaled columns, target vector from the encoded
# label column; print both shapes as a sanity check.
X, y = df.values, data['D'].values
print(X.shape, y.shape)

(246, 3) (246,)


In [None]:
# Hold out 25% of the rows for testing; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=123,
)

In [None]:
# Shapes of the training split, then of the test split.
for split_X, split_y in ((X_train, y_train), (X_test, y_test)):
    print(split_X.shape, split_y.shape)

(184, 3) (184,)
(62, 3) (62,)


In [None]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Four base classifiers that are later combined into a voting ensemble.
clf1 = SVC(kernel='linear')
clf2 = LogisticRegression()
# Seeded so the tree is built identically on every run.
clf3 = DecisionTreeClassifier(criterion="gini", random_state=123)
clf4 = KNeighborsClassifier(n_neighbors=3)

In [None]:
# Train every base classifier on the training split.
for model in (clf1, clf2, clf3):
    model.fit(X_train, y_train)
# Kept as the final expression so the cell still echoes the fitted KNN model.
clf4.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [None]:
# Predictions of each base classifier on the rows it was trained on.
pred1_t1, pred2_t1, pred3_t1, pred4_t1 = (
    model.predict(X_train) for model in (clf1, clf2, clf3, clf4)
)


In [None]:
# Training-set accuracy of each base classifier, in the same order as the
# predictions (SVM, logistic regression, decision tree, KNN).
acc1_t1, acc2_t1, acc3_t1, acc4_t1 = (
    accuracy_score(y_train, train_pred)
    for train_pred in (pred1_t1, pred2_t1, pred3_t1, pred4_t1)
)


In [None]:
# Report the training accuracy of each base classifier, one line per model.
training_report = [
    ("Training Accuracy (SVM):", acc1_t1),
    ("Training Accuracy (Logistic Regression):", acc2_t1),
    ("Training Accuracy (Decision Tree):", acc3_t1),
    ("Training Accuracy (KNN):", acc4_t1),
]
for caption, score in training_report:
    print(caption, score)


Training Accuracy (SVM): 0.98
Training Accuracy (Logistic Regression): 0.9966666666666667
Training Accuracy (Decision Tree): 0.96
Training Accuracy (KNN): 0.9966666666666667


In [None]:
# (name, estimator) pairs in the form expected by VotingClassifier.
estimators = [
    ('SVM', clf1),
    ('Logistic Regression', clf2),
    ('Decision Tree', clf3),
    ('KNN', clf4),
]
estimators

[('SVM',
  SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
      decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
      max_iter=-1, probability=False, random_state=None, shrinking=True,
      tol=0.001, verbose=False)),
 ('Logistic Regression',
  LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                     intercept_scaling=1, l1_ratio=None, max_iter=100,
                     multi_class='auto', n_jobs=None, penalty='l2',
                     random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                     warm_start=False)),
 ('Decision Tree',
  DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                         max_depth=None, max_features=None, max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_

In [None]:
from sklearn.ensemble import VotingClassifier

In [None]:
# Combine the base classifiers into one voting ensemble and train it on the
# training split; the fit call stays last so the cell echoes the model.
ensemble = VotingClassifier(estimators=estimators)
ensemble.fit(X_train, y_train)

VotingClassifier(estimators=[('SVM',
                              SVC(C=1.0, break_ties=False, cache_size=200,
                                  class_weight=None, coef0=0.0,
                                  decision_function_shape='ovr', degree=3,
                                  gamma='scale', kernel='linear', max_iter=-1,
                                  probability=False, random_state=None,
                                  shrinking=True, tol=0.001, verbose=False)),
                             ('Logistic Regression',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_interc...
                                                     min_impurity_split=None,
                                                     min_samples_leaf=1,
                                                     min_samples_split=2,
                                                     min_weight_fraction_leaf=0.0,
         

In [None]:
# Ensemble predictions for the training split and the held-out test split.
pred_t1, pred_t2 = (
    ensemble.predict(features) for features in (X_train, X_test)
)

In [None]:
# Accuracy of the ensemble on the training split and on the test split.
acc_t1, acc_t2 = (
    accuracy_score(y_train, pred_t1),
    accuracy_score(y_test, pred_t2),
)

In [None]:
# Accuracy of the ensemble on the rows it was trained on.
print("Training Accuracy (Ensemble):",acc_t1)

Training Accuracy (Ensemble): 1.0


In [None]:
# Accuracy of the ensemble on the held-out test split.
print("Testing Accuracy (Ensemble):",acc_t2)

Testing Accuracy (Ensemble): 0.9966666666666667


In [None]:
# `sklearn.externals.joblib` was deprecated and later removed from
# scikit-learn; the standalone `joblib` package is the replacement. The
# fallback keeps the notebook working on very old scikit-learn installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib



In [None]:
# Persist the trained ensemble, then read it back to check the round trip.
model_path = 'memory.pkl'
joblib.dump(ensemble, model_path)
# NOTE: joblib files are pickles; only load files from a trusted source.
loaded_model = joblib.load(model_path)

In [None]:
# Predict labels for the full feature matrix X with the reloaded model.
# X contains the training rows as well as the test rows.
final = loaded_model.predict(X) 

In [None]:
# Re-read the raw CSV (unshuffled, unimputed, labels still strings).
# NOTE(review): the name `set` shadows Python's builtin `set` type for the
# rest of the session; consider renaming it here and in the cells that use it.
set = pd.read_csv('passage.csv')

In [None]:
# Preview the first rows of the re-read raw frame.
set.head()

Unnamed: 0,Passage,Score,Time,D
0,3,4.0,10,Mild
1,6,0.0,10,Yes
2,4,10.0,10,No
3,5,10.0,1,No
4,6,6.0,2,Mild


In [None]:
# Encode the raw labels with the mapping the encoder has already learned.
# Calling fit_transform here would re-fit the shared encoder on this frame
# and could silently change the code -> label mapping used by the model;
# transform() instead raises if an unknown label shows up.
for i in ['D']:
    set[i] = LE.transform(set[i])

In [None]:
# Preview the raw frame; `D` now holds integer codes.
set.head()

Unnamed: 0,Passage,Score,Time,D
0,3,4.0,10,0
1,6,0.0,10,2
2,4,10.0,10,1
3,5,10.0,1,1
4,6,6.0,2,0


In [None]:
# Predict labels for the full feature matrix X (same call as made right
# after the model was reloaded from 'memory.pkl').
final = loaded_model.predict(X) 

In [None]:
# Accuracy over the full dataset. X/y include the rows the model was trained
# on, so this figure is more optimistic than the held-out test accuracy.
acc = accuracy_score(y, final)

In [None]:
# Per-feature maximum recorded on the scaler.
mm.data_max_

array([ 7., 10., 10.])

In [None]:
# Per-feature minimum recorded on the scaler.
mm.data_min_

array([1., 0., 1.])

In [None]:
# Overwrite the recorded per-feature maxima on the fitted scaler.
# NOTE(review): in the recorded outputs, mm.transform([[7, 4, 10]]) still
# returns [1.0, 0.4, 1.0], i.e. it scales by the originally fitted ranges,
# so this assignment does not appear to change transform(). Confirm the
# intent; it also changes what gets pickled into 'memory_mm.pkl'.
mm.data_max_ = np.array([2., 5., 5.])

In [None]:
# Per-feature minimum recorded on the scaler (unchanged by the cell above).
mm.data_min_

array([1., 0., 1.])

In [None]:
# Persist the fitted scaler and read it back to check the round trip.
# The reloaded scaler gets its own name: binding it to `loaded_model` would
# replace the classifier, and the later `loaded_model.predict(...)` calls
# would then hit a MinMaxScaler, which has no predict().
joblib.dump(mm, 'memory_mm.pkl')
# NOTE: joblib files are pickles; only load files from a trusted source.
loaded_scaler = joblib.load('memory_mm.pkl')

In [None]:
# Scale one raw sample; values are in feature-column order
# (Passage, Score, Time).
t = mm.transform([[7, 4, 10]])

In [None]:
# Show the scaled sample.
t

array([[1. , 0.4, 1. ]])

In [None]:
# Predict the encoded class for the scaled sample.
# NOTE(review): make sure `loaded_model` refers to the classifier at this
# point and not to the scaler object.
pred = loaded_model.predict(t)

In [None]:
# Translate the first encoded prediction back to its string label with the
# fitted encoder. This replaces a hand-written 0/1/2 table that duplicated
# the encoder's mapping and left `mem` undefined for any other code; an
# unknown code now raises ValueError instead.
mem = LE.inverse_transform(pred[:1])[0]

In [None]:
# The model was trained on min-max scaled features, so a raw sample
# (Passage, Score, Time) must go through the same scaler before prediction.
# NOTE(review): `loaded_model` must refer to the classifier here.
loaded_model.predict(mm.transform([[4, 2, 10]]))

array([1])