## download some useful packages 

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

## Load train and test data

In [2]:
train = pd.read_csv('train_dataset.csv')
test = pd.read_csv('test_dataset.csv')


In [3]:
train["Condition"] = train.CONDITION.map({"H":0, "D":1})

test["Condition"] = test.CONDITION.map({"H":0, "D":1})
train.drop("CONDITION", axis = 1,inplace = True)
test.drop("CONDITION", axis=1, inplace = True)

In [4]:
train.head()

Unnamed: 0,P1,N2,P3,M4,P5,M6,N7,N8,M9,P10,M11,N12,Condition
0,2,4,1,1,1,1,1,4,2,3,1,1,0
1,2,3,2,1,1,1,2,4,1,1,1,3,0
2,1,3,1,2,1,1,2,3,3,1,2,2,0
3,2,4,2,3,2,2,3,4,2,4,3,4,0
4,1,2,1,1,2,2,3,1,1,1,1,2,0


In [5]:
X_train = train.iloc[:,0:12]
y_train = train.iloc[:,12]
X_test= test.iloc[:,0:12]
y_test = test.iloc[:,12]

In [313]:
y_train.head()

0    0
1    0
2    0
3    0
4    0
Name: Condition, dtype: int64

 ### Use Scaler for train and test datasets

In [6]:
scaler = StandardScaler().fit(X_train)


### Look the Mean of Scaler

In [7]:
scaler.mean_


array([2.74040632, 2.71331828, 1.68397291, 1.94582393, 1.62641084,
       1.66365688, 2.40632054, 2.9537246 , 1.58352144, 2.2731377 ,
       1.74717833, 2.04401806])

### Look the variance of Scaler 

In [8]:
scaler.scale_

array([1.35232158, 1.11179945, 1.00423056, 1.17891538, 1.03075833,
       0.99307407, 1.15586863, 1.18735945, 0.89349253, 1.17532027,
       1.08933261, 1.01082365])

### Scaler transforming 


In [9]:
X_train_scaled = scaler.transform(X_train)
print(X_train_scaled.mean(axis=0))

[-3.20786788e-17  6.41573576e-17 -6.41573576e-17  1.28314715e-16
  8.01966970e-17  8.01966970e-17 -1.28314715e-16 -1.60393394e-16
 -1.28314715e-16 -1.60393394e-16  0.00000000e+00 -1.44354055e-16]


# Scale Test data set 

In [10]:
scaler = StandardScaler().fit(X_test)

In [11]:
scaler.mean_

array([2.83, 2.73, 1.72, 1.93, 1.62, 1.74, 2.58, 2.91, 1.69, 2.46, 1.84,
       2.27])

In [320]:
scaler.scale_

array([1.39323365, 1.16494635, 1.04      , 1.13362251, 0.91411159,
       1.11013513, 1.20149906, 1.23365311, 0.94546285, 1.16978631,
       1.18084715, 0.97831488])

In [12]:
scaler.transform(X_test)

array([[-0.5957364 ,  1.09017896, -0.69230769, ..., -0.39323422,
        -0.71135371,  0.74618103],
       [-0.5957364 ,  0.23177033, -0.69230769, ...,  0.46162277,
         0.13549594,  0.74618103],
       [-1.3134911 ,  1.94858759, -0.69230769, ..., -1.24809121,
         1.82919525,  1.76834682],
       ...,
       [-0.5957364 , -0.6266383 , -0.69230769, ..., -1.24809121,
        -0.71135371, -0.27598476],
       [-1.3134911 ,  0.23177033, -0.69230769, ..., -1.24809121,
        -0.71135371, -0.27598476],
       [ 1.5575277 , -1.48504693, -0.69230769, ..., -1.24809121,
        -0.71135371, -1.29815055]])

In [13]:
X_test_scaled = scaler.transform(X_test)

In [14]:
print(X_test_scaled)

[[-0.5957364   1.09017896 -0.69230769 ... -0.39323422 -0.71135371
   0.74618103]
 [-0.5957364   0.23177033 -0.69230769 ...  0.46162277  0.13549594
   0.74618103]
 [-1.3134911   1.94858759 -0.69230769 ... -1.24809121  1.82919525
   1.76834682]
 ...
 [-0.5957364  -0.6266383  -0.69230769 ... -1.24809121 -0.71135371
  -0.27598476]
 [-1.3134911   0.23177033 -0.69230769 ... -1.24809121 -0.71135371
  -0.27598476]
 [ 1.5575277  -1.48504693 -0.69230769 ... -1.24809121 -0.71135371
  -1.29815055]]


In [15]:
print(X_test_scaled.mean(axis=0))

[-1.77635684e-17  2.66453526e-17 -1.99840144e-17  7.32747196e-17
 -8.21565038e-17 -1.11022302e-17 -6.77236045e-17 -1.51545443e-16
  4.44089210e-17  1.33226763e-17  2.44249065e-17 -3.99680289e-17]


## Logistic regression 


In [16]:
model_logreg = LogisticRegression()

In [17]:
model_logreg.fit(X_train,y_train)

LogisticRegression()

In [21]:
y_pred = model_logreg.predict(X_test)

In [328]:
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(model_logreg.score(X_test, y_test)))


Accuracy of logistic regression classifier on test set: 0.82


## Cross Validation ?


In [20]:
from sklearn import model_selection
from sklearn.model_selection import cross_val_score
kfold = model_selection.KFold(n_splits=10)
modelCV = LogisticRegression()
scoring = 'accuracy'
results = model_selection.cross_val_score(modelCV, X_train, y_train, cv=kfold, scoring=scoring)
print("10-fold cross validation average accuracy: %.3f" % (results.mean()))

10-fold cross validation average accuracy: 0.698


## Confusion Matrix ?


In [22]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_test, y_pred)
print(confusion_matrix)

[[41  9]
 [ 9 41]]


The result is telling us that we have 41+41 correct predictions and 9+9 incorrect predictions 

In [23]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.82      0.82      0.82        50
           1       0.82      0.82      0.82        50

    accuracy                           0.82       100
   macro avg       0.82      0.82      0.82       100
weighted avg       0.82      0.82      0.82       100



## Support Vector Machine 

In [24]:
from sklearn import svm
from sklearn import metrics

In [25]:
cls = svm.SVC(kernel="linear")


### Train the model 

In [26]:
cls.fit(X_train_scaled,y_train)

SVC(kernel='linear')

## Predict the response 

In [27]:
pred = cls.predict(X_test_scaled)

In [28]:
print("accuracy:", metrics.accuracy_score(y_test,y_pred= pred))

accuracy: 0.8


## Precision score

In [29]:
print("precision:", metrics.precision_score(y_test,y_pred= pred))

precision: 0.8


## Recall 

In [30]:
print("recall:", metrics.recall_score(y_test,y_pred= pred))

recall: 0.8


In [31]:
print(metrics.classification_score(y_test,y_pred= pred))

AttributeError: module 'sklearn.metrics' has no attribute 'classification_score'