#### Importing the needed libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt 
plt.rc("font", size=14)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)

In [2]:
data = pd.read_csv(r'Parkinsons Train Data.csv', header=0)
data = data.dropna()
print(data.shape)
print(list(data.columns))

(1040, 28)
['Subject Id', 'Jitter(local)', 'Jitter(local, absolute)', 'Jitter (rap)', 'Jitter (ppq5)', 'Jitter (ddp)', 'Shimmer (local)', 'Shimmer (local, db)', 'Shimmer (apq3)', 'Shimmer (apq5)', 'Shimmer (apq11)', 'Shimmer (dda)', 'AC', 'NDH', 'HTM', 'Median Pitch', 'Mean Pitch', 'Standard deviation', 'Minimum pitch', 'Maximum pitch', 'Number of pulses', 'Number of periods', 'Mean period', 'Standard deviation of period', 'Fraction of locally unvoiced frames', 'Number of voice breaks', 'Degree of voice breaks', 'Class information']


In [3]:
data.head()

Unnamed: 0,Subject Id,Jitter(local),"Jitter(local, absolute)",Jitter (rap),Jitter (ppq5),Jitter (ddp),Shimmer (local),"Shimmer (local, db)",Shimmer (apq3),Shimmer (apq5),...,Minimum pitch,Maximum pitch,Number of pulses,Number of periods,Mean period,Standard deviation of period,Fraction of locally unvoiced frames,Number of voice breaks,Degree of voice breaks,Class information
0,1,1.488,9e-05,0.9,0.794,2.699,8.334,0.779,4.517,4.609,...,142.229,187.576,160,159,0.006065,0.000416,0.0,0,0.0,1
1,1,0.728,3.8e-05,0.353,0.376,1.059,5.864,0.642,2.058,3.18,...,159.515,234.505,170,169,0.005181,0.000403,2.247,0,0.0,1
2,1,1.22,7.4e-05,0.732,0.67,2.196,8.719,0.875,4.347,5.166,...,146.445,211.442,1431,1427,0.006071,0.000474,10.656,1,0.178,1
3,1,2.502,0.000123,1.156,1.634,3.469,13.513,1.273,5.263,8.771,...,182.713,220.23,94,92,0.00491,0.00032,0.0,0,0.0,1
4,1,3.509,0.000167,1.715,1.539,5.145,9.112,1.04,3.102,4.927,...,182.821,225.162,117,114,0.004757,0.00038,18.182,1,13.318,1


In [4]:
data.columns.values

array(['Subject Id', 'Jitter(local)', 'Jitter(local, absolute)',
       'Jitter (rap)', 'Jitter (ppq5)', 'Jitter (ddp)', 'Shimmer (local)',
       'Shimmer (local, db)', 'Shimmer (apq3)', 'Shimmer (apq5)',
       'Shimmer (apq11)', 'Shimmer (dda)', 'AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks', 'Class information'], dtype=object)

#### Choosing Columns for Analysis

In [5]:
cols_1 = ['Jitter(local)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_1]
y = data['Class information']

In [6]:
#building a model
import statsmodels.api as sm
logit_model=sm.Logit(y,X)
result=logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.626391
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:      Class information   No. Observations:                 1040
Model:                          Logit   Df Residuals:                     1023
Method:                           MLE   Df Model:                           16
Date:                Fri, 28 Feb 2020   Pseudo R-squ.:                 0.09631
Time:                        18:42:26   Log-Likelihood:                -651.45
converged:                       True   LL-Null:                       -720.87
Covariance Type:            nonrobust   LLR p-value:                 1.208e-21
                                          coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------
Jitter(local)                           0.5236      0.068 

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [8]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.69
Accuracy on test set: 0.62


In [9]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.934
Accuracy on test set: 0.673


In [10]:
data.columns.values

array(['Subject Id', 'Jitter(local)', 'Jitter(local, absolute)',
       'Jitter (rap)', 'Jitter (ppq5)', 'Jitter (ddp)', 'Shimmer (local)',
       'Shimmer (local, db)', 'Shimmer (apq3)', 'Shimmer (apq5)',
       'Shimmer (apq11)', 'Shimmer (dda)', 'AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks', 'Class information'], dtype=object)

In [14]:
cols_2 = ['Jitter(local)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_2]
y = data['Class information']

In [15]:
#building a model
import statsmodels.api as sm
logit_model=sm.Logit(y,X)
result=logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.623923
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:      Class information   No. Observations:                 1040
Model:                          Logit   Df Residuals:                     1023
Method:                           MLE   Df Model:                           16
Date:                Fri, 28 Feb 2020   Pseudo R-squ.:                 0.09987
Time:                        18:43:23   Log-Likelihood:                -648.88
converged:                       True   LL-Null:                       -720.87
Covariance Type:            nonrobust   LLR p-value:                 1.191e-22
                                          coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------
Jitter(local)                           0.5263      0.068 

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [17]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.70
Accuracy on test set: 0.60


In [18]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.918
Accuracy on test set: 0.558


In [19]:
cols_3 = ['Jitter(local)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_3]
y = data['Class information']

In [20]:
#building a model
import statsmodels.api as sm
logit_model=sm.Logit(y,X)
result=logit_model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.631258
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:      Class information   No. Observations:                 1040
Model:                          Logit   Df Residuals:                     1023
Method:                           MLE   Df Model:                           16
Date:                Fri, 28 Feb 2020   Pseudo R-squ.:                 0.08929
Time:                        18:44:50   Log-Likelihood:                -656.51
converged:                       True   LL-Null:                       -720.87
Covariance Type:            nonrobust   LLR p-value:                 1.132e-19
                                          coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------------------------------
Jitter(local)                           0.5113      0.068 

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [22]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.69
Accuracy on test set: 0.62


In [23]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.924
Accuracy on test set: 0.615


In [28]:
cols_4 = ['Jitter(local)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_4]
y = data['Class information']

In [29]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [30]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.69
Accuracy on test set: 0.62


In [31]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.923
Accuracy on test set: 0.673


In [32]:
cols_5 = ['Jitter(local)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_5]
y = data['Class information']

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [34]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.70
Accuracy on test set: 0.67


In [35]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.926
Accuracy on test set: 0.635


In [36]:
cols_6 = ['Jitter(local)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_6]
y = data['Class information']

In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [38]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.69
Accuracy on test set: 0.62


In [39]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.924
Accuracy on test set: 0.615


In [45]:
cols_7 = ['Jitter(local, absolute)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_7]
y = data['Class information']

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [47]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.69
Accuracy on test set: 0.62


In [48]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.933
Accuracy on test set: 0.673


In [49]:
cols_8 = ['Jitter(local, absolute)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_8]
y = data['Class information']

In [50]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [51]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.70
Accuracy on test set: 0.60


In [52]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.921
Accuracy on test set: 0.538


In [53]:
cols_9 = ['Jitter(local, absolute)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_9]
y = data['Class information']

In [54]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [55]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.68
Accuracy on test set: 0.60


In [56]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.935
Accuracy on test set: 0.596


In [57]:
cols_10 = ['Jitter(local, absolute)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_10]
y = data['Class information']

In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [64]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.68
Accuracy on test set: 0.63


In [65]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.925
Accuracy on test set: 0.692


In [105]:
cols_11 = ['Jitter(local, absolute)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_11]
y = data['Class information']

In [67]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [68]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.70
Accuracy on test set: 0.65


In [71]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.928
Accuracy on test set: 0.615


In [106]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.70
Accuracy on test set: 0.65
Using scaled values
Accuracy on training set: 0.928
Accuracy on test set: 0.615


In [72]:
cols_12 = ['Jitter(local, absolute)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_12]
y = data['Class information']

In [82]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.61


In [83]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.70
Accuracy on test set: 0.63


In [148]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.926
Accuracy on test set: 0.654


In [85]:
cols_13 = ['Jitter (rap)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_13]
y = data['Class information']

In [89]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
from sklearn.svm import SVC
from sklearn import metrics
svc = SVC()
svc.fit(X_train, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))

Accuracy on training set: 0.58
Accuracy on test set: 0.65


In [90]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

svc = SVC()
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.70
Accuracy on test set: 0.62


In [91]:
svc = SVC(C=1000)
svc.fit(X_train_scaled, y_train)

print("Accuracy on training set: {:.3f}".format(
    svc.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

Accuracy on training set: 0.926
Accuracy on test set: 0.654


In [99]:
def getSVM(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=0)
    from sklearn.svm import SVC
    from sklearn import metrics
    svc = SVC()
    svc.fit(X_train, y_train)
    print("Accuracy on training set: {:.2f}".format(svc.score(X_train, y_train)))
    print("Accuracy on test set: {:.2f}".format(svc.score(X_test, y_test)))
    
    #MinMaxScalar
    print("Using MinMaxScaler")
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)

    svc = SVC()
    svc.fit(X_train_scaled, y_train)

    print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))
    
    #Scale
    print("Using scaled values")
    svc = SVC(C=1000)
    svc.fit(X_train_scaled, y_train)

    print("Accuracy on training set: {:.3f}".format(
        svc.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

In [95]:
def getSVM_MinMaxScalar(X, y):
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)

    svc = SVC()
    svc.fit(X_train_scaled, y_train)

    print("Accuracy on training set: {:.2f}".format(svc.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.2f}".format(svc.score(X_test_scaled, y_test)))
    

In [96]:
def getSVM_scaled(X,y):
    svc = SVC(C=1000)
    svc.fit(X_train_scaled, y_train)

    print("Accuracy on training set: {:.3f}".format(
        svc.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.3f}".format(svc.score(X_test_scaled, y_test)))

In [100]:
cols_14 = ['Jitter (rap)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_14]
y = data['Class information']

In [101]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.70
Accuracy on test set: 0.62
Using scaled values
Accuracy on training set: 0.919
Accuracy on test set: 0.615


In [109]:
cols_15 = ['Jitter (rap)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_15]
y = data['Class information']

In [110]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.60
Using scaled values
Accuracy on training set: 0.932
Accuracy on test set: 0.577


In [112]:
cols_16 = ['Jitter (rap)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_16]
y = data['Class information']

In [113]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.62
Using scaled values
Accuracy on training set: 0.928
Accuracy on test set: 0.692


In [114]:
cols_17 = ['Jitter (rap)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_17]
y = data['Class information']

In [115]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.69
Using scaled values
Accuracy on training set: 0.933
Accuracy on test set: 0.615


In [116]:
cols_18 = ['Jitter (rap)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_18]
y = data['Class information']

In [117]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.60
Using scaled values
Accuracy on training set: 0.932
Accuracy on test set: 0.577


In [120]:
cols_19 = ['Jitter (ppq5)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_19]
y = data['Class information']

In [121]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.62
Using scaled values
Accuracy on training set: 0.918
Accuracy on test set: 0.712


In [122]:
cols_20 = ['Jitter (ppq5)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_20]
y = data['Class information']

In [123]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.63
Using scaled values
Accuracy on training set: 0.909
Accuracy on test set: 0.596


In [124]:
cols_21 = ['Jitter (ppq5)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_21]
y = data['Class information']

In [125]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.60
Using scaled values
Accuracy on training set: 0.918
Accuracy on test set: 0.692


In [126]:
cols_22 = ['Jitter (ppq5)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_22]
y = data['Class information']

### Highest Prediction

In [127]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.63
Using scaled values
Accuracy on training set: 0.917
Accuracy on test set: 0.731


In [128]:
cols_23 = ['Jitter (ppq5)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_23]
y = data['Class information']

In [129]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.71
Using scaled values
Accuracy on training set: 0.917
Accuracy on test set: 0.635


In [130]:
cols_24 = ['Jitter (ppq5)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_24]
y = data['Class information']

In [131]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.60
Using scaled values
Accuracy on training set: 0.918
Accuracy on test set: 0.692


In [134]:
cols_25 = ['Jitter (ddp)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_25]
y = data['Class information']

In [135]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.70
Accuracy on test set: 0.62
Using scaled values
Accuracy on training set: 0.926
Accuracy on test set: 0.654


In [136]:
cols_26 = ['Jitter (ddp)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_26]
y = data['Class information']

In [137]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.70
Accuracy on test set: 0.62
Using scaled values
Accuracy on training set: 0.919
Accuracy on test set: 0.615


In [138]:
cols_27 = ['Jitter (ddp)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_27]
y = data['Class information']

In [139]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.60
Using scaled values
Accuracy on training set: 0.932
Accuracy on test set: 0.577


In [140]:
cols_28 = ['Jitter (ddp)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_28]
y = data['Class information']

In [141]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.62
Using scaled values
Accuracy on training set: 0.928
Accuracy on test set: 0.692


In [144]:
cols_29 = ['Jitter (ddp)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_29]
y = data['Class information']

In [145]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.69
Using scaled values
Accuracy on training set: 0.933
Accuracy on test set: 0.596


In [146]:
cols_30 = ['Jitter (ddp)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_30]
y = data['Class information']

In [147]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.65
Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.60
Using scaled values
Accuracy on training set: 0.932
Accuracy on test set: 0.577
