### Importing Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
import matplotlib.pyplot as plt 
plt.rc("font", size=14)
from sklearn.model_selection import train_test_split
import seaborn as sns
sns.set(style="white")
sns.set(style="whitegrid", color_codes=True)

### Get from CSV file

In [2]:
data = pd.read_csv(r'Parkinsons Train Data.csv', header=0)
data = data.dropna()
print(data.shape)
print(list(data.columns))

(1040, 28)
['Subject Id', 'Jitter(local)', 'Jitter(local, absolute)', 'Jitter (rap)', 'Jitter (ppq5)', 'Jitter (ddp)', 'Shimmer (local)', 'Shimmer (local, db)', 'Shimmer (apq3)', 'Shimmer (apq5)', 'Shimmer (apq11)', 'Shimmer (dda)', 'AC', 'NDH', 'HTM', 'Median Pitch', 'Mean Pitch', 'Standard deviation', 'Minimum pitch', 'Maximum pitch', 'Number of pulses', 'Number of periods', 'Mean period', 'Standard deviation of period', 'Fraction of locally unvoiced frames', 'Number of voice breaks', 'Degree of voice breaks', 'Class information']


### Viewing the dataset

In [3]:
data.head()

Unnamed: 0,Subject Id,Jitter(local),"Jitter(local, absolute)",Jitter (rap),Jitter (ppq5),Jitter (ddp),Shimmer (local),"Shimmer (local, db)",Shimmer (apq3),Shimmer (apq5),...,Minimum pitch,Maximum pitch,Number of pulses,Number of periods,Mean period,Standard deviation of period,Fraction of locally unvoiced frames,Number of voice breaks,Degree of voice breaks,Class information
0,1,1.488,9e-05,0.9,0.794,2.699,8.334,0.779,4.517,4.609,...,142.229,187.576,160,159,0.006065,0.000416,0.0,0,0.0,1
1,1,0.728,3.8e-05,0.353,0.376,1.059,5.864,0.642,2.058,3.18,...,159.515,234.505,170,169,0.005181,0.000403,2.247,0,0.0,1
2,1,1.22,7.4e-05,0.732,0.67,2.196,8.719,0.875,4.347,5.166,...,146.445,211.442,1431,1427,0.006071,0.000474,10.656,1,0.178,1
3,1,2.502,0.000123,1.156,1.634,3.469,13.513,1.273,5.263,8.771,...,182.713,220.23,94,92,0.00491,0.00032,0.0,0,0.0,1
4,1,3.509,0.000167,1.715,1.539,5.145,9.112,1.04,3.102,4.927,...,182.821,225.162,117,114,0.004757,0.00038,18.182,1,13.318,1


In [None]:
def get_class_counts(df):
    grp = df.groupby([''])

def get_class_proportions(df):
    class_counts = get_class_counts(df)
    return {val[0]: round(val[1]/df.shape[0], 4) for val in class_counts.items()}

In [None]:
def trainTestSets(X, y):
    train, test = train_test_split(X, y, test_size = 0.2 )
    
    train_class_proportion = get_class_proportions(train)
    

### ML method - SVM

In [11]:
def getSVM(X, y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=66, test_size= 0.05, stratify=data['Class information'])
    from sklearn.svm import SVC
    from sklearn import metrics
    svc = SVC()
    svc.fit(X_train, y_train)
    print("Accuracy on training set: {:.4f}".format(svc.score(X_train, y_train)))
    print("Accuracy on test set: {:.4f}".format(svc.score(X_test, y_test)))
    
    #MinMaxScalar
    print("\nUsing MinMaxScaler")
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.fit_transform(X_test)

    svc = SVC()
    svc.fit(X_train_scaled, y_train)

    print("Accuracy on training set: {:.4f}".format(svc.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.4f}".format(svc.score(X_test_scaled, y_test)))
    
    #Scale
    print("\nUsing scaled values")
    svc = SVC(C=1000)
    svc.fit(X_train_scaled, y_train)

    print("Accuracy on training set: {:.4f}".format(
        svc.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.4f}".format(svc.score(X_test_scaled, y_test)))

### Selecting 'y' variable

In [8]:
y = data['Class information']

### Building the model using the entire data

In [None]:
cols_full = ['Jitter(local)', 'Jitter(local, absolute)',
       'Jitter (rap)', 'Jitter (ppq5)', 'Jitter (ddp)', 'Shimmer (local)',
       'Shimmer (local, db)', 'Shimmer (apq3)', 'Shimmer (apq5)',
       'Shimmer (apq11)', 'Shimmer (dda)', 'AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_full]

In [7]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.71
Accuracy on test set: 0.71

Using scaled values
Accuracy on training set: 0.964
Accuracy on test set: 0.654


### Using RFE to select the features

In [14]:
features = data.loc[:, data.columns != 'Class information'].values[:, 1:]
labels = data.loc[:, 'Class information'].values

#analyzing the variables that won't affect the model
from sklearn import datasets
from sklearn.feature_selection import RFE
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn import metrics

svmModel = SVC()
rfe = RFE(svmModel)
rfe = rfe.fit(features, labels)
print(rfe.support_)
print(rfe.ranking_)

RuntimeError: The classifier does not expose "coef_" or "feature_importances_" attributes

### Selecting the columns for analysis (Choosing 1 from Jitter and 1 from Shimmer)

In [15]:
cols_1 = ['Jitter(local)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_1]

In [16]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.71

Using scaled values
Accuracy on training set: 0.928
Accuracy on test set: 0.615


In [17]:
cols_2 = ['Jitter(local)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_2]

In [18]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.70
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.914
Accuracy on test set: 0.635


In [19]:
cols_3 = ['Jitter(local)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_3]

In [20]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.921
Accuracy on test set: 0.596


In [21]:
cols_4 = ['Jitter(local)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_4]

In [22]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.912
Accuracy on test set: 0.577


In [14]:
cols_5 = ['Jitter(local)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_5]

In [15]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.919
Accuracy on test set: 0.596


In [16]:
cols_6 = ['Jitter(local)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_6]

In [17]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.921
Accuracy on test set: 0.596


In [18]:
cols_7 = ['Jitter(local, absolute)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_7]

In [19]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.928
Accuracy on test set: 0.538


In [20]:
cols_8 = ['Jitter(local, absolute)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_8]

In [21]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.70
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.916
Accuracy on test set: 0.558


In [22]:
cols_9 = ['Jitter(local, absolute)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_9]

In [23]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.920
Accuracy on test set: 0.558


In [24]:
cols_10 = ['Jitter(local, absolute)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_10]

In [25]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.916
Accuracy on test set: 0.558


In [26]:
cols_11 = ['Jitter(local, absolute)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_11]

In [27]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.924
Accuracy on test set: 0.615


In [28]:
cols_12 = ['Jitter(local, absolute)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_12]

In [29]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.920
Accuracy on test set: 0.558


In [30]:
cols_13 = ['Jitter (rap)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_13]

In [31]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.922
Accuracy on test set: 0.596


In [32]:
cols_14 = ['Jitter (rap)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_14]

In [33]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.65

Using scaled values
Accuracy on training set: 0.918
Accuracy on test set: 0.615


In [34]:
cols_15 = ['Jitter (rap)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_15]

In [35]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.924
Accuracy on test set: 0.577


In [36]:
cols_16 = ['Jitter (rap)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_16]

In [37]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.73

Using scaled values
Accuracy on training set: 0.915
Accuracy on test set: 0.654


In [38]:
cols_17 = ['Jitter (rap)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_17]

In [39]:
getSVM(X,y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.919
Accuracy on test set: 0.615


In [40]:
cols_18 = ['Jitter (rap)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_18]

In [41]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.924
Accuracy on test set: 0.577


In [42]:
cols_19 = ['Jitter (ppq5)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_19]

In [43]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.71

Using scaled values
Accuracy on training set: 0.913
Accuracy on test set: 0.615


In [44]:
cols_20 = ['Jitter (ppq5)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_20]

In [45]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.73

Using scaled values
Accuracy on training set: 0.912
Accuracy on test set: 0.615


In [46]:
cols_21 = ['Jitter (ppq5)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_21]

In [47]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.73

Using scaled values
Accuracy on training set: 0.915
Accuracy on test set: 0.596


In [48]:
cols_22 = ['Jitter (ppq5)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_22]

In [49]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.71

Using scaled values
Accuracy on training set: 0.904
Accuracy on test set: 0.635


In [50]:
cols_23 = ['Jitter (ppq5)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_23]

In [51]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.71

Using scaled values
Accuracy on training set: 0.913
Accuracy on test set: 0.635


In [52]:
cols_24 = ['Jitter (ppq5)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_24]

In [53]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.73

Using scaled values
Accuracy on training set: 0.915
Accuracy on test set: 0.596


In [54]:
cols_25 = ['Jitter (ddp)','Shimmer (local)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_25]
y = data['Class information']

In [55]:
getSVM(X, y)

Accuracy on training set: 0.59
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.923
Accuracy on test set: 0.596


In [56]:
cols_26 = ['Jitter (ddp)','Shimmer (local, db)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_26]

In [57]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.65

Using scaled values
Accuracy on training set: 0.918
Accuracy on test set: 0.615


In [58]:
cols_27 = ['Jitter (ddp)','Shimmer (apq3)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_27]

In [59]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.924
Accuracy on test set: 0.577


In [60]:
cols_28 = ['Jitter (ddp)','Shimmer (apq5)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_28]

In [61]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.73

Using scaled values
Accuracy on training set: 0.916
Accuracy on test set: 0.654


In [62]:
cols_29 = ['Jitter (ddp)','Shimmer (apq11)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_29]

In [63]:
getSVM(X, y)

Accuracy on training set: 0.58
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.68
Accuracy on test set: 0.69

Using scaled values
Accuracy on training set: 0.919
Accuracy on test set: 0.615


In [64]:
cols_30 = ['Jitter (ddp)','Shimmer (dda)','AC', 'NDH', 'HTM',
       'Median Pitch', 'Mean Pitch', 'Standard deviation',
       'Minimum pitch', 'Maximum pitch', 'Number of pulses',
       'Number of periods', 'Mean period', 'Standard deviation of period',
       'Fraction of locally unvoiced frames', 'Number of voice breaks',
       'Degree of voice breaks']
X = data[cols_30]
y = data['Class information']

In [65]:
getSVM(X, y)

Accuracy on training set: 0.59
Accuracy on test set: 0.56

Using MinMaxScaler
Accuracy on training set: 0.69
Accuracy on test set: 0.67

Using scaled values
Accuracy on training set: 0.924
Accuracy on test set: 0.577
