In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import seaborn as sns

from imblearn.over_sampling import SMOTE #for SMOTE -> install package using: conda install -c conda-forge imbalanced-learn
from imblearn.over_sampling import RandomOverSampler 
from scipy import stats, integrate
import matplotlib.pyplot as plt
import ggplot
import scipy
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split 
from sklearn import metrics
from sklearn.svm import LinearSVC
from sklearn.svm import NuSVC
#from sklearn.svm import SVR #just Testing for regression on other continous data of dataset
from sklearn.decomposition import PCA, NMF
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import load_digits
from sklearn.feature_selection import SelectKBest, chi2


import pylab as pl
from itertools import cycle
from sklearn import cross_validation
from sklearn.svm import SVC

features_list = ['age','sex','cp','trestbps','chol','fbs','restecg','thalach','exang','oldpeak','slope','ca','thal','class']
dataset1=pd.read_csv("data/Heart_Disease_Data.csv")
#dataset1=pd.read_csv("Balanced_Data.csv")

You can access Timestamp as pandas.Timestamp
  pd.tslib.Timestamp,
  from pandas.lib import Timestamp
  from pandas.core import datetools


In [2]:
# SVM requires that each data instance is represented as a vector of real numbers
# If you already have numeric dtypes (int8|16|32|64,float64,boolean) you can convert it to another "numeric" dtype using Pandas .astype() method. Demo: In [90]: df = pd.DataFrame(np.random.randint(10**5,10**7,(5,3)),columns=list('abc'), dtype=np.int64) In [91]: df Out[91]: a b c 0 9059440 9590567 2076918 1 5861102 4566089 1947323 2 6636568 162770 2487991 3 6794572 5236903 5628779 4 470121 4044395 4546794 In [92]: df.dtypes Out[92]: a int64 b int64 c int64 dtype: object In [93]: df['a'] = df['a'].astype(float) In [94]: df.dtypes Out[94]: a float64 b int64 c int64 dtype: object It won't work for object (string) dtypes, that can't be converted to numbers: In [95]: df.loc[1, 'b'] = 'XXXXXX' In [96]: df Out[96]:...
# Just make everything numeric for ease, later we will convert to ordinal/one-hot encoding.
dataset1 = dataset1.convert_objects(convert_numeric=True)
dataset1 = dataset1.astype('float')

For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  after removing the cwd from sys.path.


#### count missing value in terms of colunms #######

In [3]:
#dataset.shape[0] - dataset.count()
print(dataset1.isnull().any())
dataset1 = dataset1.replace('?', np.nan)

age               False
sex               False
cp                False
trestbps          False
chol              False
fbs               False
restecg           False
thalach           False
exang             False
oldpeak           False
slop              False
ca                 True
thal               True
pred_attribute    False
dtype: bool


### Preliminary Processing

In [4]:
# based on https://pdfs.semanticscholar.org/daa0/f01f96a89fcfc5f41a2da67fb2a8966900ab.pdf 
# these features, based on reading, may be important but have to be confirmed too by statistical methods:
Genetic_Based_Decision = dataset1[['cp','trestbps', 'restecg', 'thalach', 'ca', 'thal']]

In [5]:
# Two variables are discrete/ordinal: ca (number of major vessels colored by fluoroscopy) and num (diagnosis of heart disease)
# Three can be directly viewed as 1 hot (because binary): 'sex':'male', 'fbs':'fasting blood sugar', 'exang':'exercise induced angina'

# which leaves 4 for one-hot encoding. problem is that the values aren't unique, so have to manually
# make extra columns:

dataset1["cp"] = dataset1["cp"].replace([1,2,3,4], ["typical angina", "atypical angina", "non-angina", "asymptomatic angina"])
dataset1["restecg"] = dataset1["restecg"].replace([0,1,2], ["normalresecg", "ST-T wave abnormality", "left ventricular hypertrophy"])
dataset1["slop"] = dataset1["slop"].replace([1,2,3], ["upsloping", "flat", "downsloping"])
dataset1["thal"] = dataset1["thal"].replace([3,6,7], ["normalthal", "fixed defect", "reversible defect"])

x = dataset1[['cp', 'restecg', 'slop', 'thal']]
for column in ['cp', 'restecg', 'slop', 'thal']:
    one_hot = pd.get_dummies(dataset1[column])
    dataset1 = dataset1.drop(column, axis=1)
    dataset1 = dataset1.join(one_hot)

In [6]:
### Extract features and labels from dataset for local testing:
dataset1.dropna(inplace=True, axis=0, how="any")
Y=dataset1["pred_attribute"]
dataset1 = dataset1.drop("pred_attribute", axis=1)
X=dataset1

In [7]:
# evaluate the model by splitting into train and test sets  #Edit by ryan, we aim to do 3 traditional sets in the end, this first split is 80/20
features_train, features_test, labels_train, labels_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [8]:
features_train_df = pd.DataFrame(features_train)
#features_train_df.to_csv('features_train.csv', index=False)

features_test_df = pd.DataFrame(features_test)
#features_test_df.to_csv('features_test.csv', index=False)

labels_train_df = pd.DataFrame(labels_train)
labels_train_df.to_csv('data/labels_train.csv', index=False)

labels_test_df = pd.DataFrame(labels_test)
labels_test_df.to_csv('data/labels_test.csv', index=False)

# Modelling And Training

We have an relatively small dataset. Therefore, we should do our feature selection based on a cross-
validated set. 

## Standard Scaler (by David)
SVC Models are only any good when the data is scaled. Lets scale the data and build the model

In [9]:
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing.data import QuantileTransformer

scaler = MinMaxScaler()
Standard_scaler = StandardScaler()
Robust_scaler = preprocessing.RobustScaler(quantile_range=(25, 75))
Quantile_scalar = preprocessing.QuantileTransformer(output_distribution='normal')

features_train = Standard_scaler.fit_transform(features_train)
features_test = Standard_scaler.transform(features_test)


features_train

array([[-0.8045168 ,  0.69445626, -1.08367795, ..., -0.23570226,
         0.93062959, -0.81975606],
       [ 0.53527611,  0.69445626,  2.08476008, ..., -0.23570226,
        -1.07454138,  1.21987509],
       [ 0.98187375, -1.43997549, -1.30219091, ..., -0.23570226,
         0.93062959, -0.81975606],
       ..., 
       [ 0.53527611,  0.69445626,  0.44591283, ..., -0.23570226,
        -1.07454138,  1.21987509],
       [-0.9161662 ,  0.69445626,  0.44591283, ..., -0.23570226,
        -1.07454138,  1.21987509],
       [ 0.3119773 , -1.43997549, -0.20962608, ..., -0.23570226,
         0.93062959, -0.81975606]])

# Balancing only on the training set, not the validation set
Unfortunately SMOTE categorial implementation is not really implemented
We will do simple oversampling -> Done using external program SPSS 

In [65]:
# Export data to files for external program to balance the data

#without features selection
merged = np.concatenate((features_train, labels_train.reshape((-1, 1))), axis=1)
merged_df = pd.DataFrame(merged) 
merged_df.to_csv("data/train_NoEng_NB.csv", index=False)



#features_train_009
 
#features_train_04 

#features_train_009_pca 

#features_train_04_pca

  after removing the cwd from sys.path.


In [72]:
# Import balanced train data back

#without features selection

df=pd.read_csv("data/train_noEng_Balanced.csv")

#df.drop(df.columns[[-1,]], axis=1, inplace=True)
test_df = df.iloc[:, 22:23]
df.drop(df.columns[[-1,]], axis=1, inplace=True)

features_train = df.as_matrix

#test_df = df.iloc[:, 22:23]
#df = df.iloc[:, :-1]

df

features_test



test_df.to_csv("data/train.csv", index=False)
#features_train_009
 
#features_train_04 

#features_train_009_pca 

#features_train_04_pca

In [73]:
features_train

<bound method NDFrame.as_matrix of             0         1         2         3         4         5         6  \
0   -0.804517  0.694456 -1.083678 -0.798812 -0.401386 -0.233713 -0.748132   
1    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
2    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
3    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
4    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
5    0.981874 -1.439975 -1.302191  0.413822 -0.401386  0.872555  1.336663   
6    0.981874 -1.439975 -1.302191  0.413822 -0.401386  0.872555  1.336663   
7    0.311977  0.694456  0.992195  0.544413 -0.401386 -1.552725  1.336663   
8    0.311977  0.694456  0.992195  0.544413 -0.401386 -1.552725  1.336663   
9   -0.692867  0.694456 -1.192934 -0.332414 -0.401386  0.830006 -0.748132   
10  -0.692867  0.694456 -1.192934 -0.332414 -0.401386  0.830006 -0.748132   
11  -0.692867  0.694456 -1.192934 -0.3324

In [70]:
from sklearn.metrics import accuracy_score

from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
#from sklearn import grid_search
#from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import mean_squared_error
import seaborn as sns
from scipy.stats import reciprocal, uniform


def checkmetrics(pred, labels_test, name):
    sns.set()
    print('The accuracy of ', name, 'is: ', accuracy_score(pred, labels_test))
    matrix = confusion_matrix(labels_test, pred)
    ax = sns.heatmap(matrix, annot=True, fmt="d", cmap="Blues")
    print(ax)
    print(classification_report(pred, labels_test))

## Baseline Appoarch - Default Params - 5 fold X-validation

In [71]:
#svm.SVC  C-Support Vector Classification.
parameters ={
#    'C': [0.1,1], 
#    'gamma': [0.00001,0.01,0.05,0.1,0.2,0.5,1,2,3,4,5],
#    "class_weight": ['balanced', None]
}
SVM = svm.SVC()

#param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
#grid_search_cv = RandomizedSearchCV(SVM, param_distributions, cv=5,n_jobs=-1, return_train_score=True, refit=True,verbose=1)

grid_search_cv = GridSearchCV(SVM, parameters, cv=5,n_jobs=-1, return_train_score=True, refit=True,verbose=1)
grid_search_cv.fit(features_train, labels_train)
resultsdf=pd.DataFrame(grid_search_cv.cv_results_)
print("The train score:", str(grid_search_cv.score(features_train, labels_train)), "with parameters:", grid_search_cv.best_params_)
pred = grid_search_cv.best_estimator_.predict(features_test)

checkmetrics(pred, labels_test, 'C-Support Vector Classification')

TypeError: Singleton array array(<bound method NDFrame.as_matrix of             0         1         2         3         4         5         6  \
0   -0.804517  0.694456 -1.083678 -0.798812 -0.401386 -0.233713 -0.748132   
1    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
2    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
3    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
4    0.535276  0.694456  2.084760  1.477208 -0.401386 -0.361359  1.336663   
5    0.981874 -1.439975 -1.302191  0.413822 -0.401386  0.872555  1.336663   
6    0.981874 -1.439975 -1.302191  0.413822 -0.401386  0.872555  1.336663   
7    0.311977  0.694456  0.992195  0.544413 -0.401386 -1.552725  1.336663   
8    0.311977  0.694456  0.992195  0.544413 -0.401386 -1.552725  1.336663   
9   -0.692867  0.694456 -1.192934 -0.332414 -0.401386  0.830006 -0.748132   
10  -0.692867  0.694456 -1.192934 -0.332414 -0.401386  0.830006 -0.748132   
11  -0.692867  0.694456 -1.192934 -0.332414 -0.401386  0.830006 -0.748132   
12   1.205173  0.694456  0.172772  0.133983 -0.401386 -0.914493 -0.748132   
13   1.205173  0.694456  0.172772  0.133983 -0.401386 -0.914493 -0.748132   
14   1.205173  0.694456  0.172772  0.133983 -0.401386 -0.914493 -0.748132   
15   1.205173  0.694456  0.172772  0.133983 -0.401386 -0.914493 -0.748132   
16   0.758575  0.694456  0.445913 -0.742844 -0.401386 -0.446457  1.336663   
17   0.758575  0.694456  0.445913 -0.742844 -0.401386 -0.446457  1.336663   
18   0.758575  0.694456  0.445913 -0.742844 -0.401386 -0.446457  1.336663   
19   1.316822  0.694456  1.538478 -0.351070 -0.401386 -0.446457 -0.748132   
20  -0.022971  0.694456  0.992195 -0.276447 -0.401386  0.702360 -0.748132   
21  -2.255959  0.694456 -0.755908 -1.209242 -0.401386  1.085299 -0.748132   
22  -0.916166  0.694456  0.992195 -0.295102 -0.401386 -0.063518 -0.748132   
23  -0.916166  0.694456  0.992195 -0.295102 -0.401386 -0.063518 -0.748132   
24  -0.916166  0.694456  0.992195 -0.295102 -0.401386 -0.063518 -0.748132   
25   0.646926  0.694456 -0.810537 -0.313758  2.491364  0.489616  1.336663   
26   0.646926  0.694456 -0.810537 -0.313758  2.491364  0.489616  1.336663   
27   0.646926  0.694456 -0.810537 -0.313758  2.491364  0.489616  1.336663   
28   0.646926  0.694456 -0.810537 -0.313758  2.491364  0.489616  1.336663   
29   0.870224 -1.439975  0.445913  2.745810 -0.401386  0.361970 -0.748132   
..        ...       ...       ...       ...       ...       ...       ...   
517  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
518  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
519  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
520  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
521  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
522  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
523  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
524  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
525  0.981874 -1.439975  0.992195  2.988336 -0.401386  0.234324 -0.748132   
526  0.311977 -1.439975 -0.100370 -0.201823 -0.401386  1.085299 -0.748132   
527  0.311977 -1.439975 -0.100370 -0.201823 -0.401386  1.085299 -0.748132   
528  0.311977 -1.439975 -0.100370 -0.201823 -0.401386  1.085299 -0.748132   
529  1.093523  0.694456 -1.192934 -0.668220 -0.401386 -0.191164  1.336663   
530  1.651770  0.694456  0.445913  0.133983 -0.401386 -0.106067 -0.748132   
531  1.651770  0.694456  0.445913  0.133983 -0.401386 -0.106067 -0.748132   
532  1.651770  0.694456  0.445913  0.133983 -0.401386 -0.106067 -0.748132   
533  1.651770  0.694456  0.445913  0.133983 -0.401386 -0.106067 -0.748132   
534  1.428471  0.694456 -0.373511  0.133983  2.491364  0.617263 -0.748132   
535  1.428471  0.694456 -0.373511  0.133983  2.491364  0.617263 -0.748132   
536  1.428471  0.694456 -0.373511  0.133983  2.491364  0.617263 -0.748132   
537  1.428471  0.694456 -0.373511  0.133983  2.491364  0.617263 -0.748132   
538  0.535276  0.694456  0.445913 -1.302521 -0.401386  0.574714  1.336663   
539  0.535276  0.694456  0.445913 -1.302521 -0.401386  0.574714  1.336663   
540  0.535276  0.694456  0.445913 -1.302521 -0.401386  0.574714  1.336663   
541  0.535276  0.694456  0.445913 -1.302521 -0.401386  0.574714  1.336663   
542 -0.916166  0.694456  0.445913  1.197370 -0.401386 -1.212335  1.336663   
543 -0.916166  0.694456  0.445913  1.197370 -0.401386 -1.212335  1.336663   
544 -0.916166  0.694456  0.445913  1.197370 -0.401386 -1.212335  1.336663   
545 -0.916166  0.694456  0.445913  1.197370 -0.401386 -1.212335  1.336663   
546  0.311977 -1.439975 -0.209626  1.048122 -0.401386  0.447067 -0.748132   

            7         8         9    ...           12        13        14  \
0   -0.792141 -0.705669  1.004796    ...    -0.287926 -0.069338 -0.985747   
1    2.044514 -0.705669  1.004796    ...    -0.287926 -0.069338  1.014459   
2    2.044514 -0.705669  1.004796    ...    -0.287926 -0.069338  1.014459   
3    2.044514 -0.705669  1.004796    ...    -0.287926 -0.069338  1.014459   
4    2.044514 -0.705669  1.004796    ...    -0.287926 -0.069338  1.014459   
5    0.669166  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
6    0.669166  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
7   -0.362345  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
8   -0.362345  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
9   -0.018508 -0.705669 -0.995227    ...    -0.287926 -0.069338 -0.985747   
10  -0.018508 -0.705669 -0.995227    ...    -0.287926 -0.069338 -0.985747   
11  -0.018508 -0.705669 -0.995227    ...    -0.287926 -0.069338 -0.985747   
12   1.528759  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
13   1.528759  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
14   1.528759  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
15   1.528759  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
16   0.755125  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
17   0.755125  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
18   0.755125  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   
19   1.098962 -0.705669  1.004796    ...    -0.287926 -0.069338  1.014459   
20   0.497248 -0.705669 -0.995227    ...    -0.287926 -0.069338  1.014459   
21  -0.878101 -0.705669 -0.995227    ...     3.473111 -0.069338  1.014459   
22   2.216433 -0.705669 -0.995227    ...    -0.287926 -0.069338 -0.985747   
23   2.216433 -0.705669 -0.995227    ...    -0.287926 -0.069338 -0.985747   
24   2.216433 -0.705669 -0.995227    ...    -0.287926 -0.069338 -0.985747   
25   0.325329  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
26   0.325329  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
27   0.325329  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
28   0.325329  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
29   0.153411 -0.705669  1.004796    ...    -0.287926 -0.069338  1.014459   
..        ...       ...       ...    ...          ...       ...       ...   
517  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
518  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
519  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
520  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
521  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
522  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
523  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
524  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
525  2.560270  2.671846  1.004796    ...    -0.287926 -0.069338  1.014459   
526 -0.878101  0.420169 -0.995227    ...    -0.287926 -0.069338  1.014459   
527 -0.878101  0.420169 -0.995227    ...    -0.287926 -0.069338  1.014459   
528 -0.878101  0.420169 -0.995227    ...    -0.287926 -0.069338  1.014459   
529  0.669166 -0.705669 -0.995227    ...     3.473111 -0.069338  1.014459   
530  0.841085  2.671846 -0.995227    ...    -0.287926 -0.069338  1.014459   
531  0.841085  2.671846 -0.995227    ...    -0.287926 -0.069338  1.014459   
532  0.841085  2.671846 -0.995227    ...    -0.287926 -0.069338  1.014459   
533  0.841085  2.671846 -0.995227    ...    -0.287926 -0.069338  1.014459   
534 -0.706182  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
535 -0.706182  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
536 -0.706182  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
537 -0.706182  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
538 -0.878101  0.420169  1.004796    ...    -0.287926 -0.069338 -0.985747   
539 -0.878101  0.420169  1.004796    ...    -0.287926 -0.069338 -0.985747   
540 -0.878101  0.420169  1.004796    ...    -0.287926 -0.069338 -0.985747   
541 -0.878101  0.420169  1.004796    ...    -0.287926 -0.069338 -0.985747   
542  0.669166  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
543  0.669166  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
544  0.669166  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
545  0.669166  1.546008  1.004796    ...    -0.287926 -0.069338 -0.985747   
546 -0.878101  0.420169  1.004796    ...    -0.287926 -0.069338  1.014459   

           15        16        17        18        19        20        21  
0    0.995227 -0.278064 -0.912871  1.054093 -0.235702  0.930630 -0.819756  
1   -1.004796  3.596294 -0.912871 -0.948683 -0.235702 -1.074541  1.219875  
2   -1.004796  3.596294 -0.912871 -0.948683 -0.235702 -1.074541  1.219875  
3   -1.004796  3.596294 -0.912871 -0.948683 -0.235702 -1.074541  1.219875  
4   -1.004796  3.596294 -0.912871 -0.948683 -0.235702 -1.074541  1.219875  
5    0.995227 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
6    0.995227 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
7   -1.004796 -0.278064  1.095445 -0.948683  4.242641 -1.074541 -0.819756  
8   -1.004796 -0.278064  1.095445 -0.948683  4.242641 -1.074541 -0.819756  
9    0.995227  3.596294 -0.912871 -0.948683 -0.235702 -1.074541  1.219875  
10   0.995227  3.596294 -0.912871 -0.948683 -0.235702 -1.074541  1.219875  
11   0.995227  3.596294 -0.912871 -0.948683 -0.235702 -1.074541  1.219875  
12  -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
13  -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
14  -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
15  -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
16  -1.004796 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
17  -1.004796 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
18  -1.004796 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
19  -1.004796 -0.278064 -0.912871  1.054093  4.242641 -1.074541 -0.819756  
20  -1.004796 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
21  -1.004796 -0.278064 -0.912871  1.054093 -0.235702  0.930630 -0.819756  
22   0.995227 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
23   0.995227 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
24   0.995227 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
25   0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
26   0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
27   0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
28   0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
29  -1.004796 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
..        ...       ...       ...       ...       ...       ...       ...  
517 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
518 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
519 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
520 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
521 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
522 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
523 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
524 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
525 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
526 -1.004796 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
527 -1.004796 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
528 -1.004796 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
529 -1.004796 -0.278064  1.095445 -0.948683 -0.235702  0.930630 -0.819756  
530 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
531 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
532 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
533 -1.004796 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
534  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
535  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
536  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
537  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
538  0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
539  0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
540  0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
541  0.995227 -0.278064 -0.912871  1.054093 -0.235702 -1.074541  1.219875  
542  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
543  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
544  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
545  0.995227 -0.278064  1.095445 -0.948683 -0.235702 -1.074541  1.219875  
546 -1.004796 -0.278064 -0.912871  1.054093 -0.235702  0.930630 -0.819756  

[547 rows x 22 columns]>, dtype=object) cannot be considered a valid collection.

In [None]:
#LinearSVC Linear Support Vector Classification

parameters = {
#   'C': [0.1,0.2,0.5,1,2,3,4,5],
#   "class_weight": ['balanced', None]
}
SVM = LinearSVC()
grid_search_cv = GridSearchCV(SVM, parameters, cv=5,n_jobs=-1, return_train_score=True, refit=True,verbose=1)
grid_search_cv.fit(features_train, labels_train)
resultsdf=pd.DataFrame(grid_search_cv.cv_results_)
print("The train score:", str(grid_search_cv.score(features_train, labels_train)), "with parameters:", grid_search_cv.best_params_)
pred = grid_search_cv.best_estimator_.predict(features_test)

checkmetrics(pred, labels_test, 'Linear Support Vector Classification')

In [None]:
# sklearn.svm.NuSVC Nu-Support Vector Classification
# defaults: nu=0.5, kernel=’rbf’, degree=3, gamma=’auto’, coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape=’ovr’, random_state=None)
parameters = {
    'nu': [0.1] #other larger nu values is "not fleasible"
#   'C': [0.1,0.2,0.5,1,2,3,4,5],
#    "class_weight": ['balanced', None]
}
SVM = NuSVC()
grid_search_cv = GridSearchCV(SVM, parameters, cv=5,n_jobs=-1, return_train_score=True, refit=True,verbose=1)
grid_search_cv.fit(features_train, labels_train)
resultsdf=pd.DataFrame(grid_search_cv.cv_results_)
print("The train score:", str(grid_search_cv.score(features_train, labels_train)), "with parameters:", grid_search_cv.best_params_)
pred = grid_search_cv.best_estimator_.predict(features_test)

checkmetrics(pred, labels_test, 'Nu-Support Vector Classification')