# Introduction to Machine Learning, UZH 2018, Group Project
### Group 2: Barbara Capl, Mathias Lüthi, Pamela Matias, Stefanie Rentsch
##       
# 3. Support Vector Machines (SVM) with pipelines

###   
In this section we use the feature matrices and response vectors with features selected in chapter 2. 


In [1]:
# hide unnecessary warnings ("depreciation" of packages etc.)
import warnings
warnings.filterwarnings('ignore')

# Load packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.formula.api as smf

from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
plt.style.use('seaborn-whitegrid')

###   
###   
## 3.0. SETTINGS

### (1) Choose the Dataset Version you want

##### Whole Feature Matrix (Features not pre-selected)
VERSION = 1; Feature Matrix with only ratios                                  
VERSION = 2;  Feature Matrix with ratios + saisonality + other market data

##### Reduced Feature Matrix (Features pre-selected)
VERSION = 1.1; Reduced Feature Matrix with only ratios                                  
VERSION = 2.1;  Reduced Feature Matrix with ratios + saisonality + other market data



In [2]:
### Chose which dataset version you want the selection of features and the prediction to be based on 
VERSION = 1.1
"""
INSERT NUMBER 1, 2, 1.1 or 2.1
"""



# Defining sel_state variable for easier printing out    
if VERSION == 1:
    sel_version = 'Based on whole original Dataset with only the Ratios Dataset as predicive Features.'
elif VERSION == 2:
    sel_version = 'Based on whole original Dataset with Ratios + Seasonality + other Market Data as predictive Features.'
elif VERSION == 1.1:
    sel_version = 'Based on reduced Dataset with only the Ratios Dataset as predicive Features.'
elif VERSION == 2.1:
    sel_version = 'Based on reduced Dataset with Ratios + Seasonality + other Market Data as predictive Features.'
else: raise ValueError('VERSION must be either 1, 2, 1.1 or 2.1')

### (2) If you chose VERSION 1.1. or VERSION 2.1:  (Reduced Feature Matrix)                                                                  
### => Choose with which method you want to have the features been pre-selected /reduced

##### You have the choice between:
mySELECTION  = RF ; Features pre-selected with Random Forest Classifier                                                           
mySELECTION = PCA; Features pre-selected with Principal Component Analysis (PCA)                                         

##### By Default;
If VERSION 1 or VERSION 2 was chosen above: SELECTION = none by Default; no features pre-selected. You don't need to define variable mySELECTION.

In [3]:
### Choose whether you want the datasets with features selected with RF or PCA or the original file
mySELECTION = 'RF'
"""
INSERT WISHED METHOD 'RF', 'PCA'
"""



# This is the control loop. If something has been chosen wrong, it returns an error with explanation.
if VERSION == 1 or VERSION == 2:
    SELECTION = 'none'
elif VERSION == 1.1 or VERSION == 2.1:
    SELECTION = mySELECTION
    if mySELECTION is not 'RF' and mySELECTION is not 'PCA':
        raise ValueError('Because VERSION '+str(VERSION)+' is chosen, mySELECTION must be set as either RF or PCA.')
else: raise ValueError('VERSION must be either 1, 2, 1.1 or 2.1. mySELECTION must be chosen as either RF or PCA.')

# Defining of sel_feat (Selected Feature Selection Method) variable and briefing for later.   
if SELECTION == 'RF':
    sel_feat = 'Random Forest (RF)'
    briefing = ('You chose dataset VERSION '+str(VERSION)+' and SELECTION method '+str(SELECTION)+'.'+'\n'+'Features therefore pre-selected with '+str(sel_feat)+'.')
elif SELECTION == 'PCA':
    sel_feat = 'Principal Component Analysis (PCA)'
    briefing = ('You chose dataset VERSION '+str(VERSION)+' and SELECTION method '+str(SELECTION)+'.'+'\n'+'Features therefore pre-selected with '+str(sel_feat)+'.')
elif SELECTION == 'none':
    sel_feat = 'No Feature Selection Method available.'
    briefing = ('You chose VERSION '+str(VERSION)+'. This Version has no Feature Selection Method because Feature Matrix is whole, not reduced.'+'\n'+'SELECTION is therefore "none" by Default.')
else: raise ValueError('mySELECTION must be chosen as either RF or PCA')
print('You chose SELECTION method '+str(sel_feat)+'.')
#print(sel_feat)

You chose SELECTION method Random Forest (RF).


### (3) SUMMARY OF SETTINGS

In [4]:
print(briefing, '\n')
print('VERSION '+str(VERSION)+' is '+str(sel_version),'\n')
print('You are now done with the Settings. You can run the whole Code now by Default.')

You chose dataset VERSION 1.1 and SELECTION method RF.
Features therefore pre-selected with Random Forest (RF). 

VERSION 1.1 is Based on reduced Dataset with only the Ratios Dataset as predicive Features. 

You are now done with the Settings. You can run the whole Code now by Default.


###    
###    
## 3.1. Preparation

### 3.1.1. Import the Response Vector and the Feature Matrix

In [5]:
### import Data (already splitted to train/test-data and selected features-> bc_randomforest_feature_selection)
if VERSION == 1: 
# features not pre-selected, only ratios
    X = pd.read_csv('Data/generated_datasets/features_ratios_1.csv', sep=',', header=0)
    y = pd.read_csv('Data/generated_datasets/response_1.csv', sep=',', header=0)
elif VERSION == 2: 
# features not pre-selected, ratios + seasonality + market data
    X = pd.read_csv('Data/generated_datasets/features_additional_1.csv', sep=',', header=0)
    y = pd.read_csv('Data/generated_datasets/response_1.csv', sep=',', header=0)
elif VERSION == 1.1: 
# features pre-selected, only ratios
    if SELECTION == 'RF':
        X_train_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/X1_train_f.csv', sep=',', header=0)
        X_test_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/X1_test_f.csv', sep=',', header=0)
        y_train_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/y1_train_f.csv', sep=',', header=0)
        y_test_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/y1_test_f.csv', sep=',', header=0)
    elif SELECTION == 'PCA':
        X_train_s = pd.read_csv('Data/generated_splits/features_selected_pca/X1_train_p.csv', sep=',', header=0)
        X_test_s = pd.read_csv('Data/generated_splits/features_selected_pca/X1_test_p.csv', sep=',', header=0)
        y_train_s = pd.read_csv('Data/generated_splits/features_selected_pca/y1_train_p.csv', sep=',', header=0)
        y_test_s = pd.read_csv('Data/generated_splits/features_selected_pca/y1_test_p.csv', sep=',', header=0)
elif VERSION == 2.1: 
# features pre-selected, ratios + seasonality + market data
    if SELECTION == 'RF':
        X_train_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/X2_train_f.csv', sep=',', header=0)
        X_test_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/X2_test_f.csv', sep=',', header=0)
        y_train_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/y2_train_f.csv', sep=',', header=0)
        y_test_s = pd.read_csv('Data/generated_splits/features_selected_randomforest/y2_test_f.csv', sep=',', header=0)
    elif SELECTION == 'PCA':
        X_train_s = pd.read_csv('Data/generated_splits/features_selected_pca/X2_train_p.csv', sep=',', header=0)
        X_test_s = pd.read_csv('Data/generated_splits/features_selected_pca/X2_test_p.csv', sep=',', header=0)
        y_train_s = pd.read_csv('Data/generated_splits/features_selected_pca/y2_train_p.csv', sep=',', header=0)
        y_test_s = pd.read_csv('Data/generated_splits/features_selected_pca/y2_test_p.csv', sep=',', header=0)
else: raise ValueError('VERSION value must be either 1, 2, 1.1 or 2.1, mySELECTION must be chosen as either RF or PCA.')
    
    
display(X_test_s.shape)

(710, 20)

#### Train-Test-split for whole original files. Automatically executed only if VERSION = 1 or 2.

In [6]:
# For VERSION == 1 or 2 -> train-test-split for the importet sets must be done
if VERSION == 1 or VERSION == 2:
    # import package imputer
    from sklearn.preprocessing import Imputer
    # # Train/test split, into 20% test size and 80% train size because it is a relatively small dataset
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # Use a median fill for train
    imp = Imputer(missing_values=np.nan, strategy = 'median' , axis=0)
    imputed_dataset = pd.DataFrame(imp.fit_transform(X_train))
    imputed_dataset.columns = X_train.columns
    imputed_dataset.index = X_train.index
    X_train = imputed_dataset

    # Use a median fill for the test set
    imputed_dataset = pd.DataFrame(imp.fit_transform(X_test))
    imputed_dataset.columns = X_test.columns
    imputed_dataset.index = X_test.index
    X_test = imputed_dataset

    # Extract the feature labels
    feature_labels = list(X)
    print('Type of feature_labels = ' + str(type(feature_labels)), '\n')

else: print('No Train/Test split needed for chosen VERSION '+str(VERSION)+
            '. Loaded Datasets were already pre-splitted and imputed in'+'\n'+'Feature Selection (Chapter 2).')
display(X_train_s.shape)
display(X_test_s.shape)   


No Train/Test split needed for chosen VERSION 1.1. Loaded Datasets were already pre-splitted and imputed in
Feature Selection (Chapter 2).


(2836, 20)

(710, 20)

### 3.1.2. Print out Shape and Form of Feature Matrix and Response Vector

### Train Set

In [7]:
# print status
print('Features Selected with ' + str(sel_feat)+'.')
print('Version ' + str(VERSION) + '; ' + str(sel_version), '\n')

# print properties and head
if VERSION == 1 or VERSION == 2:
    print('Shape (rows, columns) of Feature Matrix X (Train) ' + '= ' + str(X_train.shape)+'\n')
    print('Feature Matrix X (Train) with no Feature pre-Selection:')
    display(X_train[0:3])
    print("")
    print('Response Vector y (Train) after no Feature pre-Selection:')
    display(y_train[0:3])
    print("")
else:
    print('Shape (rows, columns) of Feature Matrix X (Train) ' + '= ' + str(X_train_s.shape), '\n')
    print('Feature Matrix X (Train) with Selected Features:'+'\n')
    display(X_train_s[0:3])
    print("")
    print('Response Vector y (Train) after Feature Selection:')
    display(y_train_s[0:3])

Features Selected with Random Forest (RF).
Version 1.1; Based on reduced Dataset with only the Ratios Dataset as predicive Features. 

Shape (rows, columns) of Feature Matrix X (Train) = (2836, 20) 

Feature Matrix X (Train) with Selected Features:



Unnamed: 0,pe_op_dil,CAPEI,pe_op_basic,divyield,pe_exi,pcf,ptb,PEG_ltgforward,bm,evm,ps,pe_inc,PEG_1yrforward,PEG_trailing,dpr,GProf,roe,short_debt,invt_act,roce
0,15.633,20.773,15.468,0.0354,18.484,9.957,1.811,5.963,0.547,10.644,3.703,18.484,13.678,1.181,0.623,0.245,0.102,0.173,0.108,0.14
1,17.296,25.089,17.203,0.0398,39.232,8.509,2.189,5.702,0.461,9.246,0.689,39.232,1.521,0.668,2.175,0.164,0.067,0.084,0.368,0.105
2,9.209,12.337,9.135,0.0268,13.062,1.993,1.076,2.248,0.911,12.474,2.034,13.062,0.38,3.048,0.337,0.04,0.091,0.532,0.152,0.075



Response Vector y (Train) after Feature Selection:


Unnamed: 0,0
0,0
1,1
2,0


### Test Set

In [8]:
# print status
print('Features Selected with ' + str(sel_feat))
print('Version ' + str(VERSION) + '; ' + str(sel_version),'\n')

# print properties and head
if VERSION == 1 or VERSION == 2:
    print('Shape (rows, columns) of Feature Matrix X (Test) ' + '= ' + str(X_test.shape)+'\n')
    print('Feature Matrix X (Test) with no Feature pre-Selection:')
    display(X_test[0:3])
    print("")
    print('Response Vector y (Test) after no Feature pre-Selection:')
    display(y_test[0:3])
    print("")
else:
    print('Shape (rows, columns) of Feature Matrix X (Test) ' + '= ' + str(X_test_s.shape)+'\n')
    print('Feature Matrix X (Test) with Selected Features:')
    display(X_test_s[0:3])
    print("")
    print('Response Vector y (Test) after Feature Selection:')
    display(y_test_s[0:3])

Features Selected with Random Forest (RF)
Version 1.1; Based on reduced Dataset with only the Ratios Dataset as predicive Features. 

Shape (rows, columns) of Feature Matrix X (Test) = (710, 20)

Feature Matrix X (Test) with Selected Features:


Unnamed: 0,pe_op_dil,CAPEI,pe_op_basic,divyield,pe_exi,pcf,ptb,PEG_ltgforward,bm,evm,ps,pe_inc,PEG_1yrforward,PEG_trailing,dpr,GProf,roe,short_debt,invt_act,roce
0,16.224,20.983,16.127,0.0187,16.224,10.484,4.189,1.33,0.254,8.585,3.318,16.224,1.414,1.022,0.287,0.354,0.272,0.025,0.025,0.364
1,16.489,21.663,16.311,0.0122,18.555,7.709,4.281,1.572,0.235,13.619,2.379,18.555,1.737,1.147,0.199,0.062,0.238,0.106,0.157,0.108
2,9.52,16.327,9.52,0.0343,15.681,8.49,2.198,6.149,0.513,6.716,3.293,15.55,-0.973,1.022,0.792,0.321,0.124,0.253,0.078,0.196



Response Vector y (Test) after Feature Selection:


Unnamed: 0,0
0,1
1,0
2,1


###   
###    
## 3.2. SVM

### Two different SVM tests are applied:
#### => SVM1 = SVM with random parameters
#### => SVM2 = SVM with other parameters

### 3.2.1. SVM1 : SVM with random parameters

### Kernel: rbf

In [9]:
# Create pipeline object with standard scaler and SVC estimator
# Standardscaler standardizes the input variables
pipe1 = Pipeline([('scaler', StandardScaler()), 
                 ('classifier', SVC(random_state=0))])

In [10]:
# Define parameter grid
param_grid1 = [{'scaler': [StandardScaler()],
               'classifier': [SVC(kernel='rbf')],
               'classifier__gamma': [1, 10],
               'classifier__C': [10, 100]},
              {'scaler': [StandardScaler(), None],
               'classifier': [LogisticRegression()],
               'classifier__C': [10, 100]}]

In [11]:
# Run grid search
if VERSION == 1 or VERSION == 2:
    grid1 = GridSearchCV(pipe1, param_grid1, cv=5, n_jobs=-1)
    grid1.fit(X_train, y_train)
else: 
    grid1 = GridSearchCV(pipe1, param_grid1, cv=5, n_jobs=-1)
    grid1.fit(X_train_s, y_train_s)

In [12]:
# Print results
print('Best CV accuracy: {:.2f}'.format(grid1.best_score_),'\n')
if VERSION == 1 or VERSION == 2: 
    print('Test score:       {:.2f}'.format(grid1.score(X_test, y_test)))
else: 
    print('Test score:       {:.2f}'.format(grid1.score(X_test_s, y_test_s)))
print('Best parameters: {}'.format(grid1.best_params_))

Best CV accuracy: 0.62 

Test score:       0.63
Best parameters: {'classifier': LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False), 'classifier__C': 100, 'scaler': None}


In [13]:
# Predict classes
if VERSION == 1 or VERSION == 2:
    y_pred1 = grid1.predict(X_test)
else:
    y_pred1 = grid1.predict(X_test_s)
    
print('Predicted Response Vector based on Feature Test Set:')
display(y_pred1[0:20])

Predicted Response Vector based on Feature Test Set:


array([1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1])

In [18]:
# Metrics of Classification

m_text = 'Metrics of Classification / Prediciton with SVM1 (random parameters), kernel rbf:'+'\n'
c_text = 'Confusion Matrix with SVM1 (random parameters), kernel rbf:'+'\n'

if VERSION == 1 or VERSION ==2:
    print(m_text)
    print(metrics.classification_report(y_test, y_pred1))
    print(c_text)
    cfm_matrix = metrics.confusion_matrix(y_test, y_pred1)
    print(cfm_matrix)
    # Compute ROC curve and ROC area (AUC) for each class
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred1)
else:
    print(m_text)
    print(metrics.classification_report(y_test_s, y_pred1))
    print(c_text)
    cfm_matrix = metrics.confusion_matrix(y_test_s, y_pred1)
    print(cfm_matrix)
    # Compute ROC curve and ROC area (AUC) for each class
    fpr, tpr, thresholds = metrics.roc_curve(y_test_s, y_pred1)
    
print("")
print('Sensitivity/Recall (TRUE POS. RATE) = '+str(cfm_matrix[1,1]/(cfm_matrix[1,1]+cfm_matrix[1,0])))
print('Specificity (TRUE NEG. RATE) = '+str(cfm_matrix[0,0]/(cfm_matrix[0,0]+cfm_matrix[0,1]))+'\n')

roc_auc = metrics.auc(fpr, tpr)
print('ROC(absolut numbers) = '+str(roc_auc))

Metrics of Classification / Prediciton with SVM1 (random parameters), kernel rbf:

             precision    recall  f1-score   support

          0       0.65      0.33      0.43       305
          1       0.63      0.86      0.73       405

avg / total       0.64      0.63      0.60       710

Confusion Matrix with SVM1 (random parameters), kernel rbf:

[[100 205]
 [ 55 350]]

Sensitivity/Recall (TRUE POS. RATE) = 0.8641975308641975
Specificity (TRUE NEG. RATE) = 0.32786885245901637

ROC(absolut numbers) = 0.596033191661607


### 3.2.1. SVM2 : SVM with other parameters

### Kernel: linear

In [19]:
# Create pipeline object with standard scaler and SVC estimator
# Standardscaler standardizes the input variables
pipe2 = Pipeline([('scaler', StandardScaler()), 
                 ('classifier', SVC(random_state=0))])

In [20]:
# Define parameter grid
param_grid2 = [{'scaler': [StandardScaler()],
               'classifier': [SVC(kernel='linear')],
               'classifier__gamma': [1, 10],
               'classifier__C': [10, 100]},
              {'scaler': [StandardScaler(), None],
               'classifier': [LogisticRegression()],
               'classifier__C': [10, 100]}]

In [21]:
# Run grid search
if VERSION == 1 or VERSION == 2:
    grid2 = GridSearchCV(pipe2, param_grid2, cv=5, n_jobs=-1)
    grid2.fit(X_train, y_train)
else: 
    grid2 = GridSearchCV(pipe2, param_grid2, cv=5, n_jobs=-1)
    grid2.fit(X_train_s, y_train_s)

In [22]:
# Print results
print('Best CV accuracy: {:.2f}'.format(grid2.best_score_))
if VERSION == 1 or VERSION == 2:
    print('Test score:       {:.2f}'.format(grid2.score(X_test, y_test)))
else: 
    print('Test score:       {:.2f}'.format(grid2.score(X_test_s, y_test_s))+'\n')
print('Best parameters: {}'.format(grid2.best_params_))

Best CV accuracy: 0.62
Test score:       0.63

Best parameters: {'classifier': LogisticRegression(C=100, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False), 'classifier__C': 100, 'scaler': None}


In [23]:
# Predict classes
if VERSION == 1 or VERSION ==2:
    y_pred2 = grid2.predict(X_test)
else:
    y_pred2 = grid2.predict(X_test_s)

print('Predicted Response Vector based on Feature Test Set:')
display(y_pred2[0:20])

Predicted Response Vector based on Feature Test Set:


array([1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1])

In [24]:
# Metrics of Classification

m_text = 'Metrics of Classification with SVM2 (random parameters), kernel linear:'+'\n'
c_text = 'Confusion Matrix with SVM2 (random parameters), kernel linear:'+'\n'

if VERSION == 1 or VERSION == 2:
    print(m_text)
    print(metrics.classification_report(y_test, y_pred2))
    print(c_text)
    cfm_matrix = metrics.confusion_matrix(y_test, y_pred2)
    print(cfm_matrix)
    # Compute ROC curve and ROC area (AUC) for each class
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred2)
else:
    print(m_text)
    print(metrics.classification_report(y_test_s, y_pred2))
    print(c_text)
    cfm_matrix = metrics.confusion_matrix(y_test_s, y_pred2)
    print(cfm_matrix)
    # Compute ROC curve and ROC area (AUC) for each class
    fpr, tpr, thresholds = metrics.roc_curve(y_test_s, y_pred2)

print("")
print('Sensitivity/Recall (TRUE POS. RATE) = '+str(cfm_matrix[1,1]/(cfm_matrix[1,1]+cfm_matrix[1,0])))
print('Specificity (TRUE NEG. RATE) = '+str(cfm_matrix[0,0]/(cfm_matrix[0,0]+cfm_matrix[0,1]))+'\n')

roc_auc = metrics.auc(fpr, tpr)
print('ROC(absolut numbers) = '+str(roc_auc))

Metrics of Classification with SVM2 (random parameters), kernel linear:

             precision    recall  f1-score   support

          0       0.65      0.33      0.43       305
          1       0.63      0.86      0.73       405

avg / total       0.64      0.63      0.60       710

Confusion Matrix with SVM2 (random parameters), kernel linear:

[[100 205]
 [ 55 350]]

Sensitivity/Recall (TRUE POS. RATE) = 0.8641975308641975
Specificity (TRUE NEG. RATE) = 0.32786885245901637

ROC(absolut numbers) = 0.596033191661607


### Kernel: poly

In [26]:
# Create pipeline object with standard scaler and SVC estimator
pipe3 = Pipeline([('scaler', StandardScaler()), 
                 ('classifier', SVC(random_state=0))])

In [27]:
# Define parameter grid
param_grid3 = [{'scaler': [StandardScaler()],
               'classifier': [SVC(kernel= "poly")],
               'classifier__gamma': [1, 10],
               'classifier__C': [10, 100]},
              {'scaler': [StandardScaler(), None],
               'classifier': [LogisticRegression()],
               'classifier__C': [10, 100]}]

In [None]:
# Run grid search
if VERSION == 1 or VERSION == 2:
    grid3 = GridSearchCV(pipe3, param_grid3, cv=5, n_jobs=-1)
    grid3.fit(X_train, y_train)
else: 
    grid3 = GridSearchCV(pipe3, param_grid3, cv=5, n_jobs=-1)
    grid3.fit(X_train_s, y_train_s)

In [None]:
# Print results
print('Best CV accuracy: {:.2f}'.format(grid3.best_score_)+'\n')
if VERSION == 1 or VERSION ==2:
    print('Test score:       {:.2f}'.format(grid3.score(X_test, y_test))+'\n')
else: 
    print('Test score:       {:.2f}'.format(grid3.score(X_test_s, y_test_s))+'\n')
print('Best parameters: {}'.format(grid3.best_params_))

In [None]:
# Predict classes
if VERSION == 1 or VERSION == 2:
    y_pred3 = grid3.predict(X_test)
else:
    y_pred3 = grid3.predict(X_test_s)

print('Predicted Response Vector based on Feature Test Set:')
display(y_pred3[0:20])

In [None]:
# Metrics of Classification

m_text = 'Metrics of Classification with SVM3 (random parameters), kernel poly:'+'\n'
c_text = 'Confusion Matrix with SVM3 (random parameters), kernel poly:'+'\n'

if VERSION == 1 or VERSION == 2:
    print(m_text)
    print(metrics.classification_report(y_test, y_pred3))
    print(c_text)
    cfm_matrix = metrics.confusion_matrix(y_test, y_pred3)
    print(cfm_matrix)
    # Compute ROC curve and ROC area (AUC) for each class
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred3)
else:
    print(m_text)
    print(metrics.classification_report(y_test_s, y_pred3))
    print(c_text)
    cfm_matrix = metrics.confusion_matrix(y_test_s, y_pred3)
    print(cfm_matrix)
    # Compute ROC curve and ROC area (AUC) for each class
    fpr, tpr, thresholds = metrics.roc_curve(y_test_s, y_pred3)

print("")
print('Sensitivity/Recall (TRUE POS. RATE) = '+str(cfm_matrix[1,1]/(cfm_matrix[1,1]+cfm_matrix[1,0])))
print('Specificity (TRUE NEG. RATE) = '+str(cfm_matrix[0,0]/(cfm_matrix[0,0]+cfm_matrix[0,1]))+'\n')

roc_auc = metrics.auc(fpr, tpr)
print('ROC(absolut numbers) = '+str(roc_auc))