In [2]:
import numpy as np
import pandas as pd
import pickle
from matplotlib import pyplot as plt

# check the version of these modules
print(np.__version__)
print(pickle.format_version)

1.23.4
4.0


In [3]:
# load DE features named '1_123.npz'
data_npz = np.load('../data/1_123.npz')
print(data_npz.files)

['data', 'label']


In [4]:
# get data and label
# ** both 'data' and 'label' are pickled dict **

data = pickle.loads(data_npz['data'])
label = pickle.loads(data_npz['label'])

label_dict = {0:'Disgust', 1:'Fear', 2:'Sad', 3:'Neutral', 4:'Happy'}

print(data.keys())
print(label.keys())

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44])
dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44])


In [5]:
# As we can see, there are 45 keys in both 'data' and 'label'.
# Each participant took part in our experiments for 3 sessions, and he/she watched 15 movie clips (i.e. 15 trials) during each session.
# Therefore, we could extract 3 * 15 = 45 DE feature matrices.

# The key indexes [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] belong to Session 1.
# The key indexes [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] belong to Session 2.
# The key indexes [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44] belong to Session 3.

# We will print the emotion labels for each trial.
label_dict = {0:'Disgust', 1:'Fear', 2:'Sad', 3:'Neutral', 4:'Happy'}
for i in range(45):
    print('Session {} -- Trial {} -- EmotionLabel : {}'.format(i//15+1, i%15+1, label_dict[label[i][0]]))

Session 1 -- Trial 1 -- EmotionLabel : Happy
Session 1 -- Trial 2 -- EmotionLabel : Fear
Session 1 -- Trial 3 -- EmotionLabel : Neutral
Session 1 -- Trial 4 -- EmotionLabel : Sad
Session 1 -- Trial 5 -- EmotionLabel : Disgust
Session 1 -- Trial 6 -- EmotionLabel : Happy
Session 1 -- Trial 7 -- EmotionLabel : Fear
Session 1 -- Trial 8 -- EmotionLabel : Neutral
Session 1 -- Trial 9 -- EmotionLabel : Sad
Session 1 -- Trial 10 -- EmotionLabel : Disgust
Session 1 -- Trial 11 -- EmotionLabel : Happy
Session 1 -- Trial 12 -- EmotionLabel : Fear
Session 1 -- Trial 13 -- EmotionLabel : Neutral
Session 1 -- Trial 14 -- EmotionLabel : Sad
Session 1 -- Trial 15 -- EmotionLabel : Disgust
Session 2 -- Trial 1 -- EmotionLabel : Sad
Session 2 -- Trial 2 -- EmotionLabel : Fear
Session 2 -- Trial 3 -- EmotionLabel : Neutral
Session 2 -- Trial 4 -- EmotionLabel : Disgust
Session 2 -- Trial 5 -- EmotionLabel : Happy
Session 2 -- Trial 6 -- EmotionLabel : Happy
Session 2 -- Trial 7 -- EmotionLabel : Disgus

In [6]:
n=0 #no. of movieclip "stimulus" index 0=Happy, 1=Fear
pd.DataFrame(data[n])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,300,301,302,303,304,305,306,307,308,309
0,11.082522,8.91599,7.894088,8.393629,8.576055,10.450283,8.682803,7.425018,6.833982,6.431147,...,8.720063,7.484101,6.889748,5.47804,4.247615,8.838729,7.736448,7.071955,5.657081,4.361506
1,11.081816,8.915104,7.893646,8.393852,8.575908,10.44977,8.681773,7.424501,6.833948,6.43086,...,8.719947,7.483864,6.889452,5.477954,4.247919,8.838477,7.736124,7.071776,5.656987,4.361624
2,11.081139,8.914245,7.893116,8.394334,8.575671,10.449391,8.680753,7.424007,6.834211,6.430407,...,8.719872,7.483311,6.888881,5.477563,4.248155,8.83838,7.735444,7.071462,5.656746,4.36178
3,11.080857,8.913749,7.893061,8.394893,8.575294,10.449425,8.6802,7.424111,6.834612,6.429797,...,8.719701,7.48254,6.887955,5.476832,4.248294,8.838502,7.734421,7.070929,5.656415,4.362126
4,11.081297,8.913816,7.893386,8.395509,8.575009,10.449813,8.680219,7.424706,6.835073,6.429158,...,8.719669,7.481893,6.886515,5.475891,4.248291,8.839146,7.733523,7.069984,5.656045,4.362404
5,11.082385,8.914133,7.893591,8.395986,8.574627,10.45064,8.680702,7.425287,6.835468,6.428375,...,8.719719,7.481358,6.884889,5.474986,4.248107,8.840121,7.732843,7.068976,5.655815,4.362619
6,11.083825,8.914366,7.893565,8.39613,8.573922,10.451684,8.681255,7.42567,6.835498,6.427334,...,8.719999,7.480719,6.883047,5.474118,4.247806,8.841397,7.73229,7.067766,5.655584,4.36283
7,11.085158,8.914192,7.892964,8.395734,8.572777,10.452605,8.681552,7.42542,6.834936,6.425751,...,8.720617,7.480014,6.881331,5.473169,4.247491,8.842948,7.731709,7.066447,5.655036,4.363043
8,11.085906,8.91345,7.891825,8.394749,8.571212,10.452949,8.681341,7.42456,6.833846,6.423801,...,8.721225,7.479337,6.879698,5.472189,4.247235,8.844387,7.731218,7.0651,5.654262,4.363255
9,11.086201,8.911913,7.890041,8.393327,8.569228,10.45264,8.680249,7.422915,6.832204,6.421476,...,8.72141,7.478602,6.877927,5.471179,4.24717,8.845474,7.730684,7.063503,5.653381,4.363512


In [7]:
# adding emotion_label into the last column
def create_dataframes(data, label):
    dataframes = []
    for i in range(45):
        data_array = data[i]
        label_array = label[i]
        df = pd.DataFrame(data_array)
        df['emotion_state'] = label_array
        dataframes.append(df)
    return dataframes

dataframes_emotion_label = create_dataframes(data, label)
dataframes_emotion_label

[            0         1         2         3         4          5         6  \
 0   11.082522  8.915990  7.894088  8.393629  8.576055  10.450283  8.682803   
 1   11.081816  8.915104  7.893646  8.393852  8.575908  10.449770  8.681773   
 2   11.081139  8.914245  7.893116  8.394334  8.575671  10.449391  8.680753   
 3   11.080857  8.913749  7.893061  8.394893  8.575294  10.449425  8.680200   
 4   11.081297  8.913816  7.893386  8.395509  8.575009  10.449813  8.680219   
 5   11.082385  8.914133  7.893591  8.395986  8.574627  10.450640  8.680702   
 6   11.083825  8.914366  7.893565  8.396130  8.573922  10.451684  8.681255   
 7   11.085158  8.914192  7.892964  8.395734  8.572777  10.452605  8.681552   
 8   11.085906  8.913450  7.891825  8.394749  8.571212  10.452949  8.681341   
 9   11.086201  8.911913  7.890041  8.393327  8.569228  10.452640  8.680249   
 10  11.086416  8.910022  7.888055  8.391777  8.567214  10.451958  8.678671   
 11  11.086846  8.908668  7.886208  8.390266  8.5653

In [8]:
# showing first participant dataframes_emotion_label[0]
dataframes_emotion_label[0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,301,302,303,304,305,306,307,308,309,emotion_state
0,11.082522,8.91599,7.894088,8.393629,8.576055,10.450283,8.682803,7.425018,6.833982,6.431147,...,7.484101,6.889748,5.47804,4.247615,8.838729,7.736448,7.071955,5.657081,4.361506,4.0
1,11.081816,8.915104,7.893646,8.393852,8.575908,10.44977,8.681773,7.424501,6.833948,6.43086,...,7.483864,6.889452,5.477954,4.247919,8.838477,7.736124,7.071776,5.656987,4.361624,4.0
2,11.081139,8.914245,7.893116,8.394334,8.575671,10.449391,8.680753,7.424007,6.834211,6.430407,...,7.483311,6.888881,5.477563,4.248155,8.83838,7.735444,7.071462,5.656746,4.36178,4.0
3,11.080857,8.913749,7.893061,8.394893,8.575294,10.449425,8.6802,7.424111,6.834612,6.429797,...,7.48254,6.887955,5.476832,4.248294,8.838502,7.734421,7.070929,5.656415,4.362126,4.0
4,11.081297,8.913816,7.893386,8.395509,8.575009,10.449813,8.680219,7.424706,6.835073,6.429158,...,7.481893,6.886515,5.475891,4.248291,8.839146,7.733523,7.069984,5.656045,4.362404,4.0
5,11.082385,8.914133,7.893591,8.395986,8.574627,10.45064,8.680702,7.425287,6.835468,6.428375,...,7.481358,6.884889,5.474986,4.248107,8.840121,7.732843,7.068976,5.655815,4.362619,4.0
6,11.083825,8.914366,7.893565,8.39613,8.573922,10.451684,8.681255,7.42567,6.835498,6.427334,...,7.480719,6.883047,5.474118,4.247806,8.841397,7.73229,7.067766,5.655584,4.36283,4.0
7,11.085158,8.914192,7.892964,8.395734,8.572777,10.452605,8.681552,7.42542,6.834936,6.425751,...,7.480014,6.881331,5.473169,4.247491,8.842948,7.731709,7.066447,5.655036,4.363043,4.0
8,11.085906,8.91345,7.891825,8.394749,8.571212,10.452949,8.681341,7.42456,6.833846,6.423801,...,7.479337,6.879698,5.472189,4.247235,8.844387,7.731218,7.0651,5.654262,4.363255,4.0
9,11.086201,8.911913,7.890041,8.393327,8.569228,10.45264,8.680249,7.422915,6.832204,6.421476,...,7.478602,6.877927,5.471179,4.24717,8.845474,7.730684,7.063503,5.653381,4.363512,4.0


In [11]:
for i in range(45):  
  num_missing_values = np.sum(np.isnan(data[i]))
  if num_missing_values > 0:
      print("The data has missing values.")
  else:
      print("The data does not have missing values.")


The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have missing values.
The data does not have mi

In [9]:
def print_data_shapes(data):
    for key in data.keys():
        array = data[key]
        print(f"Shape of key {key}: {array.shape}")


print_data_shapes(data)


Shape of key 0: (18, 310)
Shape of key 1: (24, 310)
Shape of key 2: (59, 310)
Shape of key 3: (46, 310)
Shape of key 4: (36, 310)
Shape of key 5: (64, 310)
Shape of key 6: (74, 310)
Shape of key 7: (17, 310)
Shape of key 8: (66, 310)
Shape of key 9: (35, 310)
Shape of key 10: (43, 310)
Shape of key 11: (43, 310)
Shape of key 12: (58, 310)
Shape of key 13: (60, 310)
Shape of key 14: (38, 310)
Shape of key 15: (59, 310)
Shape of key 16: (47, 310)
Shape of key 17: (16, 310)
Shape of key 18: (31, 310)
Shape of key 19: (32, 310)
Shape of key 20: (14, 310)
Shape of key 21: (60, 310)
Shape of key 22: (57, 310)
Shape of key 23: (30, 310)
Shape of key 24: (24, 310)
Shape of key 25: (46, 310)
Shape of key 26: (29, 310)
Shape of key 27: (23, 310)
Shape of key 28: (54, 310)
Shape of key 29: (19, 310)
Shape of key 30: (72, 310)
Shape of key 31: (16, 310)
Shape of key 32: (41, 310)
Shape of key 33: (22, 310)
Shape of key 34: (13, 310)
Shape of key 35: (59, 310)
Shape of key 36: (21, 310)
Shape of ke

In [10]:
import numpy as np

def combine_shapes(data):
    shapes = []
    for key in data.keys():
        array = data[key]
        shapes.append(array.shape)
    
    combined_array = np.array(shapes)
    return combined_array

combined_shapes_array = combine_shapes(data)
print(combined_shapes_array)


[[ 18 310]
 [ 24 310]
 [ 59 310]
 [ 46 310]
 [ 36 310]
 [ 64 310]
 [ 74 310]
 [ 17 310]
 [ 66 310]
 [ 35 310]
 [ 43 310]
 [ 43 310]
 [ 58 310]
 [ 60 310]
 [ 38 310]
 [ 59 310]
 [ 47 310]
 [ 16 310]
 [ 31 310]
 [ 32 310]
 [ 14 310]
 [ 60 310]
 [ 57 310]
 [ 30 310]
 [ 24 310]
 [ 46 310]
 [ 29 310]
 [ 23 310]
 [ 54 310]
 [ 19 310]
 [ 72 310]
 [ 16 310]
 [ 41 310]
 [ 22 310]
 [ 13 310]
 [ 59 310]
 [ 21 310]
 [ 18 310]
 [ 57 310]
 [ 71 310]
 [ 55 310]
 [ 29 310]
 [ 51 310]
 [ 32 310]
 [ 44 310]]


In [11]:
combined_shapes_array.shape


(45, 2)

In [12]:
import numpy as np

dataN = {0: [[1], [2]]}

# Get the maximum length of the inner lists
max_length = max(len(lst) for lst in dataN.values())

# Create a 3D numpy array with zeros
array_3d = np.zeros((len(dataN), max_length, 1))

# Fill the array with the values from the dictionary
for i, lst in dataN.items():
    for j, item in enumerate(lst):
        array_3d[i, j, 0] = item[0]

print(array_3d)
array_3d.shape

[[[1.]
  [2.]]]


(1, 2, 1)

In [13]:
sorted_data = {key: data[key] for key in sorted(
    data.keys(), key=lambda k: data[k].shape[0])}

for key, array in sorted_data.items():
    print(f"Shape of key {key}: {array.shape}")
    # Perform further operations with the sorted arrays


Shape of key 34: (13, 310)
Shape of key 20: (14, 310)
Shape of key 17: (16, 310)
Shape of key 31: (16, 310)
Shape of key 7: (17, 310)
Shape of key 0: (18, 310)
Shape of key 37: (18, 310)
Shape of key 29: (19, 310)
Shape of key 36: (21, 310)
Shape of key 33: (22, 310)
Shape of key 27: (23, 310)
Shape of key 1: (24, 310)
Shape of key 24: (24, 310)
Shape of key 26: (29, 310)
Shape of key 41: (29, 310)
Shape of key 23: (30, 310)
Shape of key 18: (31, 310)
Shape of key 19: (32, 310)
Shape of key 43: (32, 310)
Shape of key 9: (35, 310)
Shape of key 4: (36, 310)
Shape of key 14: (38, 310)
Shape of key 32: (41, 310)
Shape of key 10: (43, 310)
Shape of key 11: (43, 310)
Shape of key 44: (44, 310)
Shape of key 3: (46, 310)
Shape of key 25: (46, 310)
Shape of key 16: (47, 310)
Shape of key 42: (51, 310)
Shape of key 28: (54, 310)
Shape of key 40: (55, 310)
Shape of key 22: (57, 310)
Shape of key 38: (57, 310)
Shape of key 12: (58, 310)
Shape of key 2: (59, 310)
Shape of key 15: (59, 310)
Shape of

In [14]:
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

svc = SVC(kernel='linear', C=10)

# equivalent but with SGD solver
svc_bis = SGDClassifier(loss='hinge', penalty='l2', alpha=1/10)

# Train the classifier on the training data
# svc.fit(X_train, y_train)

# Make predictions on the test data
# y_pred = svc.predict(X_test)

# Evaluate the accuracy of the classifier
# accuracy = accuracy_score(y_test, y_pred)
# print("Accuracy:", accuracy)


In [15]:
dataframes_emotion_label[0].shape

(18, 311)

In [16]:
X = data
y = label


In [17]:
y

{0: array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4.]),
 1: array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1.]),
 2: array([3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 3., 3., 3., 3., 3.]),
 3: array([2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.]),
 4: array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.]),
 5: array([4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 

In [18]:
for i in range(45):
    unique_classes = np.unique(label[i])
    num_classes = len(unique_classes)
    print(f"Number of classes for y label {i}: {num_classes}")


Number of classes for y label 0: 1
Number of classes for y label 1: 1
Number of classes for y label 2: 1
Number of classes for y label 3: 1
Number of classes for y label 4: 1
Number of classes for y label 5: 1
Number of classes for y label 6: 1
Number of classes for y label 7: 1
Number of classes for y label 8: 1
Number of classes for y label 9: 1
Number of classes for y label 10: 1
Number of classes for y label 11: 1
Number of classes for y label 12: 1
Number of classes for y label 13: 1
Number of classes for y label 14: 1
Number of classes for y label 15: 1
Number of classes for y label 16: 1
Number of classes for y label 17: 1
Number of classes for y label 18: 1
Number of classes for y label 19: 1
Number of classes for y label 20: 1
Number of classes for y label 21: 1
Number of classes for y label 22: 1
Number of classes for y label 23: 1
Number of classes for y label 24: 1
Number of classes for y label 25: 1
Number of classes for y label 26: 1
Number of classes for y label 27: 1
Nu

In [19]:
for i in range(45):
    unique_values = np.unique(label[i])
    print(f"Unique values for y label {i}: {unique_values}")


Unique values for y label 0: [4.]
Unique values for y label 1: [1.]
Unique values for y label 2: [3.]
Unique values for y label 3: [2.]
Unique values for y label 4: [0.]
Unique values for y label 5: [4.]
Unique values for y label 6: [1.]
Unique values for y label 7: [3.]
Unique values for y label 8: [2.]
Unique values for y label 9: [0.]
Unique values for y label 10: [4.]
Unique values for y label 11: [1.]
Unique values for y label 12: [3.]
Unique values for y label 13: [2.]
Unique values for y label 14: [0.]
Unique values for y label 15: [2.]
Unique values for y label 16: [1.]
Unique values for y label 17: [3.]
Unique values for y label 18: [0.]
Unique values for y label 19: [4.]
Unique values for y label 20: [4.]
Unique values for y label 21: [0.]
Unique values for y label 22: [3.]
Unique values for y label 23: [2.]
Unique values for y label 24: [1.]
Unique values for y label 25: [3.]
Unique values for y label 26: [4.]
Unique values for y label 27: [1.]
Unique values for y label 28: 

In [12]:
def splitdata(data, label, ntrainbatch):
    nbatch = ntrainbatch-1
    trainframes = []
    testframes = []
    for i in range(45):
        if i % 15-1 < nbatch:
            trainframes.append(
                pd.concat([pd.DataFrame(data[i]), pd.DataFrame(label[i])], axis=1))
        if i % 15 > nbatch:
            testframes.append(
                pd.concat([pd.DataFrame(data[i]), pd.DataFrame(label[i])], axis=1))
    train = pd.concat(trainframes)
    test = pd.concat(testframes)
    return train, test

def fulldf(nsubjects=16):
    # 'Files must be labelled as {subject#}_123.npz’ and should be inside a Data folder within the Project'
    data16 = {}
    label16 = {}
    Xytrain16_list = []
    Xytest16_list = []
    for i in range(1, nsubjects+1):
        # Load all 16 files data into a Dict named ‘i_123.npz’ using a for loop
        data16[i] = pickle.loads(np.load(f'../data/{i}_123.npz')['data'])
        label16[i] = pickle.loads(np.load(f'../data/{i}_123.npz')['label'])
    for i in range(1, 17):
        # apply all data to the splitdata func to create lists of DFs
        X, y = splitdata(data16[i], label16[i], 10)
        Xytrain16_list.append(X)
        Xytest16_list.append(y)
    # create a unified DF from every list with pd.concat(trainframes)
    Xytrain16_DF = pd.concat(Xytrain16_list)
    Xytest16_DF = pd.concat(Xytest16_list)
    return Xytrain16_DF, Xytest16_DF


Xytrain16_DF, Xytest16_DF = fulldf()


In [19]:
# for i in range(45):
has_missing_values = Xytrain16_DF.isnull().values.any()
if has_missing_values:
    print("The data has missing values.")
else:
    print("The data does not have missing values.")

# for i in range(45):
has_missing_values = Xytest16_DF.isnull().values.any()
if has_missing_values:
    print("The data has missing values.")
else:
    print("The data does not have missing values.")




# len(Xytest16_DF)

The data does not have missing values.
The data does not have missing values.


In [22]:
X_test = Xytest16_DF.iloc[:, :-1].values
y_test = Xytest16_DF.iloc[:, -1].values
X_train = Xytrain16_DF.iloc[:, :-1].values
y_train = Xytrain16_DF.iloc[:, -1].values


In [31]:
# scaling of the dataset
from sklearn.preprocessing import RobustScaler, MinMaxScaler

scaler = RobustScaler()

scaler.fit(X_train)
X_train = scaler.transform(X_train)

scaler.fit(X_test)
X_test = scaler.transform(X_test)


In [27]:
from sklearn.linear_model import SGDClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# with Robustsclaer
svc = SVC(kernel='linear', C=10)

# equivalent but with SGD solver
svc_bis = SGDClassifier(loss='hinge', penalty='l2', alpha=1/10)

# Train the classifier on the training data
svc.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svc.predict(X_test)

# Evaluate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.530448717948718


In [28]:
from sklearn.model_selection import GridSearchCV

# with Robustsclaer
# Define the parameter grid
param_grid = {
    'kernel': ['linear', 'rbf'],
    'C': [0.1, 1, 10, 100]
}

# Create the SVC classifier
svc = SVC()

# Create the GridSearchCV object
grid_search = GridSearchCV(svc, param_grid, cv=5)

# Fit the GridSearchCV object on the training data
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the corresponding model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Make predictions on the test data using the best model
y_pred = best_model.predict(X_test)

# Evaluate the accuracy of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy:", accuracy)
print("Best Hyperparameters:", best_params)


Best Accuracy: 0.6042668269230769
Best Hyperparameters: {'C': 10, 'kernel': 'rbf'}


In [29]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'kernel': ['rbf'],
    'C': np.linspace(5, 20, num=8)
}

# Create the SVC classifier
svc = SVC()

# Create the GridSearchCV object
grid_search = GridSearchCV(svc, param_grid, cv=5)

# Fit the GridSearchCV object on the training data
grid_search.fit(X_train, y_train)

# Get the best hyperparameters and the corresponding model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Make predictions on the test data using the best model
y_pred = best_model.predict(X_test)

# Evaluate the accuracy of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy:", accuracy)
print("Best Hyperparameters:", best_params)


Best Accuracy: 0.6038661858974359
Best Hyperparameters: {'C': 9.285714285714285, 'kernel': 'rbf'}


In [32]:
# scaling of the dataset
from sklearn.preprocessing import  MinMaxScaler

scaler = MinMaxScaler()

scaler.fit(X_train)
X_train_minmaxed = scaler.transform(X_train)

scaler.fit(X_test)
X_test_minmaxed = scaler.transform(X_test)


In [33]:
from sklearn.model_selection import GridSearchCV

# with MinMax
# Define the parameter grid
param_grid = {
    'kernel': ['rbf'],
    'C': [10]
}

# Create the SVC classifier
svc = SVC()

# Create the GridSearchCV object
grid_search = GridSearchCV(svc, param_grid, cv=5)

# Fit the GridSearchCV object on the training data
grid_search.fit(X_train_minmaxed, y_train)

# Get the best hyperparameters and the corresponding model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Make predictions on the test data using the best model
y_pred = best_model.predict(X_test_minmaxed)

# Evaluate the accuracy of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy:", accuracy)
print("Best Hyperparameters:", best_params)


Best Accuracy: 0.6376201923076923
Best Hyperparameters: {'C': 10, 'kernel': 'rbf'}


In [34]:
from sklearn.model_selection import GridSearchCV

# with MinMax
# Define the parameter grid
param_grid = {
    'kernel': ['linear'],
    'C': [10]
}

# Create the SVC classifier
svc = SVC()

# Create the GridSearchCV object
grid_search = GridSearchCV(svc, param_grid, cv=5)

# Fit the GridSearchCV object on the training data
grid_search.fit(X_train_minmaxed, y_train)

# Get the best hyperparameters and the corresponding model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Make predictions on the test data using the best model
y_pred = best_model.predict(X_test_minmaxed)

# Evaluate the accuracy of the best model
accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy:", accuracy)
print("Best Hyperparameters:", best_params)


In [None]:
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline

param_grid = {
    'pca__n_components': [2, 5, 10],
    'svc__kernel': ['rbf'],
    'svc__C': [10]
}

pipeline = make_pipeline(PCA(), SVC())

grid_search = GridSearchCV(pipeline, param_grid, cv=5)

grid_search.fit(X_train_minmaxed, y_train)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

accuracy = accuracy_score(y_test, y_pred)
print("Best Accuracy:", accuracy)
print("Best Hyperparameters:", best_params)
