# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid", {'grid.linestyle': '--'})
sns.set_style({'font.family':'serif', 'font.serif':'Computer Modern'})
sns.set_context(font_scale=2, rc={"font.size":10,"axes.titlesize":20,"axes.labelsize":15})

from collections import defaultdict

from sklearn.model_selection import train_test_split, cross_val_score 
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

df_train = pd.read_csv("C:/Users/Utente/OneDrive/Desktop/UNIPI - DS for Business Informatics/Data Mining_2/Progetto/df_train_processed.csv")
df_test = pd.read_csv("C:/Users/Utente/OneDrive/Desktop/UNIPI - DS for Business Informatics/Data Mining_2/Progetto/df_test_processed.csv")

from sklearn.model_selection import RandomizedSearchCV

from sklearn.svm import SVC

In [40]:
from sklearn.metrics import roc_curve, auc

# Standardization

## Train set

In [2]:
df_train_num = df_train.select_dtypes(include="number").drop('actor', axis=1)
df_train_cat = df_train.select_dtypes(include=['object'])

scaler = MinMaxScaler()
df_train_num_scaled = pd.DataFrame(scaler.fit_transform(df_train_num), columns=df_train_num.columns)
df_train = pd.concat([df_train_num_scaled, df_train_cat], axis=1)

## Test set

In [3]:
df_test_num = df_test.select_dtypes(include="number").drop('actor', axis=1)
df_test_cat = df_test.select_dtypes(include=['object'])

scaler = MinMaxScaler()
df_test_num_scaled = pd.DataFrame(scaler.fit_transform(df_test_num), columns=df_test_num.columns)
df_test = pd.concat([df_test_num_scaled, df_test_cat], axis=1)

# Trasformation

In [4]:
def label_encoder(array, label):
    new_array = np.zeros(len(array))
    for i in range(len(array)):
        if array[i] == label:
            new_array[i] = 1
    return new_array

## Vocal_channel

In [5]:
df_train_model_vc = df_train.copy()

In [6]:
df_test_model_vc = df_test.copy()

In [7]:
X_train_vc = df_train_model_vc.drop("vocal_channel", axis=1)
X_train_vc = np.array(pd.get_dummies(X_train_vc))
y_train_vc = label_encoder(df_train_model_vc["vocal_channel"], 'speech')

In [8]:
X_test_vc = df_test_model_vc.drop("vocal_channel", axis=1)
X_test_vc = np.array(pd.get_dummies(X_test_vc))
y_test_vc = label_encoder(df_test_model_vc["vocal_channel"], 'speech') 

## Sex

In [9]:
df_train_model_s = df_train.copy()

In [10]:
df_test_model_s = df_test.copy()

In [11]:
X_train_s = df_train_model_s.drop("sex", axis=1)
X_train_s = np.array(pd.get_dummies(X_train_s))
y_train_s = label_encoder(df_train_model_s["sex"], 'M')

In [12]:
X_test_s = df_test_model_s.drop("sex", axis=1)
X_test_s = np.array(pd.get_dummies(X_test_s))
y_test_s = label_encoder(df_test_model_s["sex"], 'M')

## Emotional_intensity

In [13]:
df_train_model_ei = df_train.copy()

In [14]:
df_test_model_ei = df_test.copy()

In [15]:
X_train_ei = df_train_model_ei.drop("emotional_intensity", axis=1)
X_train_ei = np.array(pd.get_dummies(X_train_ei))
y_train_ei = label_encoder(df_train_model_ei["emotional_intensity"], 'normal')

In [16]:
X_test_ei = df_test_model_ei.drop("emotional_intensity", axis=1)
X_test_ei = np.array(pd.get_dummies(X_test_ei))
y_test_ei = label_encoder(df_test_model_ei["emotional_intensity"], 'normal')

## Emotion

In [17]:
df_train_model_e = df_train.copy()

In [18]:
df_test_model_e = df_test.copy()

In [19]:
def label_encoder_e(array):
    new_array = np.zeros(len(array))
    for i in range(len(array)):
        if array[i] == "calm":
            new_array[i] = 1
        elif array[i] == "happy":
            new_array[i] = 2
        elif array[i] == "sad":
            new_array[i] = 3
        elif array[i] == "angry":
            new_array[i] = 4
        elif array[i] == "fearful":
            new_array[i] = 5
        elif array[i] == "disgust":
            new_array[i] = 6
        elif array[i] == "surprised":
            new_array[i] = 7
    return new_array
 

In [20]:
X_train_e = df_train_model_e.drop("emotion", axis=1)
X_train_e = np.array(pd.get_dummies(X_train_e))
y_train_e = label_encoder_e(df_train_model_e["emotion"])

In [21]:
X_test_e = df_test_model_e.drop("emotion", axis=1)
X_test_e = np.array(pd.get_dummies(X_test_e))
y_test_e = label_encoder_e(df_test_model_e["emotion"])

# Modelli - Non linear

## Sex

In [23]:
C_range = np.logspace(-4, 4, 100)
gamma_range = np.logspace(-4, 4, 100) # possibile gamma range
kernel_type = ['rbf', 'poly'] # possibile kernel type
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_s, y_train_s)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_s, y_train_s) 

y_pred = best_clf.predict(X_test_s)

print(classification_report(y_test_s, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])

              precision    recall  f1-score   support

         0.0       0.99      0.81      0.89       312
         1.0       0.84      0.99      0.91       312

    accuracy                           0.90       624
   macro avg       0.92      0.90      0.90       624
weighted avg       0.92      0.90      0.90       624

Best C value: 0.521400828799969
Best kernel: rbf
Best gamma value: 0.055908101825122285


In [36]:
C_range = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8]
gamma_range = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8] # possibile gamma range
kernel_type = ['rbf'] # possibile kernel type
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_s, y_train_s)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_s, y_train_s) 

y_pred = best_clf.predict(X_test_s)

print(classification_report(y_test_s, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])



              precision    recall  f1-score   support

         0.0       0.99      0.80      0.89       312
         1.0       0.83      0.99      0.91       312

    accuracy                           0.90       624
   macro avg       0.91      0.90      0.90       624
weighted avg       0.91      0.90      0.90       624

Best C value: 0.7
Best kernel: rbf
Best gamma value: 0.1


## Vocal_channel

In [28]:
C_range = np.logspace(-4, 4, 100)
gamma_range = np.logspace(-4, 4, 100) # possibile gamma range
kernel_type = ['rbf', 'poly'] # possibile kernel type
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_vc, y_train_vc)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_vc, y_train_vc) 

y_pred = best_clf.predict(X_test_vc)

print(classification_report(y_test_vc, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])

              precision    recall  f1-score   support

         0.0       0.96      0.99      0.97       264
         1.0       0.99      0.97      0.98       360

    accuracy                           0.98       624
   macro avg       0.97      0.98      0.98       624
weighted avg       0.98      0.98      0.98       624

Best C value: 95.45484566618347
Best kernel: rbf
Best gamma value: 0.0006428073117284319


In [37]:
C_range = [55,65,75,85,95,105,115,125,135]
gamma_range = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8] # possibile gamma range
kernel_type = ["rbf"] # possibile kernel type
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_vc, y_train_vc)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_vc, y_train_vc) 

y_pred = best_clf.predict(X_test_vc)

print(classification_report(y_test_vc, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])



              precision    recall  f1-score   support

         0.0       0.97      0.96      0.96       264
         1.0       0.97      0.97      0.97       360

    accuracy                           0.97       624
   macro avg       0.97      0.97      0.97       624
weighted avg       0.97      0.97      0.97       624

Best C value: 55
Best kernel: rbf
Best gamma value: 0.1


## Emotional_intensity

In [29]:
C_range = np.logspace(-4, 4, 100)
gamma_range = np.logspace(-4, 4, 100) # possibile gamma range
kernel_type = ['rbf', 'poly'] # possibile kernel type
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_ei, y_train_ei)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_ei, y_train_ei) 

y_pred = best_clf.predict(X_test_ei)

print(classification_report(y_test_ei, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])

              precision    recall  f1-score   support

         0.0       0.72      0.77      0.75       288
         1.0       0.79      0.75      0.77       336

    accuracy                           0.76       624
   macro avg       0.76      0.76      0.76       624
weighted avg       0.76      0.76      0.76       624

Best C value: 1072.2672220103254
Best kernel: rbf
Best gamma value: 0.0003053855508833416


In [38]:
C_range = [1042,1052,1062,1072,1082,1092,1102,1112,1122,1132]
gamma_range = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8]
kernel_type = ['rbf']
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_ei, y_train_ei)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_ei, y_train_ei) 

y_pred = best_clf.predict(X_test_ei)

print(classification_report(y_test_ei, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])



              precision    recall  f1-score   support

         0.0       0.64      0.86      0.73       288
         1.0       0.83      0.58      0.69       336

    accuracy                           0.71       624
   macro avg       0.73      0.72      0.71       624
weighted avg       0.74      0.71      0.71       624

Best C value: 1042
Best kernel: rbf
Best gamma value: 0.1


## Emotion 

In [31]:
C_range = np.logspace(-4, 4, 100)
gamma_range = np.logspace(-4, 4, 100) # possibile gamma range
kernel_type = ['rbf', 'poly'] # possibile kernel type
class_w = ["balanced"]
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type,
                "class_weight": class_w}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_e, y_train_e)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_e, y_train_e) 

y_pred = best_clf.predict(X_test_e)

print(classification_report(y_test_e, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])

              precision    recall  f1-score   support

         0.0       0.41      0.65      0.50        48
         1.0       0.54      0.71      0.61        96
         2.0       0.46      0.32      0.38        96
         3.0       0.57      0.22      0.32        96
         4.0       0.51      0.89      0.64        96
         5.0       0.59      0.28      0.38        96
         6.0       0.49      0.56      0.52        48
         7.0       0.45      0.46      0.45        48

    accuracy                           0.50       624
   macro avg       0.50      0.51      0.48       624
weighted avg       0.51      0.50      0.47       624

Best C value: 0.018307382802953697
Best kernel: poly
Best gamma value: 0.09770099572992257


In [39]:
C_range = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8]
gamma_range = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8]
kernel_type = ["poly"] # possibile kernel type
class_w = ["balanced"]
param_grid = {'C': C_range,
              'gamma': gamma_range,
              'kernel': kernel_type,
                "class_weight": class_w}

clf = SVC()
random_search = RandomizedSearchCV(clf, param_distributions=param_grid, n_iter=200, random_state=42, n_jobs=-1)
random_search.fit(X_train_e, y_train_e)

best_clf = random_search.best_estimator_ # seleziono il miglior modello trovato dalla random search
best_clf.fit(X_train_e, y_train_e) 

y_pred = best_clf.predict(X_test_e)

print(classification_report(y_test_e, y_pred))

print("Best C value:", random_search.best_params_['C'])
print("Best kernel:", random_search.best_params_['kernel'])
print("Best gamma value:", random_search.best_params_['gamma'])



              precision    recall  f1-score   support

         0.0       0.35      0.75      0.48        48
         1.0       0.54      0.65      0.59        96
         2.0       0.61      0.24      0.34        96
         3.0       0.50      0.22      0.30        96
         4.0       0.43      0.83      0.57        96
         5.0       0.69      0.28      0.40        96
         6.0       0.43      0.44      0.43        48
         7.0       0.44      0.50      0.47        48

    accuracy                           0.47       624
   macro avg       0.50      0.49      0.45       624
weighted avg       0.52      0.47      0.45       624

Best C value: 0.1
Best kernel: poly
Best gamma value: 0.1


# Metriche per la valutazione dei modelli 

## Sex

In [41]:
C_value = 0.7
gamma_value = 0.1
kernel_type = "rbf"

clf = SVC(C=C_value, gamma=gamma_value, kernel=kernel_type, probability=True)
clf.fit(X_train_s, y_train_s)
y_pred = clf.predict(X_test_s)

print("Accuracy:", accuracy_score(y_test_s, y_pred))
print(classification_report(y_test_s, y_pred, digits=6))

Accuracy: 0.8974358974358975
              precision    recall  f1-score   support

         0.0   0.992063  0.801282  0.886525       312
         1.0   0.833333  0.993590  0.906433       312

    accuracy                       0.897436       624
   macro avg   0.912698  0.897436  0.896479       624
weighted avg   0.912698  0.897436  0.896479       624



In [42]:
y_pred_proba = clf.predict_proba(X_test_s)[:, 1]
fpr_s, tpr_s, _ = roc_curve(y_test_s, y_pred_proba, pos_label=1)
auc_score_s = auc(fpr_s, tpr_s)

print("TPR:", tpr_s)
print("FPR:", fpr_s)
print("AUC score:", auc_score_s)

TPR: [0.         0.00320513 0.61217949 0.61217949 0.71794872 0.71794872
 0.77564103 0.77564103 0.83974359 0.83974359 0.85897436 0.85897436
 0.87179487 0.87179487 0.88461538 0.88461538 0.90705128 0.90705128
 0.91346154 0.91346154 0.91987179 0.91987179 0.92628205 0.92628205
 0.93269231 0.93269231 0.93589744 0.93589744 0.93910256 0.93910256
 0.94230769 0.94230769 0.94871795 0.94871795 0.95512821 0.95512821
 0.95833333 0.95833333 0.96153846 0.96153846 0.97435897 0.97435897
 0.98397436 0.98397436 0.98717949 0.98717949 0.99038462 0.99038462
 0.99358974 0.99358974 0.99679487 0.99679487 1.         1.        ]
FPR: [0.         0.         0.         0.00320513 0.00320513 0.00641026
 0.00641026 0.01282051 0.01282051 0.01602564 0.01602564 0.01923077
 0.01923077 0.0224359  0.0224359  0.02564103 0.02564103 0.02884615
 0.02884615 0.03205128 0.03205128 0.04166667 0.04166667 0.04807692
 0.04807692 0.05769231 0.05769231 0.06089744 0.06089744 0.06730769
 0.06730769 0.07051282 0.07051282 0.08012821 0.0801

In [43]:
sex_roc_s = pd.DataFrame(columns=["SVM", "GB"], 
                       index=["fpr", "tpr", "auc"])
sex_roc_s.loc["fpr"] = [None, None]
sex_roc_s.loc["tpr"] = [None, None]
sex_roc_s.loc["auc"] = [None, None]
sex_roc_s

Unnamed: 0,SVM,GB
fpr,,
tpr,,
auc,,


In [44]:
sex_roc_s.loc["fpr"] [0] = fpr_s
sex_roc_s.loc["tpr"] [0] = tpr_s
sex_roc_s.loc["auc"] [0] = auc_score_s

sex_roc_s.to_csv("sex_roc.csv", index=False) 

## Vocal_channel

In [45]:
C_value = 95.45484566618347
gamma_value = 0.0006428073117284319
kernel_type = 'rbf'

clf = SVC(C=C_value, gamma=gamma_value, kernel=kernel_type, probability=True)
clf.fit(X_train_vc, y_train_vc)
y_pred = clf.predict(X_test_vc)

print("Accuracy:", accuracy_score(y_test_vc, y_pred))
print(classification_report(y_test_vc, y_pred, digits=6))

Accuracy: 0.9759615384615384
              precision    recall  f1-score   support

         0.0   0.956044  0.988636  0.972067       264
         1.0   0.991453  0.966667  0.978903       360

    accuracy                       0.975962       624
   macro avg   0.973748  0.977652  0.975485       624
weighted avg   0.976472  0.975962  0.976011       624



In [46]:
y_pred_proba = clf.predict_proba(X_test_vc)[:, 1]
fpr_v, tpr_v, _ = roc_curve(y_test_vc, y_pred_proba, pos_label=1)
auc_score_v = auc(fpr_v, tpr_v)

print("TPR:", tpr_v)
print("FPR:", fpr_v)
print("AUC score:", auc_score_v)

TPR: [0.         0.08333333 0.81666667 0.81666667 0.95       0.95
 0.96666667 0.96666667 0.98055556 0.98055556 0.98611111 0.98611111
 0.99444444 0.99444444 0.99722222 0.99722222 1.         1.
 1.        ]
FPR: [0.         0.         0.         0.00378788 0.00378788 0.00757576
 0.00757576 0.01136364 0.01136364 0.02272727 0.02272727 0.03787879
 0.03787879 0.04166667 0.04166667 0.04545455 0.04545455 0.97727273
 1.        ]
AUC score: 0.9985269360269361


In [47]:
# vocal_channel

vocal_channel_roc_s = pd.DataFrame(columns=["SVM", "GB"],
                       index=["fpr", "tpr", "auc"], )
vocal_channel_roc_s.loc["fpr"] = [None, None]
vocal_channel_roc_s.loc["tpr"] = [None, None]
vocal_channel_roc_s.loc["auc"] = [None, None]
vocal_channel_roc_s

Unnamed: 0,SVM,GB
fpr,,
tpr,,
auc,,


In [48]:
vocal_channel_roc_s.loc["fpr"] [0] = fpr_v
vocal_channel_roc_s.loc["tpr"] [0] = tpr_v
vocal_channel_roc_s.loc["auc"] [0] = auc_score_v

vocal_channel_roc_s.to_csv("vocal_channel_roc.csv", index = False) 

## Emotional_intensity

In [49]:
C_value =  1072.2672220103254
gamma_value = 0.0003053855508833416
kernel_type = 'rbf'

clf = SVC(C=C_value, gamma=gamma_value, kernel=kernel_type, probability=True)
clf.fit(X_train_ei, y_train_ei)
y_pred = clf.predict(X_test_ei)

print("Accuracy:", accuracy_score(y_test_ei, y_pred))
print(classification_report(y_test_ei, y_pred, digits=6))

Accuracy: 0.7580128205128205
              precision    recall  f1-score   support

         0.0   0.724590  0.767361  0.745363       288
         1.0   0.789969  0.750000  0.769466       336

    accuracy                       0.758013       624
   macro avg   0.757279  0.758681  0.757414       624
weighted avg   0.759794  0.758013  0.758341       624



In [50]:
y_pred_proba = clf.predict_proba(X_test_ei)[:, 1]
fpr_ei, tpr_ei, _ = roc_curve(y_test_ei, y_pred_proba, pos_label=1)
auc_score_ei = auc(fpr_ei, tpr_ei)

print("TPR:", tpr_ei)
print("FPR:", fpr_ei)
print("AUC score:", auc_score_ei)

TPR: [0.         0.00297619 0.03571429 0.03571429 0.08928571 0.08928571
 0.27380952 0.27380952 0.36309524 0.36309524 0.37202381 0.37202381
 0.375      0.375      0.38988095 0.38988095 0.41369048 0.41369048
 0.42559524 0.42559524 0.44940476 0.44940476 0.45833333 0.45833333
 0.46130952 0.46130952 0.46428571 0.46428571 0.48809524 0.48809524
 0.5        0.5        0.50297619 0.50297619 0.52678571 0.52678571
 0.5327381  0.5327381  0.54166667 0.54166667 0.54761905 0.54761905
 0.55654762 0.55654762 0.55952381 0.55952381 0.56547619 0.56547619
 0.58630952 0.58630952 0.5922619  0.5922619  0.60714286 0.60714286
 0.61904762 0.61904762 0.62202381 0.62202381 0.625      0.625
 0.64285714 0.64285714 0.6547619  0.6547619  0.6577381  0.6577381
 0.66369048 0.66369048 0.67261905 0.67261905 0.68154762 0.68154762
 0.70238095 0.70238095 0.70833333 0.70833333 0.71428571 0.71428571
 0.7172619  0.7172619  0.72619048 0.72619048 0.72916667 0.72916667
 0.73214286 0.73214286 0.73511905 0.73511905 0.74702381 0.74702

In [51]:
# emotional_intensity

emotional_intensity_roc_s = pd.DataFrame(columns=["SVM", "GB"], 
                       index=["fpr", "tpr", "auc"])
emotional_intensity_roc_s.loc["fpr"] = [None, None]
emotional_intensity_roc_s.loc["tpr"] = [None, None]
emotional_intensity_roc_s.loc["auc"] = [None, None]
emotional_intensity_roc_s

Unnamed: 0,SVM,GB
fpr,,
tpr,,
auc,,


In [52]:
emotional_intensity_roc_s.loc["fpr"] [0] = fpr_ei
emotional_intensity_roc_s.loc["tpr"] [0] = tpr_ei
emotional_intensity_roc_s.loc["auc"] [0] = auc_score_ei

emotional_intensity_roc_s.to_csv("emotional_intensity_roc.csv", index=False) 

## Emotion

In [53]:
C_value = 0.018307382802953697
gamma_value =  0.09770099572992257
class_w = "balanced"
kernel_type = 'poly'

clf = SVC(C=C_value, gamma=gamma_value, kernel=kernel_type, probability=True)
clf.fit(X_train_e, y_train_e)
y_pred = clf.predict(X_test_e)

print("Accuracy:", accuracy_score(y_test_e, y_pred))
print(classification_report(y_test_e, y_pred, digits=6))

Accuracy: 0.4967948717948718
              precision    recall  f1-score   support

         0.0   0.428571  0.625000  0.508475        48
         1.0   0.511111  0.718750  0.597403        96
         2.0   0.514706  0.364583  0.426829        96
         3.0   0.525000  0.218750  0.308824        96
         4.0   0.494048  0.864583  0.628788        96
         5.0   0.619048  0.270833  0.376812        96
         6.0   0.470588  0.500000  0.484848        48
         7.0   0.440000  0.458333  0.448980        48

    accuracy                       0.496795       624
   macro avg   0.500384  0.502604  0.472620       624
weighted avg   0.512845  0.496795  0.470739       624

