# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid", {'grid.linestyle': '--'})
sns.set_style({'font.family':'serif', 'font.serif':'Computer Modern'})
sns.set_context(font_scale=2, rc={"font.size":10,"axes.titlesize":20,"axes.labelsize":15})
from collections import defaultdict
from sklearn.model_selection import train_test_split, cross_val_score 
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report
from scipy.stats import randint

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import HistGradientBoostingClassifier

df_train = pd.read_csv("C:/Users/Utente/OneDrive/Desktop/UNIPI - DS for Business Informatics/Data Mining_2/Progetto/df_train_processed.csv")
df_test = pd.read_csv("C:/Users/Utente/OneDrive/Desktop/UNIPI - DS for Business Informatics/Data Mining_2/Progetto/df_test_processed.csv")

In [32]:
from sklearn.metrics import roc_curve, auc

# Standardization

## Train

In [2]:
df_train_num = df_train.select_dtypes(include="number").drop('actor', axis=1)
df_train_cat = df_train.select_dtypes(include=['object'])

scaler = MinMaxScaler()
df_train_num_scaled = pd.DataFrame(scaler.fit_transform(df_train_num), columns=df_train_num.columns)
df_train = pd.concat([df_train_num_scaled, df_train_cat], axis=1)

## Test

In [3]:
df_test_num = df_test.select_dtypes(include="number").drop('actor', axis=1)
df_test_cat = df_test.select_dtypes(include=['object'])

scaler = MinMaxScaler()
df_test_num_scaled = pd.DataFrame(scaler.fit_transform(df_test_num), columns=df_test_num.columns)
df_test = pd.concat([df_test_num_scaled, df_test_cat], axis=1)

# Trasformation 

In [4]:
def label_encoder(array, label):
    new_array = np.zeros(len(array))
    for i in range(len(array)):
        if array[i] == label:
            new_array[i] = 1
    return new_array

## Vocal_channel

In [5]:
df_train_model_vc = df_train.copy()

In [6]:
df_test_model_vc = df_test.copy()

In [7]:
X_train_vc = df_train_model_vc.drop("vocal_channel", axis=1)
X_train_vc = np.array(pd.get_dummies(X_train_vc))
y_train_vc = label_encoder(df_train_model_vc["vocal_channel"], 'speech')

In [8]:
X_test_vc = df_test_model_vc.drop("vocal_channel", axis=1)
X_test_vc = np.array(pd.get_dummies(X_test_vc))
y_test_vc = label_encoder(df_test_model_vc["vocal_channel"], 'speech') 

## Sex

In [9]:
df_train_model_s = df_train.copy()

In [10]:
df_test_model_s = df_test.copy()

In [11]:
X_train_s = df_train_model_s.drop("sex", axis=1)
X_train_s = np.array(pd.get_dummies(X_train_s))
y_train_s = label_encoder(df_train_model_s["sex"], 'M')

In [12]:
X_test_s = df_test_model_s.drop("sex", axis=1)
X_test_s = np.array(pd.get_dummies(X_test_s))
y_test_s = label_encoder(df_test_model_s["sex"], 'M')

## Emotional_intensity

In [13]:
df_train_model_ei = df_train.copy()

In [14]:
df_test_model_ei = df_test.copy()

In [15]:
X_train_ei = df_train_model_ei.drop("emotional_intensity", axis=1)
X_train_ei = np.array(pd.get_dummies(X_train_ei))
y_train_ei = label_encoder(df_train_model_ei["emotional_intensity"], 'normal')

In [16]:
X_test_ei = df_test_model_ei.drop("emotional_intensity", axis=1)
X_test_ei = np.array(pd.get_dummies(X_test_ei))
y_test_ei = label_encoder(df_test_model_ei["emotional_intensity"], 'normal')

## Emotion

In [17]:
df_train_model_e = df_train.copy()

In [18]:
df_test_model_e = df_test.copy()

In [19]:
def label_encoder_e(array):
    new_array = np.zeros(len(array))
    for i in range(len(array)):
        if array[i] == "calm":
            new_array[i] = 1
        elif array[i] == "happy":
            new_array[i] = 2
        elif array[i] == "sad":
            new_array[i] = 3
        elif array[i] == "angry":
            new_array[i] = 4
        elif array[i] == "fearful":
            new_array[i] = 5
        elif array[i] == "disgust":
            new_array[i] = 6
        elif array[i] == "surprised":
            new_array[i] = 7
    return new_array

In [20]:
X_train_e = df_train_model_e.drop("emotion", axis=1)
X_train_e = np.array(pd.get_dummies(X_train_e))
y_train_e = label_encoder_e(df_train_model_e["emotion"])

In [21]:
X_test_e = df_test_model_e.drop("emotion", axis=1)
X_test_e = np.array(pd.get_dummies(X_test_e))
y_test_e = label_encoder_e(df_test_model_e["emotion"])

# Modelli 

## Sex

In [24]:
param_distribs = {
    'boosting_type': ['gbdt','goss',"dart"], #gbdt, traditional Gradient Boosting Decision Tree,
                                            #dart, Dropouts meet Multiple Additive Regression Trees
                                                # random forest
    'n_estimators': randint(low=50, high=500),
    'max_depth': list(range(2,200)),
    'learning_rate':[0.0001, 0.001, 0.01, 0.1, 1.0],
    'num_leaves': randint(low=5, high=50), # max number of leaves in one tree
    
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_s, y_train_s)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_s, y_train_s)

y_pred = best_clf.predict(X_test_s)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_s, y_pred))


Best parameters: {'boosting_type': 'goss', 'learning_rate': 0.1, 'max_depth': 55, 'n_estimators': 392, 'num_leaves': 41}
              precision    recall  f1-score   support

         0.0       0.99      0.76      0.86       312
         1.0       0.80      0.99      0.89       312

    accuracy                           0.88       624
   macro avg       0.90      0.88      0.87       624
weighted avg       0.90      0.88      0.87       624



In [26]:
param_distribs = {
    'boosting_type': ["goss"], 
    'n_estimators': [342,352,362,372,382,392,402,412,422,432],
    'max_depth': [25,35,45,55,65,75,85,95],
    'learning_rate':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
    'num_leaves': [11,21,31,41,51,61,71,81], 
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_s, y_train_s)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_s, y_train_s)

y_pred = best_clf.predict(X_test_s)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_s, y_pred))

Best parameters: {'num_leaves': 21, 'n_estimators': 422, 'max_depth': 45, 'learning_rate': 0.3, 'boosting_type': 'goss'}
              precision    recall  f1-score   support

         0.0       0.98      0.77      0.86       312
         1.0       0.81      0.99      0.89       312

    accuracy                           0.88       624
   macro avg       0.90      0.88      0.88       624
weighted avg       0.90      0.88      0.88       624



# Vocal_channel

In [27]:
param_distribs = {
    'boosting_type': ['gbdt','goss',"dart"], 
    'n_estimators': randint(low=50, high=500),
    'max_depth': list(range(2,200)),
    'learning_rate':[0.0001, 0.001, 0.01, 0.1, 1.0],
    'num_leaves': randint(low=5, high=50),
    
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_vc, y_train_vc)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_vc, y_train_vc)

y_pred = best_clf.predict(X_test_vc)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_vc, y_pred))

Best parameters: {'boosting_type': 'goss', 'learning_rate': 0.1, 'max_depth': 191, 'n_estimators': 345, 'num_leaves': 25}
              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98       264
         1.0       0.99      0.97      0.98       360

    accuracy                           0.98       624
   macro avg       0.98      0.98      0.98       624
weighted avg       0.98      0.98      0.98       624



In [29]:
param_distribs = {
    'boosting_type': ['goss'], 
    'n_estimators': [305,315,325,335,345,355,365,375,385,395,405],
    'max_depth': [121,131,141,151,161,171,181,191,200],
    'learning_rate':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
    'num_leaves': [20,22,24,26,28,30,32,34,36,38],
    
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_vc, y_train_vc)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_vc, y_train_vc)

y_pred = best_clf.predict(X_test_vc)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_vc, y_pred))

Best parameters: {'num_leaves': 20, 'n_estimators': 395, 'max_depth': 191, 'learning_rate': 0.1, 'boosting_type': 'goss'}
              precision    recall  f1-score   support

         0.0       0.95      0.99      0.97       264
         1.0       0.99      0.96      0.98       360

    accuracy                           0.98       624
   macro avg       0.97      0.98      0.98       624
weighted avg       0.98      0.98      0.98       624



## Emotional_intensity

In [28]:
param_distribs = {
    'boosting_type': ['gbdt','goss',"dart"], 
    'n_estimators': randint(low=50, high=500),
    'max_depth': list(range(2,200)),
    'learning_rate':[0.0001, 0.001, 0.01, 0.1, 1.0],
    'num_leaves': randint(low=5, high=50),
    
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_ei, y_train_ei)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_ei, y_train_ei)

y_pred = best_clf.predict(X_test_ei)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_ei, y_pred))

Best parameters: {'boosting_type': 'gbdt', 'learning_rate': 0.1, 'max_depth': 34, 'n_estimators': 447, 'num_leaves': 25}
              precision    recall  f1-score   support

         0.0       0.70      0.77      0.73       288
         1.0       0.78      0.72      0.75       336

    accuracy                           0.74       624
   macro avg       0.74      0.75      0.74       624
weighted avg       0.75      0.74      0.74       624



In [30]:
param_distribs = {
    'boosting_type': ['gbdt'], 
    'n_estimators': [407,417,427,437,447,457,467,477,487,497],
    'max_depth': [4,14,24,34,44,54,64,74],
    'learning_rate':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
    'num_leaves': [14,16,18,20,22,24,26,28,30,32,34,36,38],
    
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_ei, y_train_ei)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_ei, y_train_ei)

y_pred = best_clf.predict(X_test_ei)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_ei, y_pred))

Best parameters: {'num_leaves': 16, 'n_estimators': 467, 'max_depth': 64, 'learning_rate': 0.2, 'boosting_type': 'gbdt'}
              precision    recall  f1-score   support

         0.0       0.70      0.78      0.74       288
         1.0       0.79      0.72      0.75       336

    accuracy                           0.75       624
   macro avg       0.75      0.75      0.75       624
weighted avg       0.75      0.75      0.75       624



# Emotion

In [31]:
param_distribs = {
    'boosting_type': ['gbdt','goss',"dart"], 
    'n_estimators': randint(low=50, high=500),
    'max_depth': list(range(2,200)),
    'learning_rate':[0.0001, 0.001, 0.01, 0.1, 1.0],
    'num_leaves': randint(low=5, high=50),
    
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_e, y_train_e)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_e, y_train_e)

y_pred = best_clf.predict(X_test_e)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_e, y_pred))

Best parameters: {'boosting_type': 'goss', 'learning_rate': 0.1, 'max_depth': 55, 'n_estimators': 392, 'num_leaves': 41}
              precision    recall  f1-score   support

         0.0       0.50      0.15      0.23        48
         1.0       0.54      0.68      0.60        96
         2.0       0.42      0.43      0.42        96
         3.0       0.37      0.40      0.38        96
         4.0       0.52      0.83      0.64        96
         5.0       0.57      0.35      0.44        96
         6.0       0.49      0.35      0.41        48
         7.0       0.41      0.35      0.38        48

    accuracy                           0.48       624
   macro avg       0.48      0.44      0.44       624
weighted avg       0.48      0.48      0.46       624



In [45]:
param_distribs = {
    'boosting_type': ["goss"], 
    'n_estimators': [352,362,372,382,392,402,412,422,432,442],
    'max_depth': [25,35,45,55,65,75,85,95,105],
    'learning_rate':[0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
    'num_leaves': [31,33,35,37,39,41,43,45,47,49],
    
}

clf = LGBMClassifier()

random_search = RandomizedSearchCV(clf, param_distributions=param_distribs, n_iter=75, cv=5, random_state=42, n_jobs=-1)
random_search.fit(X_train_e, y_train_e)

best_clf = random_search.best_estimator_ 
best_clf.fit(X_train_e, y_train_e)

y_pred = best_clf.predict(X_test_e)

print("Best parameters:", random_search.best_params_)
print(classification_report(y_test_e, y_pred))

Best parameters: {'num_leaves': 49, 'n_estimators': 432, 'max_depth': 35, 'learning_rate': 0.1, 'boosting_type': 'goss'}
              precision    recall  f1-score   support

         0.0       0.52      0.23      0.32        48
         1.0       0.53      0.68      0.59        96
         2.0       0.35      0.46      0.40        96
         3.0       0.38      0.34      0.36        96
         4.0       0.53      0.75      0.62        96
         5.0       0.56      0.33      0.42        96
         6.0       0.56      0.40      0.46        48
         7.0       0.44      0.35      0.39        48

    accuracy                           0.47       624
   macro avg       0.48      0.44      0.44       624
weighted avg       0.48      0.47      0.46       624



# Metriche per la valutazione dei modelli

## Sex

In [33]:
clf = LGBMClassifier(num_leaves = 21, n_estimators = 422, max_depth= 45, learning_rate= 0.3, boosting_type = "goss")
clf.fit(X_train_s, y_train_s)
y_pred = clf.predict(X_test_s)

print("Accuracy:", accuracy_score(y_test_s, y_pred))
print(classification_report(y_test_s, y_pred, digits=6))

Accuracy: 0.8766025641025641
              precision    recall  f1-score   support

         0.0   0.983539  0.766026  0.861261       312
         1.0   0.808399  0.987179  0.888889       312

    accuracy                       0.876603       624
   macro avg   0.895969  0.876603  0.875075       624
weighted avg   0.895969  0.876603  0.875075       624



In [34]:
y_pred_proba = clf.predict_proba(X_test_s)[:, 1]
fpr_s, tpr_s, _ = roc_curve(y_test_s, y_pred_proba, pos_label=1)
auc_score_s = auc(fpr_s, tpr_s)

print("TPR:", tpr_s)
print("FPR:", fpr_s)
print("AUC score:", auc_score_s)

TPR: [0.         0.00320513 0.61858974 0.61858974 0.82692308 0.82692308
 0.90705128 0.90705128 0.92307692 0.92307692 0.92628205 0.92628205
 0.93269231 0.93269231 0.93589744 0.93589744 0.93910256 0.93910256
 0.94230769 0.94230769 0.94551282 0.94551282 0.94871795 0.94871795
 0.95192308 0.95192308 0.95512821 0.95512821 0.95833333 0.95833333
 0.96794872 0.96794872 0.97115385 0.97115385 0.9775641  0.9775641
 0.98397436 0.98397436 0.98717949 0.98717949 0.99038462 0.99038462
 0.99358974 0.99358974 0.99679487 0.99679487 1.         1.        ]
FPR: [0.         0.         0.         0.00320513 0.00320513 0.00641026
 0.00641026 0.00961538 0.00961538 0.02564103 0.02564103 0.02884615
 0.02884615 0.03846154 0.03846154 0.04166667 0.04166667 0.04807692
 0.04807692 0.05128205 0.05128205 0.06410256 0.06410256 0.07371795
 0.07371795 0.07692308 0.07692308 0.08653846 0.08653846 0.10576923
 0.10576923 0.13461538 0.13461538 0.17307692 0.17307692 0.17948718
 0.17948718 0.18910256 0.18910256 0.23397436 0.23397

In [35]:
sex_roc_s = pd.DataFrame(columns=["SVM", "GB"], 
                       index=["fpr", "tpr", "auc"])
sex_roc_s.loc["fpr"] = [None, None]
sex_roc_s.loc["tpr"] = [None, None]
sex_roc_s.loc["auc"] = [None, None]
sex_roc_s

Unnamed: 0,SVM,GB
fpr,,
tpr,,
auc,,


In [36]:
sex_roc_s.loc["fpr"] [1] = fpr_s
sex_roc_s.loc["tpr"] [1] = tpr_s
sex_roc_s.loc["auc"] [1] = auc_score_s

sex_roc_s.to_csv("sex_roc_GB.csv", index=False) 

## Vocal_channel

In [37]:
clf = LGBMClassifier(num_leaves = 25, n_estimators = 345, max_depth= 191, learning_rate= 0.1, boosting_type = "goss")
clf.fit(X_train_vc, y_train_vc)
y_pred = clf.predict(X_test_vc)

print("Accuracy:", accuracy_score(y_test_vc, y_pred))
print(classification_report(y_test_vc, y_pred, digits=6))

Accuracy: 0.9823717948717948
              precision    recall  f1-score   support

         0.0   0.966790  0.992424  0.979439       264
         1.0   0.994334  0.975000  0.984572       360

    accuracy                       0.982372       624
   macro avg   0.980562  0.983712  0.982006       624
weighted avg   0.982681  0.982372  0.982401       624



In [38]:
y_pred_proba = clf.predict_proba(X_test_vc)[:, 1]
fpr_v, tpr_v, _ = roc_curve(y_test_vc, y_pred_proba, pos_label=1)
auc_score_v = auc(fpr_v, tpr_v)

print("TPR:", tpr_v)
print("FPR:", fpr_v)
print("AUC score:", auc_score_v)

TPR: [0.         0.00277778 0.69722222 0.69722222 0.92777778 0.92777778
 0.975      0.975      0.98055556 0.98055556 0.98333333 0.98333333
 0.98611111 0.98611111 0.98888889 0.98888889 0.99166667 0.99166667
 0.99722222 0.99722222 1.         1.        ]
FPR: [0.         0.         0.         0.00378788 0.00378788 0.00757576
 0.00757576 0.01136364 0.01136364 0.01515152 0.01515152 0.02272727
 0.02272727 0.02651515 0.02651515 0.03787879 0.03787879 0.04166667
 0.04166667 0.04924242 0.04924242 1.        ]
AUC score: 0.9980534511784512


In [39]:
# vocal_channel

vocal_channel_roc_s = pd.DataFrame(columns=["SVM", "GB"],
                       index=["fpr", "tpr", "auc"], )
vocal_channel_roc_s.loc["fpr"] = [None, None]
vocal_channel_roc_s.loc["tpr"] = [None, None]
vocal_channel_roc_s.loc["auc"] = [None, None]
vocal_channel_roc_s

Unnamed: 0,SVM,GB
fpr,,
tpr,,
auc,,


In [40]:
vocal_channel_roc_s.loc["fpr"] [1] = fpr_v
vocal_channel_roc_s.loc["tpr"] [1] = tpr_v
vocal_channel_roc_s.loc["auc"] [1] = auc_score_v

vocal_channel_roc_s.to_csv("vocal_channel_roc_GB.csv", index = False) 

## Emotional_intensity

In [41]:
clf = LGBMClassifier(num_leaves = 16, n_estimators = 467, max_depth= 64, learning_rate= 0.2, boosting_type = "gbdt")
clf.fit(X_train_ei, y_train_ei)
y_pred = clf.predict(X_test_ei)

print("Accuracy:", accuracy_score(y_test_ei, y_pred))
print(classification_report(y_test_ei, y_pred, digits=6))

Accuracy: 0.7467948717948718
              precision    recall  f1-score   support

         0.0   0.704403  0.777778  0.739274       288
         1.0   0.790850  0.720238  0.753894       336

    accuracy                       0.746795       624
   macro avg   0.747626  0.749008  0.746584       624
weighted avg   0.750951  0.746795  0.747146       624



In [42]:
y_pred_proba = clf.predict_proba(X_test_ei)[:, 1]
fpr_ei, tpr_ei, _ = roc_curve(y_test_ei, y_pred_proba, pos_label=1)
auc_score_ei = auc(fpr_ei, tpr_ei)

print("TPR:", tpr_ei)
print("FPR:", fpr_ei)
print("AUC score:", auc_score_ei)

TPR: [0.         0.00297619 0.05059524 0.05059524 0.28571429 0.28571429
 0.33333333 0.33333333 0.36011905 0.36011905 0.37797619 0.37797619
 0.38988095 0.38988095 0.4077381  0.4077381  0.43154762 0.43154762
 0.4375     0.4375     0.44047619 0.44047619 0.44642857 0.44642857
 0.46428571 0.46428571 0.47321429 0.47321429 0.47916667 0.47916667
 0.49404762 0.49404762 0.50297619 0.50297619 0.51190476 0.51190476
 0.5297619  0.5297619  0.54761905 0.54761905 0.55357143 0.55357143
 0.55654762 0.55654762 0.5625     0.5625     0.56547619 0.56547619
 0.57142857 0.57142857 0.58035714 0.58035714 0.58928571 0.58928571
 0.59821429 0.59821429 0.60119048 0.60119048 0.60416667 0.60416667
 0.61309524 0.61309524 0.61904762 0.61904762 0.62202381 0.62202381
 0.625      0.625      0.63690476 0.63690476 0.63988095 0.63988095
 0.64285714 0.64285714 0.65178571 0.65178571 0.66071429 0.66071429
 0.66666667 0.66666667 0.66964286 0.66964286 0.67857143 0.67857143
 0.68154762 0.68154762 0.68452381 0.68452381 0.69940476 0

In [43]:
# emotional_intensity

emotional_intensity_roc_s = pd.DataFrame(columns=["SVM", "GB"], 
                       index=["fpr", "tpr", "auc"])
emotional_intensity_roc_s.loc["fpr"] = [None, None]
emotional_intensity_roc_s.loc["tpr"] = [None, None]
emotional_intensity_roc_s.loc["auc"] = [None, None]
emotional_intensity_roc_s

Unnamed: 0,SVM,GB
fpr,,
tpr,,
auc,,


In [44]:
emotional_intensity_roc_s.loc["fpr"] [1] = fpr_ei
emotional_intensity_roc_s.loc["tpr"] [1] = tpr_ei
emotional_intensity_roc_s.loc["auc"] [1] = auc_score_ei

emotional_intensity_roc_s.to_csv("emotional_intensity_roc_GB.csv", index=False) 

## Emotion 

In [46]:
clf = LGBMClassifier(num_leaves = 41, n_estimators = 392, max_depth= 55, learning_rate= 0.1, boosting_type = "goss")
clf.fit(X_train_e, y_train_e)
y_pred = clf.predict(X_test_e)

print("Accuracy:", accuracy_score(y_test_e, y_pred))
print(classification_report(y_test_e, y_pred, digits=6))

Accuracy: 0.4791666666666667
              precision    recall  f1-score   support

         0.0   0.500000  0.145833  0.225806        48
         1.0   0.541667  0.677083  0.601852        96
         2.0   0.418367  0.427083  0.422680        96
         3.0   0.372549  0.395833  0.383838        96
         4.0   0.519481  0.833333  0.640000        96
         5.0   0.566667  0.354167  0.435897        96
         6.0   0.485714  0.354167  0.409639        48
         7.0   0.414634  0.354167  0.382022        48

    accuracy                       0.479167       624
   macro avg   0.477385  0.442708  0.437717       624
weighted avg   0.479831  0.479167  0.460462       624

