# Final Detection

## Ensure the sample counts of the two datasets are consistent

In [1]:
import pandas as pd

# Load the fused and emotional feature tables and align them on image_id
def load_and_align_features(fused_path, emotional_path):
    """Read both feature CSVs and restrict them to the image ids they share.

    Args:
        fused_path: CSV file with an 'image_id' column, which becomes the index.
        emotional_path: CSV file whose first column is used as the index.

    Returns:
        A ``(fused_features_df, emotional_features_df)`` tuple. Both frames are
        indexed by string ids, keep only the first row of any repeated id, and
        list the shared ids in the same order.
    """
    def _with_unique_str_index(frame):
        # Cast ids to str so both tables compare alike, then keep the first
        # row of every repeated id.
        frame.index = frame.index.astype(str)
        return frame[~frame.index.duplicated(keep='first')]

    fused = _with_unique_str_index(pd.read_csv(fused_path).set_index('image_id'))
    emotional = _with_unique_str_index(pd.read_csv(emotional_path, index_col=0))

    # Ids present in both tables
    shared_ids = fused.index.intersection(emotional.index)

    # Debug output: ids that exist in only one of the two tables
    fused_ids = set(fused.index)
    emotional_ids = set(emotional.index)
    print(f"Image IDs in fused features but not in emotional features: {fused_ids - emotional_ids}")
    print(f"Image IDs in emotional_features but not in fused features: {emotional_ids - fused_ids}")

    # Keep only the shared ids, selected with the same id list for both frames
    return fused.loc[shared_ids], emotional.loc[shared_ids]

# Input files
fused_features_path = 'fused.csv'
emotional_features_path = 'SA_01.csv'

# Step 1: load both feature tables and align them on their shared image ids
fused_features_df, emotional_features_df = load_and_align_features(
    fused_features_path,
    emotional_features_path,
)

# Preview the first rows of each aligned table
print("Aligned Fused features:")
print(fused_features_df.head())

print("\nAligned Emotional features:")
print(emotional_features_df.head())


# Compare the row counts of the two aligned tables
fused_sample_count = len(fused_features_df)
emotional_sample_count = len(emotional_features_df)

print(f'Fused features sample count: {fused_sample_count}')
print(f'Emotional features sample count: {emotional_sample_count}')

if fused_sample_count == emotional_sample_count:
    print("The sample counts for fused features and emotional features are consistent.")
else:
    print("Warning: The sample counts for fused features and emotional features are not consistent.")


Image IDs in fused features but not in emotional features: set()
Image IDs in emotional_features but not in fused features: set()
Aligned Fused features:
                                         0         1         2         3  \
image_id                                                                   
62b31d36gw1expsi2gfrdj20hm0loq8o -0.521330  0.065525 -0.028026  0.187444   
563a2b53jw1exl77nkup7j20c30f3q4j -0.482425  0.079567 -0.036039  0.194375   
005ldo0ygw1ex23rdfuqcj30xo0k6di0 -0.468386  0.084869 -0.049615  0.144983   
62b31d36gw1exfcmyz8agj20qq0hu77k -0.468805  0.067019 -0.031303  0.199792   
0060kjm0jw1exdjaeiqadj30xc0m8tdw -0.473862  0.089600 -0.023923  0.144352   

                                         4         5         6         7  \
image_id                                                                   
62b31d36gw1expsi2gfrdj20hm0loq8o  0.023185  0.102659 -0.126628 -0.045669   
563a2b53jw1exl77nkup7j20c30f3q4j  0.036827  0.083796 -0.112961  0.000671   
005ldo0yg

## Data alignment - merge through 'image_id'

In [2]:
import pandas as pd
import pickle

# Load labels from pickle file
def load_labels_from_pickle(pickle_file):
    """Build an ``image_id -> label`` dictionary from a pickled record.

    The unpickled object is indexed with the keys 'image_id' and 'label';
    the two sequences are paired element by element and every id is converted
    to ``str``. If an id occurs more than once, its last label is kept.

    Args:
        pickle_file: Path of the pickle file to read.

    Returns:
        dict mapping the string form of each image id to its label.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use this on
    # label files from a trusted source.
    with open(pickle_file, 'rb') as handle:
        payload = pickle.load(handle)

    return {
        str(image_id): label
        for image_id, label in zip(payload['image_id'], payload['label'])
    }

# Merge features with labels
def merge_with_labels(features_df, labels_dict):
    """Attach a 'label' column to a feature table by joining on the row index.

    Args:
        features_df: DataFrame indexed by image id. It is not modified; the
            string conversion of its index is applied to a copy.
        labels_dict: Mapping of image id to label.

    Returns:
        A new DataFrame holding the rows of ``features_df`` whose id (compared
        as a string) has an entry in ``labels_dict``, with that entry added as
        the 'label' column (inner join on the index).
    """
    labels_df = pd.DataFrame(list(labels_dict.items()), columns=['image_id', 'label'])
    labels_df = labels_df.set_index('image_id')
    labels_df.index = labels_df.index.astype(str)

    # Work on a copy: assigning a new index to features_df itself would
    # silently rewrite the caller's frame.
    features = features_df.copy()
    features.index = features.index.astype(str)

    # Inner join keeps only ids that have both features and a label
    return pd.merge(features, labels_df, left_index=True, right_index=True, how='inner')

# Label file
pickle_path = 'train.pkl'

# image_id -> label lookup
labels_dict = load_labels_from_pickle(pickle_path)

# Index both feature tables by string ids before joining them with the labels
fused_features_df.index = fused_features_df.index.astype(str)
emotional_features_df.index = emotional_features_df.index.astype(str)

# Attach the labels to each feature table
fused_merged_df = merge_with_labels(fused_features_df, labels_dict)
emotional_merged_df = merge_with_labels(emotional_features_df, labels_dict)

# Preview the first rows of each labelled table
print("Fused features with labels:")
print(fused_merged_df.head())

print("\nEmotional features with labels:")
print(emotional_merged_df.head())

# Compare the row counts of the two labelled tables
fused_sample_count = len(fused_merged_df)
emotional_sample_count = len(emotional_merged_df)

print(f'Fused features with labels sample count: {fused_sample_count}')
print(f'Emotional features with labels sample count: {emotional_sample_count}')

if fused_sample_count == emotional_sample_count:
    print("The sample counts for fused features and emotional features with labels are consistent.")
else:
    print("Warning: The sample counts for fused features and emotional features with labels are not consistent.")

# Optional export of the labelled tables (disabled)
# fused_merged_df.to_csv('fused_with_labels.csv')
# emotional_merged_df.to_csv('emotional_with_labels.csv')


Fused features with labels:
                                         0         1         2         3  \
image_id                                                                   
62b31d36gw1expsi2gfrdj20hm0loq8o -0.521330  0.065525 -0.028026  0.187444   
563a2b53jw1exl77nkup7j20c30f3q4j -0.482425  0.079567 -0.036039  0.194375   
005ldo0ygw1ex23rdfuqcj30xo0k6di0 -0.468386  0.084869 -0.049615  0.144983   
62b31d36gw1exfcmyz8agj20qq0hu77k -0.468805  0.067019 -0.031303  0.199792   
0060kjm0jw1exdjaeiqadj30xc0m8tdw -0.473862  0.089600 -0.023923  0.144352   

                                         4         5         6         7  \
image_id                                                                   
62b31d36gw1expsi2gfrdj20hm0loq8o  0.023185  0.102659 -0.126628 -0.045669   
563a2b53jw1exl77nkup7j20c30f3q4j  0.036827  0.083796 -0.112961  0.000671   
005ldo0ygw1ex23rdfuqcj30xo0k6di0  0.038398  0.096167 -0.096763 -0.018022   
62b31d36gw1exfcmyz8agj20qq0hu77k  0.033397  0.084818 -0.119

# Split into train & test datasets

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# The two feature sets are split by two separate train_test_split calls below.
# Those calls only select the same samples when both tables list the same
# image ids in the same order, so verify that before splitting.
if not fused_merged_df.index.equals(emotional_merged_df.index):
    raise ValueError(
        "fused_merged_df and emotional_merged_df must list the same image_ids in the same order"
    )

# Split the data into features and labels; reset_index() turns the image_id
# index into a regular column so the ids travel through the split.
X_fused = fused_merged_df.reset_index().drop('label', axis=1)
y_fused = fused_merged_df['label']

X_emotional = emotional_merged_df.reset_index().drop('label', axis=1)
y_emotional = emotional_merged_df['label']

# Split the datasets into training and testing sets (same test_size and seed for both)
X_fused_train, X_fused_test, y_fused_train, y_fused_test = train_test_split(X_fused, y_fused, test_size=0.2, random_state=42)
X_emotional_train, X_emotional_test, y_emotional_train, y_emotional_test = train_test_split(X_emotional, y_emotional, test_size=0.2, random_state=42)

# Store image_id for merging later
image_id_train_f = X_fused_train['image_id']
image_id_test_f = X_fused_test['image_id']

image_id_train_e = X_emotional_train['image_id']
image_id_test_e = X_emotional_test['image_id']


# Remove image_id from the training and testing sets
X_fused_train = X_fused_train.drop('image_id', axis=1)
X_fused_test = X_fused_test.drop('image_id', axis=1)
X_emotional_train = X_emotional_train.drop('image_id', axis=1)
X_emotional_test = X_emotional_test.drop('image_id', axis=1)

# Standardize the features: each scaler is fit on the training split only and
# then applied to both splits (the results are NumPy arrays, not DataFrames).
scaler_fused = StandardScaler()
X_fused_train = scaler_fused.fit_transform(X_fused_train)
X_fused_test = scaler_fused.transform(X_fused_test)

scaler_emotional = StandardScaler()
X_emotional_train = scaler_emotional.fit_transform(X_emotional_train)
X_emotional_test = scaler_emotional.transform(X_emotional_test)


# Fused Tensor

## PCA

In [4]:
import numpy as np
from sklearn.decomposition import PCA

# Reduce the fused features with PCA: fit on the training data only, then
# transform both the training and the test data with the fitted projection.
# random_state is fixed like for every other estimator in this notebook: with
# the default svd_solver='auto' scikit-learn may pick the randomized solver,
# whose result would otherwise change from run to run.
pca = PCA(n_components=200, random_state=42)  # number of components given directly
X_fused_train_pca = pca.fit_transform(X_fused_train)  # training set after PCA
X_fused_test_pca = pca.transform(X_fused_test)  # test set after PCA

# Check the new dimensionality and the explained variance ratio
print("New training dimensions:", X_fused_train_pca.shape[1])
print("New testing dimensions:", X_fused_test_pca.shape[1])
# print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total explained variance:", np.sum(pca.explained_variance_ratio_))


New training dimensions: 200
New testing dimensions: 200
Total explained variance: 0.9493278247524244


In [5]:
# Count the NaN values in the training and test labels
print(f"NaN values in y_fused_train: {y_fused_train.isnull().sum()}")
print(f"NaN values in y_fused_test: {y_fused_test.isnull().sum()}")

NaN values in y_fused_train: 0
NaN values in y_fused_test: 0


In [6]:
# Drop the samples whose label is NaN. The feature rows are filtered first,
# while the labels still contain the NaN entries that define the mask.
# NOTE(review): only the fused features/labels are filtered here; the stored
# image ids and the emotional splits are left untouched — confirm this is
# acceptable if NaN labels can actually occur.
X_fused_train = X_fused_train[~y_fused_train.isna()]
y_fused_train = y_fused_train[~y_fused_train.isna()]

X_fused_test = X_fused_test[~y_fused_test.isna()]
y_fused_test = y_fused_test[~y_fused_test.isna()]


## MLP - 4 hidden layer

In [7]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.pipeline import make_pipeline


# MLP classifier configured with the chosen best parameters
mlp_fused = MLPClassifier(
    hidden_layer_sizes=(200, 100, 50, 25),
    activation='relu',
    solver='adam',
    alpha=0.01,
    batch_size=64,
    learning_rate_init=0.001,
    max_iter=200,
    random_state=42,
)

# Pipeline: standardization -> PCA -> MLP. `pca` is the PCA object created in
# the PCA cell; fitting the pipeline fits it again. X_fused_train was already
# standardized when the split was created.
pipeline_fused = make_pipeline(StandardScaler(), pca, mlp_fused)

# Train the model
pipeline_fused.fit(X_fused_train, y_fused_train)

# Predicted labels and class probabilities for both splits
mlp_train_preds = pipeline_fused.predict(X_fused_train)
mlp_test_preds = pipeline_fused.predict(X_fused_test)
fused_probs_train = pipeline_fused.predict_proba(X_fused_train)
fused_probs_test = pipeline_fused.predict_proba(X_fused_test)

# Accuracy on the training and test sets
mlp_train_accuracy = accuracy_score(y_fused_train, mlp_train_preds)
mlp_test_accuracy = accuracy_score(y_fused_test, mlp_test_preds)

print(f'Training accuracy: {mlp_train_accuracy}')
print(f'Testing accuracy: {mlp_test_accuracy}')

# Per-class precision / recall / F1
print("\nClassification Report for Training Set:")
print(classification_report(y_fused_train, mlp_train_preds))

print("\nClassification Report for Testing Set:")
print(classification_report(y_fused_test, mlp_test_preds))

# Quick look at the first predicted probabilities
print(f'fused_probs_train: {fused_probs_train[:5]}')
print(f'fused_probs_test: {fused_probs_test[:5]}')

Training accuracy: 0.9840646651270207
Testing accuracy: 0.6103416435826408

Classification Report for Training Set:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1999
           1       0.99      0.98      0.99      2331

    accuracy                           0.98      4330
   macro avg       0.98      0.98      0.98      4330
weighted avg       0.98      0.98      0.98      4330


Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.59      0.60      0.60       516
           1       0.63      0.62      0.62       567

    accuracy                           0.61      1083
   macro avg       0.61      0.61      0.61      1083
weighted avg       0.61      0.61      0.61      1083

fused_probs_train: [[1.39781401e-01 8.60218599e-01]
 [9.99999624e-01 3.75778251e-07]
 [9.99996572e-01 3.42779744e-06]
 [9.99994032e-01 5.96802291e-06]
 [3.57320608e-05 9.99964268e-01]]
fused

# Emotional Tensor

## Random Forest

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


# Random forest classifier configured with the known best parameters
rf_emotional = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_split=2,
    random_state=42,
)

# Pipeline: standardization -> random forest. X_emotional_train was already
# standardized when the split was created.
pipeline_emotional = make_pipeline(StandardScaler(), rf_emotional)

# Fit the model on the training data
pipeline_emotional.fit(X_emotional_train, y_emotional_train)

# Predicted labels and class probabilities for both splits
rf_train_preds = pipeline_emotional.predict(X_emotional_train)
rf_test_preds = pipeline_emotional.predict(X_emotional_test)
emotional_probs_train = pipeline_emotional.predict_proba(X_emotional_train)
emotional_probs_test = pipeline_emotional.predict_proba(X_emotional_test)

# Accuracy on the training and test sets
rf_train_accuracy = accuracy_score(y_emotional_train, rf_train_preds)
rf_test_accuracy = accuracy_score(y_emotional_test, rf_test_preds)

print(f'Training accuracy: {rf_train_accuracy}')
print(f'Testing accuracy: {rf_test_accuracy}')

# Per-class precision / recall / F1
print("\nClassification Report for Training Set:")
print(classification_report(y_emotional_train, rf_train_preds))

print("\nClassification Report for Testing Set:")
print(classification_report(y_emotional_test, rf_test_preds))

# Quick look at the first predicted probabilities
print(f'emotional_probs_train: {emotional_probs_train[:5]}')
print(f'emotional_probs_test: {emotional_probs_test[:5]}')

Training accuracy: 0.7889145496535797
Testing accuracy: 0.5752539242843951

Classification Report for Training Set:
              precision    recall  f1-score   support

           0       0.80      0.72      0.76      1999
           1       0.78      0.85      0.81      2331

    accuracy                           0.79      4330
   macro avg       0.79      0.78      0.79      4330
weighted avg       0.79      0.79      0.79      4330


Classification Report for Testing Set:
              precision    recall  f1-score   support

           0       0.57      0.46      0.51       516
           1       0.58      0.68      0.63       567

    accuracy                           0.58      1083
   macro avg       0.57      0.57      0.57      1083
weighted avg       0.57      0.58      0.57      1083

emotional_probs_train: [[0.44559024 0.55440976]
 [0.59227008 0.40772992]
 [0.44709619 0.55290381]
 [0.49111273 0.50888727]
 [0.42279922 0.57720078]]
emotional_probs_test: [[0.35442145 0.6455

# Meta Model

## Data Merged

In [9]:
import pandas as pd

# Make sure every stored image_id is a plain string
image_id_train_f = [str(image_id) for image_id in image_id_train_f]
image_id_train_e = [str(image_id) for image_id in image_id_train_e]
image_id_test_f = [str(image_id) for image_id in image_id_test_f]
image_id_test_e = [str(image_id) for image_id in image_id_test_e]

# Class probabilities from both base models. Each model is queried through its
# fitted pipeline so the inputs pass through the same preprocessing steps the
# classifier was fitted behind; calling rf_emotional directly would skip the
# StandardScaler step of pipeline_emotional.
fused_probs_train = pipeline_fused.predict_proba(X_fused_train)
fused_probs_test = pipeline_fused.predict_proba(X_fused_test)
emotional_probs_train = pipeline_emotional.predict_proba(X_emotional_train)
emotional_probs_test = pipeline_emotional.predict_proba(X_emotional_test)

# NOTE(review): the training-split probabilities are predictions of models on
# the very samples they were fitted on. Consider out-of-fold predictions
# (e.g. sklearn.model_selection.cross_val_predict) as meta-model training
# features — confirm with the author.

# Class labels, used to name the probability columns
class_labels_fused = pipeline_fused.classes_
class_labels_emotional = pipeline_emotional.classes_

# Wrap the probabilities in data frames with one named column per class
df_fused_probs_train = pd.DataFrame(fused_probs_train, columns=[f'fused_prob_{cls}' for cls in class_labels_fused])
df_fused_probs_test = pd.DataFrame(fused_probs_test, columns=[f'fused_prob_{cls}' for cls in class_labels_fused])
df_emotional_probs_train = pd.DataFrame(emotional_probs_train, columns=[f'emotional_prob_{cls}' for cls in class_labels_emotional])
df_emotional_probs_test = pd.DataFrame(emotional_probs_test, columns=[f'emotional_prob_{cls}' for cls in class_labels_emotional])

# Add the image_id column (rows are in the same order as the split they came from)
df_fused_probs_train['image_id'] = image_id_train_f
df_fused_probs_test['image_id'] = image_id_test_f
df_emotional_probs_train['image_id'] = image_id_train_e
df_emotional_probs_test['image_id'] = image_id_test_e

# Data frames holding image_id and label. The labels are paired with the fused
# image ids by position, assuming both come from the same split in the same
# order; to_numpy() drops the label Series' own index so the result has a
# plain 0..n-1 index.
df_label_train = pd.DataFrame({
    'image_id': image_id_train_f,
    'label': y_fused_train.to_numpy()
})

df_label_test = pd.DataFrame({
    'image_id': image_id_test_f,
    'label': y_fused_test.to_numpy()
})

# Align the training data on image_id. validate='one_to_one' makes pandas raise
# if an id is repeated on either side instead of silently multiplying rows.
meta_train_df = pd.merge(df_fused_probs_train, df_emotional_probs_train, on='image_id', validate='one_to_one')
meta_train_df = pd.merge(meta_train_df, df_label_train, on='image_id', validate='one_to_one')

# Align the test data on image_id
meta_test_df = pd.merge(df_fused_probs_test, df_emotional_probs_test, on='image_id', validate='one_to_one')
meta_test_df = pd.merge(meta_test_df, df_label_test, on='image_id', validate='one_to_one')

# Inspect the merged data frames
print(meta_train_df.head())
print(meta_test_df.head())


   fused_prob_0  fused_prob_1                          image_id  \
0      0.139781  8.602186e-01  6693ce84gw1eyfsiy3rptj20c80960u6   
1      1.000000  3.757783e-07  652f5916jw1ezod9f7u5tj20c80963yl   
2      0.999997  3.427797e-06  75b52ed2jw1ewcdnpunn9j20r80kfadv   
3      0.999994  5.968023e-06  63207a53jw1eyffma5xquj20c807gglz   
4      0.000036  9.999643e-01  6bdb0c34jw1eqqhdw68loj20hs0vkgpy   

   emotional_prob_0  emotional_prob_1  label  
0          0.445590          0.554410      0  
1          0.592270          0.407730      0  
2          0.447096          0.552904      0  
3          0.491113          0.508887      0  
4          0.422799          0.577201      1  
   fused_prob_0  fused_prob_1                          image_id  \
0      0.999852      0.000148  620beb06gw1eymgpmehb8j20b308bweh   
1      0.589826      0.410174  6e05a6dagw1evwh8gzxnpj20hs0tx798   
2      0.999803      0.000197  0061d424jw1eqjwrzpv8dj30go0goq6j   
3      0.003976      0.996024  644471aegw1ez9os

## MLP

In [10]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import pandas as pd

# Meta-level MLP; meta_train_df and meta_test_df are assumed to be defined already
mlp = MLPClassifier(hidden_layer_sizes=(50,), solver='lbfgs', max_iter=500, random_state=42)

# Fit on the class-1 probabilities of the two base models
X_train = meta_train_df.loc[:, ['fused_prob_1', 'emotional_prob_1']]
y_train = meta_train_df.loc[:, 'label']
mlp.fit(X_train, y_train)

# Predict a label for every test sample
X_test = meta_test_df.loc[:, ['fused_prob_1', 'emotional_prob_1']]
y_test = meta_test_df.loc[:, 'label']
mlp_predictions = mlp.predict(X_test)

# Report NaN values among the predictions
if pd.isna(mlp_predictions).any():
    print("MLP 预测结果中包含 NaN 值")

# Store the predictions as the 'mlp_pred' column
meta_test_df['mlp_pred'] = mlp_predictions

# Evaluation metrics (precision / recall / F1 are support-weighted averages)
accuracy = accuracy_score(y_test, mlp_predictions)
precision = precision_score(y_test, mlp_predictions, average='weighted')
recall = recall_score(y_test, mlp_predictions, average='weighted')
f1 = f1_score(y_test, mlp_predictions, average='weighted')
classification_rep = classification_report(y_test, mlp_predictions)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print("Classification Report:")
print(classification_rep)

# Number of missing predictions, followed by the first rows of the test table
print(meta_test_df['mlp_pred'].isna().sum())
print(meta_test_df.head())


Accuracy: 0.61865189289012
Precision: 0.6189834518384246
Recall: 0.61865189289012
F1 Score: 0.6187749793108587
Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.61      0.60       516
           1       0.64      0.63      0.63       567

    accuracy                           0.62      1083
   macro avg       0.62      0.62      0.62      1083
weighted avg       0.62      0.62      0.62      1083

0
   fused_prob_0  fused_prob_1                          image_id  \
0      0.999852      0.000148  620beb06gw1eymgpmehb8j20b308bweh   
1      0.589826      0.410174  6e05a6dagw1evwh8gzxnpj20hs0tx798   
2      0.999803      0.000197  0061d424jw1eqjwrzpv8dj30go0goq6j   
3      0.003976      0.996024  644471aegw1ez9osj60jxj20c80c840m   
4      0.998286      0.001714            6ceb7622jw1e1h3mo4pvvj   

   emotional_prob_0  emotional_prob_1  label  mlp_pred  
0          0.354421          0.645579      0         0  
1          0.535562 

## Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression


# Meta-level logistic regression
log_reg = LogisticRegression(random_state=42, max_iter=100)

# Fit on the class-1 probabilities of the two base models
X_train = meta_train_df.loc[:, ['fused_prob_1', 'emotional_prob_1']]
y_train = meta_train_df.loc[:, 'label']
log_reg.fit(X_train, y_train)

# Predict a label for every test sample
X_test = meta_test_df.loc[:, ['fused_prob_1', 'emotional_prob_1']]
y_test = meta_test_df.loc[:, 'label']
log_reg_predictions = log_reg.predict(X_test)

# Report NaN values among the predictions
if pd.isna(log_reg_predictions).any():
    print("逻辑回归预测结果中包含 NaN 值")

# Store the predictions as the 'log_reg_pred' column
meta_test_df['log_reg_pred'] = log_reg_predictions

# Evaluation metrics (precision / recall / F1 are support-weighted averages)
accuracy = accuracy_score(y_test, log_reg_predictions)
precision = precision_score(y_test, log_reg_predictions, average='weighted')
recall = recall_score(y_test, log_reg_predictions, average='weighted')
f1 = f1_score(y_test, log_reg_predictions, average='weighted')
classification_rep = classification_report(y_test, log_reg_predictions)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print("Classification Report:")
print(classification_rep)

# Number of missing predictions, followed by the first rows of the test table
print(meta_test_df['log_reg_pred'].isna().sum())
print(meta_test_df.head())


Accuracy: 0.615881809787627
Precision: 0.6160967442173018
Recall: 0.615881809787627
F1 Score: 0.6159703870360157
Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.60      0.60       516
           1       0.63      0.63      0.63       567

    accuracy                           0.62      1083
   macro avg       0.62      0.62      0.62      1083
weighted avg       0.62      0.62      0.62      1083

0
   fused_prob_0  fused_prob_1                          image_id  \
0      0.999852      0.000148  620beb06gw1eymgpmehb8j20b308bweh   
1      0.589826      0.410174  6e05a6dagw1evwh8gzxnpj20hs0tx798   
2      0.999803      0.000197  0061d424jw1eqjwrzpv8dj30go0goq6j   
3      0.003976      0.996024  644471aegw1ez9osj60jxj20c80c840m   
4      0.998286      0.001714            6ceb7622jw1e1h3mo4pvvj   

   emotional_prob_0  emotional_prob_1  label  mlp_pred  log_reg_pred  
0          0.354421          0.645579      0         0       