# Embedding

In [2]:
%pip install protobuf

Collecting protobuf
  Obtaining dependency information for protobuf from https://files.pythonhosted.org/packages/8c/f3/6f58f841f6ebafe076cebeae33fc336e900619d34b1c93e4b5c97a81fdfa/protobuf-6.32.1-cp310-abi3-win_amd64.whl.metadata
  Downloading protobuf-6.32.1-cp310-abi3-win_amd64.whl.metadata (593 bytes)
Downloading protobuf-6.32.1-cp310-abi3-win_amd64.whl (435 kB)
   ---------------------------------------- 0.0/435.7 kB ? eta -:--:--
   ---------------------------------------- 0.0/435.7 kB ? eta -:--:--
    --------------------------------------- 10.2/435.7 kB ? eta -:--:--
    --------------------------------------- 10.2/435.7 kB ? eta -:--:--
    --------------------------------------- 10.2/435.7 kB ? eta -:--:--
    --------------------------------------- 10.2/435.7 kB ? eta -:--:--
    --------------------------------------- 10.2/435.7 kB ? eta -:--:--
   -- ------------------------------------- 30.7/435.7 kB 87.5 kB/s eta 0:00:05
   --- ------------------------------------ 41.0/4


[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import os
import pandas as pd
import torch
import torchaudio
from transformers import AutoFeatureExtractor, AutoModel
import warnings
warnings.filterwarnings("ignore")

# Label table; the extraction loop below reads its "id" and "emotion" columns.
data = pd.read_csv("datatrain.csv")

# The feature extractor and the encoder are loaded from the same pretrained
# checkpoint, so the identifier is spelled once and shared.
checkpoint = "superb/wav2vec2-base-superb-er"
extractor = AutoFeatureExtractor.from_pretrained(checkpoint)
# eval() returns the module itself, so loading and switching to inference mode chain.
model = AutoModel.from_pretrained(checkpoint).eval()

# Load an audio file and extract its embedding
def extract_embedding(file_path):
    """Return one fixed-length embedding vector for a single audio file.

    The file is decoded with torchaudio, resampled to 16 kHz when needed,
    downmixed to mono, passed through the module-level ``extractor`` and
    ``model``, and mean-pooled over the time axis.

    Args:
        file_path: Path of the audio file to embed.

    Returns:
        A 1-D NumPy array holding the time-averaged last hidden state.
    """
    waveform, sr = torchaudio.load(file_path)
    if sr != 16000:
        waveform = torchaudio.functional.resample(waveform, sr, 16000)

    # torchaudio.load yields (channels, frames). A bare squeeze() only removes the
    # channel axis for mono files; a stereo clip would stay 2-D, be treated by the
    # extractor as a batch of two signals and produce a (2, hidden) result instead
    # of a single vector. Averaging the channels gives a 1-D mono signal in every
    # case (and leaves mono input unchanged).
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    inputs = extractor(
        waveform.numpy(),
        sampling_rate=16000,
        return_tensors="pt",
        padding=True
    )
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling over the time axis to obtain a fixed-size embedding; squeeze(0)
    # drops only the batch axis.
    embedding = outputs.last_hidden_state.mean(dim=1).squeeze(0).numpy()
    return embedding

# Parallel accumulators: one entry per row whose audio file was found.
embeddings = []
labels = []
ids = []
# Paths named by the label table that do not exist on disk.
missing_files = []

audio_folder = "audios/train"

for audio_id, label in zip(data["id"], data["emotion"]):
    # Audio files are looked up as "<id>_<emotion>.mp3" inside audio_folder.
    file_path = os.path.join(audio_folder, f"{audio_id}_{label}.mp3")
    if not os.path.exists(file_path):
        missing_files.append(file_path)
        continue
    embeddings.append(extract_embedding(file_path))
    labels.append(label)
    ids.append(audio_id)

# Rows without audio are still skipped, but the skip is reported so that a
# shrunken training table does not go unnoticed.
if missing_files:
    print(f"Skipped {len(missing_files)} of {len(data)} rows without an audio file, e.g. {missing_files[0]}")

# One row per embedded file: numbered embedding columns followed by "id" and "emotion".
features_df = pd.DataFrame(embeddings)
features_df["id"] = ids
features_df["emotion"] = labels

print(features_df.head())

          0         1         2         3         4         5         6  \
0  0.237063 -0.070565  0.009176  0.183270 -0.040680 -0.197609  0.232199   
1  0.145789 -0.069121  0.183287  0.131640  0.043517 -0.267986  0.249648   
2 -0.053000  0.000488 -0.003376  0.194948  0.174169 -0.170566  0.207516   
3  0.128055 -0.099878  0.080856  0.132698 -0.040625 -0.131573  0.237325   
4  0.062890 -0.043324  0.102488  0.291972  0.000740 -0.080042  0.171125   

          7         8         9  ...       760       761       762       763  \
0 -0.168209 -0.072978 -0.008000  ...  0.121860 -0.043238 -0.409986 -0.006978   
1 -0.167450 -0.158413 -0.011003  ...  0.051657 -0.144586 -0.323405 -0.018629   
2 -0.152446 -0.123187 -0.016356  ... -0.061031  0.008110 -0.526775 -0.075371   
3 -0.139872 -0.013025 -0.013025  ...  0.046357  0.058788 -0.384343  0.009585   
4 -0.180252  0.212147 -0.034245  ... -0.078140  0.160784 -0.455938  0.058371   

        764       765       766       767  id   emotion  
0 -0.14401

In [7]:
# Persist the embedding table (embedding columns plus "id" and "emotion") so the
# modeling section can reload it from disk instead of repeating the extraction.
features_df.to_pickle("train_embeddings_audio_2.pkl")

# PEMODELAN (MODELING)

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [9]:
# Reload the cached embedding table written by the embedding section.
# Note: this rebinds `data`, which earlier held the label CSV.
data = pd.read_pickle("train_embeddings_audio_2.pkl")
# Quick look at the first rows to confirm the table loaded as expected.
print(data.head())

          0         1         2         3         4         5         6  \
0  0.237063 -0.070565  0.009176  0.183270 -0.040680 -0.197609  0.232199   
1  0.145789 -0.069121  0.183287  0.131640  0.043517 -0.267986  0.249648   
2 -0.053000  0.000488 -0.003376  0.194948  0.174169 -0.170566  0.207516   
3  0.128055 -0.099878  0.080856  0.132698 -0.040625 -0.131573  0.237325   
4  0.062890 -0.043324  0.102488  0.291972  0.000740 -0.080042  0.171125   

          7         8         9  ...       760       761       762       763  \
0 -0.168209 -0.072978 -0.008000  ...  0.121860 -0.043238 -0.409986 -0.006978   
1 -0.167450 -0.158413 -0.011003  ...  0.051657 -0.144586 -0.323405 -0.018629   
2 -0.152446 -0.123187 -0.016356  ... -0.061031  0.008110 -0.526775 -0.075371   
3 -0.139872 -0.013025 -0.013025  ...  0.046357  0.058788 -0.384343  0.009585   
4 -0.180252  0.212147 -0.034245  ... -0.078140  0.160784 -0.455938  0.058371   

        764       765       766       767  id   emotion  
0 -0.14401

In [10]:
# Target: the emotion label column.
y = data["emotion"]
# Features: every column except the identifier and the label.
X = data.drop(columns=["id", "emotion"])

# 80/20 hold-out split, stratified on the label and seeded for repeatability.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Integer encoding of the emotion names: each name's position in this list is its code.
custom_labels = {
    name: code
    for code, name in enumerate(
        ["Proud", "Trust", "Joy", "Surprise", "Neutral", "Sadness", "Fear", "Anger"]
    )
}

# Apply the same name -> code mapping to both halves of the split.
y_train_encoded, y_val_encoded = (
    split_labels.map(custom_labels) for split_labels in (y_train, y_val)
)

## RANDOM FOREST

In [11]:
import warnings
# Install a catch-all "ignore" filter so no warning is shown by the cells below.
# NOTE(review): this presumably exists to hide metric warnings for classes that
# are never predicted; it also hides every other warning, so confirm it is wanted.
warnings.filterwarnings("ignore")

In [46]:
from sklearn.ensemble import RandomForestClassifier

# Seeded 100-tree random forest trained on the string labels. fit() returns the
# estimator, and the bare name on the last line makes the notebook display it.
model_rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
model_rf

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [47]:
from sklearn.metrics import classification_report, f1_score

# Hold-out evaluation of the random forest: per-class report, then macro-averaged F1.
y_pred = model_rf.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.00      0.00      0.00         3
         Joy       0.00      0.00      0.00        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.33      0.07      0.11        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.42      0.85      0.56        62
       Trust       0.28      0.14      0.19        35

    accuracy                           0.40       151
   macro avg       0.13      0.13      0.11       151
weighted avg       0.30      0.40      0.30       151

Macro F1 Score: 0.10795251795352157


## NAIVE BAYES

In [14]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with default settings; the bare name on the last line
# displays the fitted estimator.
model_gnb = GaussianNB().fit(X_train, y_train)
model_gnb

0,1,2
,priors,
,var_smoothing,1e-09


In [15]:
# Hold-out evaluation of Gaussian naive Bayes: per-class report, then macro-averaged F1.
y_pred = model_gnb.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.09      0.57      0.15         7
        Fear       0.12      0.67      0.20         3
         Joy       0.11      0.40      0.17        10
     Neutral       0.10      1.00      0.18         1
       Proud       0.29      0.07      0.11        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.45      0.08      0.14        62
       Trust       0.29      0.14      0.19        35

    accuracy                           0.15       151
   macro avg       0.18      0.37      0.14       151
weighted avg       0.33      0.15      0.15       151

Macro F1 Score: 0.14266018440378178


In [16]:
from sklearn.naive_bayes import BernoulliNB

# Bernoulli naive Bayes with default settings; the bare name on the last line
# displays the fitted estimator.
model_bnb = BernoulliNB().fit(X_train, y_train)
model_bnb

0,1,2
,alpha,1.0
,force_alpha,True
,binarize,0.0
,fit_prior,True
,class_prior,


In [17]:
# Hold-out evaluation of Bernoulli naive Bayes: per-class report, then macro-averaged F1.
y_pred = model_bnb.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.07      0.43      0.12         7
        Fear       0.20      0.67      0.31         3
         Joy       0.11      0.40      0.17        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.29      0.07      0.11        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.42      0.16      0.23        62
       Trust       0.17      0.14      0.16        35

    accuracy                           0.17       151
   macro avg       0.16      0.23      0.14       151
weighted avg       0.28      0.17      0.18       151

Macro F1 Score: 0.13703440275024575


## SVM

In [18]:
from sklearn.svm import SVC

# Seeded linear-kernel support vector classifier; the bare name on the last
# line displays the fitted estimator.
model_svc = SVC(random_state=42, kernel='linear').fit(X_train, y_train)
model_svc

0,1,2
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [19]:
# Hold-out evaluation of the linear SVM: per-class report, then macro-averaged F1.
y_pred = model_svc.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       1.00      0.67      0.80         3
         Joy       0.00      0.00      0.00        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.29      0.17      0.21        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.44      0.74      0.55        62
       Trust       0.40      0.29      0.33        35

    accuracy                           0.42       151
   macro avg       0.27      0.23      0.24       151
weighted avg       0.35      0.42      0.36       151

Macro F1 Score: 0.23712468679661955


## KNN

In [34]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier voting over 5 neighbours; the bare name on the
# last line displays the fitted estimator.
model_knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
model_knn

0,1,2
,n_neighbors,5
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


In [35]:
# Hold-out evaluation of k-nearest neighbours: per-class report, then macro-averaged F1.
y_pred = model_knn.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.00      0.00      0.00         3
         Joy       0.40      0.20      0.27        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.26      0.33      0.29        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.47      0.53      0.50        62
       Trust       0.19      0.14      0.16        35

    accuracy                           0.33       151
   macro avg       0.16      0.15      0.15       151
weighted avg       0.32      0.33      0.32       151

Macro F1 Score: 0.1527593295382669


## GAUSSIAN PROCESS CLASSIFIER (GPC)

In [36]:
from sklearn.gaussian_process import GaussianProcessClassifier

# Seeded Gaussian process classifier with the default kernel; the bare name on
# the last line displays the fitted estimator.
model_gpc = GaussianProcessClassifier(random_state=42).fit(X_train, y_train)
model_gpc

0,1,2
,kernel,
,optimizer,'fmin_l_bfgs_b'
,n_restarts_optimizer,0
,max_iter_predict,100
,warm_start,False
,copy_X_train,True
,random_state,42
,multi_class,'one_vs_rest'
,n_jobs,


In [37]:
# Hold-out evaluation of the Gaussian process classifier: per-class report, then macro-averaged F1.
y_pred = model_gpc.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.00      0.00      0.00         3
         Joy       0.00      0.00      0.00        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.45      0.17      0.24        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.43      0.84      0.57        62
       Trust       0.26      0.14      0.19        35

    accuracy                           0.41       151
   macro avg       0.14      0.14      0.13       151
weighted avg       0.33      0.41      0.33       151

Macro F1 Score: 0.12506452445476834


## SGD

In [68]:
from sklearn.linear_model import SGDClassifier

# Seeded linear classifier trained with stochastic gradient descent (default
# settings otherwise); the bare name on the last line displays the fitted estimator.
model_sgd = SGDClassifier(random_state=42).fit(X_train, y_train)
model_sgd

0,1,2
,loss,'hinge'
,penalty,'l2'
,alpha,0.0001
,l1_ratio,0.15
,fit_intercept,True
,max_iter,1000
,tol,0.001
,shuffle,True
,verbose,0
,epsilon,0.1


In [70]:
import numpy as np

# Hold-out evaluation of the SGD classifier: per-class report, then macro-averaged F1.
y_pred = model_sgd.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       1.00      0.33      0.50         3
         Joy       0.33      0.30      0.32        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.23      0.70      0.35        30
     Sadness       0.09      0.33      0.14         3
    Surprise       0.54      0.21      0.30        62
       Trust       0.33      0.14      0.20        35

    accuracy                           0.29       151
   macro avg       0.32      0.25      0.23       151
weighted avg       0.39      0.29      0.27       151

Macro F1 Score: 0.2260099544941539


## LOGREG

In [40]:
from sklearn.linear_model import LogisticRegression

# Seeded logistic regression with the iteration cap raised to 1000; the bare
# name on the last line displays the fitted estimator.
model_logreg = LogisticRegression(random_state=42, max_iter=1000).fit(X_train, y_train)
model_logreg

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,1000


In [41]:
# Hold-out evaluation of logistic regression: per-class report, then macro-averaged F1.
y_pred = model_logreg.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.50      0.33      0.40         3
         Joy       0.00      0.00      0.00        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.27      0.20      0.23        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.42      0.68      0.52        62
       Trust       0.32      0.23      0.27        35

    accuracy                           0.38       151
   macro avg       0.19      0.18      0.18       151
weighted avg       0.31      0.38      0.33       151

Macro F1 Score: 0.177396878483835


## DECISION TREE (DCT)

In [42]:
from sklearn.tree import DecisionTreeClassifier

# Seeded decision tree with default settings; the bare name on the last line
# displays the fitted estimator.
model_dct = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
model_dct

0,1,2
,criterion,'gini'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [None]:
import numpy as np

# model_sgd is an SGDClassifier left at its default hinge loss, which offers no
# predict_proba; the per-class decision scores are used instead.
scores_sgd = model_sgd.decision_function(X_val)
# Column index of the best-scoring class for every validation row
y_pred = np.argmax(scores_sgd, axis=1)

# The score columns follow model_sgd.classes_ (the model was fitted on the string
# labels in y_train), so the indices are translated through classes_. The
# custom_labels encoding uses a different order and must not be used here.
y_pred_labels = model_sgd.classes_[y_pred]

print(classification_report(y_val, y_pred_labels))
print('Macro F1 Score:', f1_score(y_val, y_pred_labels, average='macro'))

In [43]:
# Hold-out evaluation of the decision tree: per-class report, then macro-averaged F1.
y_pred = model_dct.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.20      0.33      0.25         3
         Joy       0.14      0.10      0.12        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.23      0.23      0.23        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.31      0.34      0.32        62
       Trust       0.14      0.11      0.13        35

    accuracy                           0.23       151
   macro avg       0.13      0.14      0.13       151
weighted avg       0.22      0.23      0.22       151

Macro F1 Score: 0.1313801802772391


## ADABOOST

In [44]:
from sklearn.ensemble import AdaBoostClassifier

# Seeded AdaBoost ensemble with 100 boosting rounds; the bare name on the last
# line displays the fitted estimator.
model_ada = AdaBoostClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
model_ada

0,1,2
,estimator,
,n_estimators,100
,learning_rate,1.0
,algorithm,'deprecated'
,random_state,42


In [45]:
# Hold-out evaluation of AdaBoost: per-class report, then macro-averaged F1.
y_pred = model_ada.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.50      0.33      0.40         3
         Joy       0.00      0.00      0.00        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.33      0.43      0.38        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.39      0.65      0.49        62
       Trust       0.43      0.09      0.14        35

    accuracy                           0.38       151
   macro avg       0.21      0.19      0.18       151
weighted avg       0.34      0.38      0.32       151

Macro F1 Score: 0.1759342018886027


## XGB

In [54]:
%pip install xgboost

Collecting xgboost
  Obtaining dependency information for xgboost from https://files.pythonhosted.org/packages/00/5a/f43bad68b31269a72bdd66102732ea4473e98f421ee9f71379e35dcb56f5/xgboost-3.0.5-py3-none-win_amd64.whl.metadata
  Downloading xgboost-3.0.5-py3-none-win_amd64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.5-py3-none-win_amd64.whl (56.8 MB)
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/56.8 MB 36.4 kB/s eta 0:25:59
   ---------------------------------------- 0.0/56.8 MB 36.4 kB/s eta 0:25:59
  


[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
from xgboost import XGBClassifier

# XGBoost is trained on the integer-encoded labels (y_train_encoded), not on the
# emotion names. The bare name on the last line displays the fitted estimator.
model_xgb = XGBClassifier(random_state=42, eval_metric='mlogloss').fit(X_train, y_train_encoded)
model_xgb

0,1,2
,objective,'multi:softprob'
,base_score,
,booster,
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [49]:
# Hold-out evaluation of XGBoost against the integer-encoded validation labels,
# so the report rows are class codes rather than emotion names.
y_pred = model_xgb.predict(X_val)
print(classification_report(y_true=y_val_encoded, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val_encoded, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

           0       0.16      0.10      0.12        30
           1       0.34      0.29      0.31        35
           2       0.50      0.10      0.17        10
           3       0.45      0.71      0.55        62
           4       0.00      0.00      0.00         1
           5       0.00      0.00      0.00         3
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         7

    accuracy                           0.38       151
   macro avg       0.18      0.15      0.14       151
weighted avg       0.33      0.38      0.33       151

Macro F1 Score: 0.14395195578231293


## MLP

In [63]:
%pip install tensorflow

Collecting tensorflow
  Obtaining dependency information for tensorflow from https://files.pythonhosted.org/packages/f9/37/b97abb360b551fbf5870a0ee07e39ff9c655e6e3e2f839bc88be81361842/tensorflow-2.20.0-cp312-cp312-win_amd64.whl.metadata
  Downloading tensorflow-2.20.0-cp312-cp312-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Obtaining dependency information for absl-py>=1.0.0 from https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl.metadata
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Obtaining dependency information for astunparse>=1.6.0 from https://files.pythonhosted.org/packages/2b/03/13dde6512ad7b4557eb792fbcf0c653af6076b81e5941d36ec61f7ce6028/astunparse-1.6.3-py2.py3-none-any.whl.metadata
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from ten


[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [50]:
import tensorflow as tf
import numpy as np

class F1MacroCallback(tf.keras.callbacks.Callback):
    """Keras callback that computes macro-averaged F1 on a held-out set after each epoch.

    The score is printed and, when Keras supplies a ``logs`` dict, also stored
    under the key ``"val_f1_macro"``.

    Args:
        validation_data: ``(features, integer_labels)`` pair to evaluate on.
    """

    def __init__(self, validation_data):
        super().__init__()
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        """Predict on the held-out set and report the macro F1 for this epoch."""
        X_val, y_val = self.validation_data
        y_pred = self.model.predict(X_val, verbose=0)
        # Predicted class = index of the largest output per row
        y_pred_classes = np.argmax(y_pred, axis=1)

        # ravel() always gives a 1-D array; squeeze() would collapse a
        # single-sample validation set to a scalar and break f1_score.
        y_true_classes = np.asarray(y_val).ravel()

        f1 = f1_score(y_true_classes, y_pred_classes, average="macro")
        # logs defaults to None, so only record the metric when a dict was passed
        if logs is not None:
            logs["val_f1_macro"] = f1
        print(f" — val_f1_macro: {f1:.4f}")

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and batch
# shuffling repeat between runs, matching the random_state=42 used by the
# other models in this notebook.
tf.keras.utils.set_random_seed(42)

# Two hidden ReLU layers with dropout, followed by a softmax over the emotion classes.
model_mlp = tf.keras.Sequential([
    # Input width follows the feature matrix instead of a hard-coded 768
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(len(custom_labels), activation="softmax")
])

# Sparse categorical cross-entropy because the targets are integer class codes.
model_mlp.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Reports macro F1 on the validation split at the end of every epoch.
f1_callback = F1MacroCallback(validation_data=(X_val, y_val_encoded))

model_mlp.fit(
    X_train, y_train_encoded,
    validation_data=(X_val, y_val_encoded),
    epochs=20,
    callbacks=[f1_callback]
)

Epoch 1/20
[1m11/19[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 5ms/step - accuracy: 0.2674 - loss: 1.8518 — val_f1_macro: 0.0728
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 125ms/step - accuracy: 0.3278 - loss: 1.6997 - val_accuracy: 0.4106 - val_loss: 1.5420 - val_f1_macro: 0.0728
Epoch 2/20
[1m14/19[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.3784 - loss: 1.5376  — val_f1_macro: 0.0728
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.3924 - loss: 1.5940 - val_accuracy: 0.4106 - val_loss: 1.5609 - val_f1_macro: 0.0728
Epoch 3/20
[1m13/19[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m0s[0m 4ms/step - accuracy: 0.4229 - loss: 1.5065  — val_f1_macro: 0.0728
[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.4040 - loss: 1.5904 - val_accuracy: 0.4106 - val_loss: 1.5399 - val_f1_macro: 0.0728
Epoch 4/20
[1m14/19[0m [32m━━━━━━━━━━━━━━[0m[37m━━

<keras.src.callbacks.history.History at 0x1eb45d4d4c0>

In [53]:
# Get probability scores
y_pred_proba = model_mlp.predict(X_val)
# Convert to class predictions by taking argmax
y_pred = np.argmax(y_pred_proba, axis=1)

# Create reverse mapping to convert numeric predictions back to labels
reverse_labels = {v: k for k, v in custom_labels.items()}
y_pred_labels = [reverse_labels[pred] for pred in y_pred]

print(classification_report(y_val, y_pred_labels))
print('Macro F1 Score:', f1_score(y_val, y_pred_labels, average='macro'))

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.00      0.00      0.00         3
         Joy       1.00      0.10      0.18        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.26      0.17      0.20        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.44      0.84      0.57        62
       Trust       0.42      0.14      0.21        35

    accuracy                           0.42       151
   macro avg       0.26      0.16      0.15       151
weighted avg       0.39      0.42      0.34       151

Macro F1 Score: 0.14665642590964595


# ENSEMBLE

In [54]:
# Keep the models whose F1 score is above 20%

base_models = dict([
    ("SGD", model_sgd),
    ("SVM", model_svc),
])

## STACKING

In [55]:
from sklearn.ensemble import StackingClassifier

# Stack the selected base models: their predictions, together with the raw
# features (passthrough=True), are fed to a random-forest meta-learner.
stacking_model = StackingClassifier(
    estimators=list(base_models.items()),
    # Seed the meta-learner like the other models in this notebook so that
    # repeated runs give the same result.
    final_estimator=RandomForestClassifier(random_state=42),
    passthrough=True
)
stacking_model.fit(X_train, y_train)

0,1,2
,estimators,"[('SGD', ...), ('SVM', ...)]"
,final_estimator,RandomForestClassifier()
,cv,
,stack_method,'auto'
,n_jobs,
,passthrough,True
,verbose,0

0,1,2
,loss,'hinge'
,penalty,'l2'
,alpha,0.0001
,l1_ratio,0.15
,fit_intercept,True
,max_iter,1000
,tol,0.001
,shuffle,True
,verbose,0
,epsilon,0.1

0,1,2
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [56]:
# Hold-out evaluation of the stacking ensemble: per-class report, then macro-averaged F1.
y_pred = stacking_model.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       0.00      0.00      0.00         3
         Joy       0.00      0.00      0.00        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.10      0.03      0.05        30
     Sadness       0.00      0.00      0.00         3
    Surprise       0.42      0.84      0.56        62
       Trust       0.33      0.14      0.20        35

    accuracy                           0.38       151
   macro avg       0.11      0.13      0.10       151
weighted avg       0.27      0.38      0.29       151

Macro F1 Score: 0.10114247311827956


## HARD VOTING

In [57]:
from sklearn.ensemble import VotingClassifier

# Majority ("hard") vote over the selected base models. fit() returns the
# estimator, and the bare name on the last line makes the notebook display it.
model_voting = VotingClassifier(
    voting='hard',
    estimators=list(base_models.items()),
).fit(X_train, y_train)
model_voting

0,1,2
,estimators,"[('SGD', ...), ('SVM', ...)]"
,voting,'hard'
,weights,
,n_jobs,
,flatten_transform,True
,verbose,False

0,1,2
,loss,'hinge'
,penalty,'l2'
,alpha,0.0001
,l1_ratio,0.15
,fit_intercept,True
,max_iter,1000
,tol,0.001
,shuffle,True
,verbose,0
,epsilon,0.1

0,1,2
,C,1.0
,kernel,'linear'
,degree,3
,gamma,'scale'
,coef0,0.0
,shrinking,True
,probability,False
,tol,0.001
,cache_size,200
,class_weight,


In [78]:
# Hold-out evaluation of the hard-voting ensemble: per-class report, then macro-averaged F1.
y_pred = model_voting.predict(X_val)
print(classification_report(y_true=y_val, y_pred=y_pred))
print('Macro F1 Score:', f1_score(y_true=y_val, y_pred=y_pred, average='macro'))

              precision    recall  f1-score   support

       Anger       0.00      0.00      0.00         7
        Fear       1.00      0.67      0.80         3
         Joy       0.33      0.30      0.32        10
     Neutral       0.00      0.00      0.00         1
       Proud       0.23      0.70      0.35        30
     Sadness       0.09      0.33      0.14         3
    Surprise       0.53      0.26      0.35        62
       Trust       0.57      0.11      0.19        35

    accuracy                           0.31       151
   macro avg       0.34      0.30      0.27       151
weighted avg       0.44      0.31      0.30       151

Macro F1 Score: 0.2680070414988243


In [79]:
# Save model
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists before writing.
os.makedirs("models", exist_ok=True)
joblib.dump(model_voting, "models/model_voting.pkl")

['models/model_voting.pkl']