In [15]:
import os
import numpy as np
import librosa
from hmmlearn import hmm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
import joblib

# -----------------------------
# CONFIG
# -----------------------------
# Number of MFCC coefficients per frame; default for extract_features' n_mfcc
# and stored next to the saved models.
N_MFCC = 13          # 13 is classical for HMMs
# Every MFCC sequence is zero-padded or truncated to exactly this many frames
# (see extract_features).
MAX_PAD_LEN = 174    # keep same as your pipeline
# Passed as n_components to hmm.GaussianHMM.
N_STATES = 8         # hidden states per HMM (tuneable)
# Passed as covariance_type to hmm.GaussianHMM.
COV_TYPE = "diag"    # covariance type for GaussianHMM
# Passed as n_iter to hmm.GaussianHMM.
# NOTE(review): hmmlearn treats n_iter as an upper bound and may stop earlier
# once its convergence tolerance is met -- confirm against the hmmlearn docs.
N_ITER = 300         # EM iterations for each HMM
# File used by joblib.dump / joblib.load to persist the per-emotion models.
MODEL_PATH = "hmm_only_emotion_models.joblib"

# -----------------------------
# FEATURE EXTRACTION
# -----------------------------
def extract_features(file_path, max_pad_len=MAX_PAD_LEN, n_mfcc=N_MFCC):
    """Return a fixed-length, time-major MFCC sequence for one audio file.

    The file is loaded through librosa with ``sr=22050`` and its MFCCs are
    computed. The frame axis is then forced to exactly ``max_pad_len`` rows:
    shorter clips get zero rows appended at the end, longer clips are cut.

    Args:
        file_path: Path of the audio file to load.
        max_pad_len: Number of frames in the returned sequence.
        n_mfcc: Number of MFCC coefficients computed per frame.

    Returns:
        Array of shape (max_pad_len, n_mfcc).
    """
    audio, rate = librosa.load(file_path, sr=22050)
    # librosa yields (n_mfcc, frames); flip it to (frames, n_mfcc).
    frames = librosa.feature.mfcc(y=audio, sr=rate, n_mfcc=n_mfcc).T

    missing = max_pad_len - frames.shape[0]
    if missing > 0:
        # Too short: append all-zero frames at the end of the time axis.
        return np.pad(frames, ((0, missing), (0, 0)), mode='constant')
    # Long enough: keep only the leading max_pad_len frames.
    return frames[:max_pad_len, :]

# -----------------------------
# DATASET PATHS (edit to your dataset)
# -----------------------------
# Directories scanned for .wav files, in scan order: the SER training folder,
# then Actor_01 .. Actor_24 of the speech-actors archive, then the five
# per-emotion folders of archive2.
data_dirs = [
    "K:\\Code\\Project\\Research Paper\\Emotion Detection\\voice_data\\train_data\\SER",
    *(
        f"K:\\Code\\Project\\Research Paper\\Emotion Detection\\archive\\audio_speech_actors_01-24\\Actor_{actor_id:02d}"
        for actor_id in range(1, 25)
    ),
    *(
        f"K:\\Code\\Project\\Research Paper\\Emotion Detection\\archive2\\{folder}"
        for folder in ("Angry", "Happy", "Natural", "Sad", "Surprised")
    ),
]

# -----------------------------
# LOAD DATA (MFCC sequences) AND LABELS
# -----------------------------
# Walk every directory in data_dirs and turn each .wav file into one MFCC
# sequence (X) plus an integer class id (y). The class name is the text after
# the last "_" in the file name; `emotions` maps class id -> class name in
# order of first appearance.
X, y, emotions = [], [], []
for data_dir in data_dirs:
    # isdir (not exists) so that a stray file with this name is skipped
    # instead of crashing os.listdir below.
    if not os.path.isdir(data_dir):
        print(f"‚ö†Ô∏è Warning: {data_dir} not found, skipping...")
        continue
    # os.listdir order is arbitrary; sorting keeps the label ids and the
    # seeded train/test split reproducible between runs and machines.
    for fname in sorted(os.listdir(data_dir)):
        if not fname.lower().endswith(".wav"):
            continue
        # Slice the extension off so ".WAV" / ".Wav" are stripped as well
        # (str.replace(".wav", "") only handled the lowercase spelling).
        label = fname[:-len(".wav")].split("_")[-1]
        if label not in emotions:
            emotions.append(label)
        path = os.path.join(data_dir, fname)
        mfcc_seq = extract_features(path)         # shape (T, n_mfcc)
        X.append(mfcc_seq)
        y.append(emotions.index(label))

if not X:
    raise RuntimeError("‚ùå No audio data found. Check your data_dirs paths.")

# Build a 1-D object array holding one MFCC matrix per element.
# np.array(list_of_equal_shaped_arrays, dtype=object) would instead create an
# (N, T, n_mfcc) array of boxed Python floats, which is slow and memory hungry.
sequences = X
X = np.empty(len(sequences), dtype=object)
for position, sequence in enumerate(sequences):
    X[position] = sequence
y = np.array(y)

# -----------------------------
# TRAIN / TEST SPLIT
# -----------------------------
# Hold out 15% of the sequences for evaluation; random_state pins the shuffle
# so the same inputs always produce the same split.
# NOTE(review): no stratify= argument is passed, so classes with few samples
# may be unevenly represented in the two parts -- consider stratify=y after
# confirming every class has at least two samples.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# -----------------------------
# TRAIN HMMs (one per emotion)
# -----------------------------
# Either restore previously saved per-emotion HMMs or train one GaussianHMM
# per emotion on the training split and save the result to MODEL_PATH.
choice = input("Do you want to load the saved model? (yes/no): ").strip().lower()

models = {}  # dict: emotion -> trained GaussianHMM

if choice == "yes" and os.path.exists(MODEL_PATH):
    print("üîÅ Loading saved HMM models...")
    # NOTE(security): joblib.load unpickles arbitrary objects -- only load
    # model files that you created yourself or otherwise trust.
    saved = joblib.load(MODEL_PATH)
    models = saved["models"]
    # y / y_test were encoded with THIS run's `emotions` order. Replacing the
    # list with the saved one could silently shift every class id, so keep
    # the current order and only append names this run has not seen yet
    # (existing ids stay untouched, emotions.index() keeps working).
    for saved_name in [*saved["emotions"], *models]:
        if saved_name not in emotions:
            emotions.append(saved_name)
    # A feature-shape mismatch would make every model.score() call fail
    # later on, so report it right away.
    for key, current in (("n_mfcc", N_MFCC), ("max_pad_len", MAX_PAD_LEN)):
        if saved.get(key, current) != current:
            print(f"Warning: saved models use {key}={saved[key]} "
                  f"but the current config uses {key}={current}.")
    # NOTE(review): the loaded models may have been trained on a different
    # split, in which case the test accuracy below is optimistic -- confirm.
    print(f"Loaded {len(models)} HMMs: {list(models.keys())}")
else:
    if choice == "yes":
        print(f"Saved model file {MODEL_PATH} not found, training from scratch.")
    print("üß† Training HMMs for each emotion...")
    for i, emotion in enumerate(emotions):
        # gather all training sequences of this emotion
        seqs = [seq for seq, label in zip(X_train, y_train) if label == i]
        if not seqs:
            print(f"‚ö†Ô∏è No training samples for emotion '{emotion}', skipping.")
            continue

        # hmmlearn wants all frames stacked into one (N_total_frames, n_mfcc)
        # matrix plus the per-sequence lengths to recover the boundaries.
        lengths = [s.shape[0] for s in seqs]
        X_concat = np.vstack(seqs).astype(np.float64)

        model = hmm.GaussianHMM(n_components=N_STATES, covariance_type=COV_TYPE,
                                n_iter=N_ITER, verbose=True, random_state=42)

        # Fit
        print(f"Training HMM for emotion '{emotion}' on {len(seqs)} sequences ({X_concat.shape[0]} frames)...")
        model.fit(X_concat, lengths)
        models[emotion] = model
        print(f"‚úÖ Trained HMM for '{emotion}'")

    # Save trained models together with the label order and feature config
    joblib.dump({"models": models, "emotions": emotions,
                 "n_mfcc": N_MFCC, "max_pad_len": MAX_PAD_LEN}, MODEL_PATH)
    print(f"üíæ Saved HMM models to {MODEL_PATH}")

# -----------------------------
# PREDICTION UTILITIES
# -----------------------------
def predict_emotion(file_path):
    """Predict the emotion of one audio file.

    MFCC features are extracted with extract_features, every HMM in the
    module-level ``models`` dict scores them, and the emotion whose model
    gives the highest log-likelihood wins.

    Args:
        file_path: Path of the wav file to classify.

    Returns:
        The winning emotion name, or None when ``models`` is empty or no
        model produced a score above -inf.

    Warns:
        RuntimeWarning: once per model that raises while scoring; that model
        is then left out of the comparison instead of aborting the call.
    """
    import warnings  # local import keeps this function self-contained

    mfcc = extract_features(file_path)  # shape (T, n_mfcc)
    best_emotion = None
    best_score = -np.inf
    for emotion, model in models.items():
        try:
            score = model.score(mfcc)  # log-likelihood
        except Exception as exc:
            # Still best-effort, but no longer silent: a failure here usually
            # points at a real problem such as a feature-size mismatch.
            warnings.warn(
                f"HMM for '{emotion}' could not score {file_path!r}: {exc}",
                RuntimeWarning,
                stacklevel=2,
            )
            continue
        if score > best_score:
            best_score = score
            best_emotion = emotion
    return best_emotion

# -----------------------------
# EVALUATE ON TEST SET
# -----------------------------
# Classify every held-out sequence with the per-emotion HMMs and report the
# accuracy over the sequences that received a prediction.
print("üß™ Evaluating on test set...")
y_pred = []
for test_seq in tqdm(X_test):
    # test_seq is one (T, n_mfcc) sequence; the model with the highest
    # log-likelihood decides its class.
    winner, top_ll = None, -np.inf
    for name, candidate in models.items():
        try:
            ll = candidate.score(test_seq)
        except Exception:
            # A model that cannot score counts as -inf, i.e. it can never win.
            continue
        if ll > top_ll:
            winner, top_ll = name, ll
    # -1 marks "no model produced a usable score".
    y_pred.append(-1 if winner is None else emotions.index(winner))

valid_idx = [pos for pos, pred in enumerate(y_pred) if pred != -1]
if valid_idx:
    acc = accuracy_score(y_test[valid_idx], np.array(y_pred)[valid_idx])
    print(f"‚úÖ HMM Test Accuracy: {acc*100:.2f}% on {len(valid_idx)} sequences")
else:
    print("‚ö†Ô∏è No valid predictions were made on test set.")

# -----------------------------
# SINGLE FILE PREDICTION TEST
# -----------------------------
# Smoke test: classify one known recording, if it is present on this machine.
test_file = "K:\\Code\\Project\\Research Paper\\Emotion Detection\\voice_data\\test_data\\F_01_OISHI_S_2_SURPRISE_2.wav"
if not os.path.exists(test_file):
    print("‚ö†Ô∏è Test file not found:", test_file)
else:
    print("Predicted Emotion:", predict_emotion(test_file))


🧠 Training HMMs for each emotion...
Training HMM for emotion 'angry' on 685 sequences (119190 frames)...


         1 -6926125.06177819             +nan
         2 -5435567.87623626 +1490557.18554194
         3 -2055157.99378820 +3380409.88244806
         4 -706350.65489308 +1348807.33889511
         5 -628027.29357147  +78323.36132161
         6 -575854.75758605  +52172.53598542
         7 -549319.58463193  +26535.17295412
         8 -530209.80802395  +19109.77660798
         9 -509887.43501334  +20322.37301062
        10 -490653.24292061  +19234.19209273
        11 -465036.13745827  +25617.10546234
        12 -432743.17210645  +32292.96535182
        13 -391358.64968001  +41384.52242644
        14 -342188.09240812  +49170.55727189
        15 -285650.26730426  +56537.82510386
        16 -219455.61148135  +66194.65582291
        17 -126979.07346917  +92476.53801218
        18   -7417.54243128 +119561.53103789
        19   81970.26974299  +89387.81217427
        20   94440.41538637  +12470.14564338
        21   96025.36412334   +1584.94873697
        22   96977.35121581    +951.98709247
    

✅ Trained HMM for 'angry'
Training HMM for emotion 'disgust' on 506 sequences (88044 frames)...


         1 -5093089.37862890             +nan
         2 -4607925.07548246 +485164.30314644
         3 -3878287.30886119 +729637.76662127
         4 -1598612.91346946 +2279674.39539173
         5 -1498645.13570926  +99967.77776020
         6 -1454397.40180638  +44247.73390289
         7 -1410487.76860667  +43909.63319970
         8 -1389091.32862364  +21396.43998303
         9 -1376053.56186201  +13037.76676163
        10 -1364435.28325367  +11618.27860834
        11 -1352606.84140337  +11828.44185030
        12 -1340213.39479654  +12393.44660683
        13 -1326997.14702179  +13216.24777475
        14 -1312924.14875487  +14072.99826692
        15 -1298574.33861677  +14349.81013810
        16 -1282692.25020140  +15882.08841537
        17 -1264705.86021672  +17986.38998468
        18 -1246263.02750844  +18442.83270828
        19 -1221620.33583176  +24642.69167668
        20 -1188906.46164324  +32713.87418852
        21 -1147889.22897207  +41017.23267117
        22 -1113496.01075831  +34

✅ Trained HMM for 'disgust'
Training HMM for emotion 'fear' on 495 sequences (86130 frames)...


         1 -4806978.17600765             +nan
         2 -4051672.96676479 +755305.20924286
         3 -2015240.35125373 +2036432.61551105
         4  463105.37891206 +2478345.73016579
         5  489565.56337227  +26460.18446021
         6  503085.27755731  +13519.71418505
         7  516035.71104640  +12950.43348909
         8  527129.28684071  +11093.57579430
         9  535493.67708023   +8364.39023952
        10  541695.98243825   +6202.30535802
        11  546864.19422378   +5168.21178553
        12  551274.32356459   +4410.12934080
        13  555973.12785147   +4698.80428689
        14  561312.97699139   +5339.84913992
        15  566849.87787886   +5536.90088748
        16  571706.64351708   +4856.76563822
        17  577585.58350842   +5878.93999133
        18  585073.47127013   +7487.88776171
        19  596241.49209208  +11168.02082196
        20  613853.34150079  +17611.84940870
        21  633941.00490658  +20087.66340579
        22  656808.73414868  +22867.72924210
     

✅ Trained HMM for 'fear'
Training HMM for emotion 'happy' on 676 sequences (117624 frames)...


         1 -6802641.33474645             +nan
         2 -5807363.71124877 +995277.62349768
         3 -1262510.59999128 +4544853.11125749
         4 -1117974.05086227 +144536.54912901
         5 -1060941.28436342  +57032.76649885
         6 -1013737.69510627  +47203.58925715
         7 -977196.15011860  +36541.54498767
         8 -941689.68079732  +35506.46932129
         9 -917485.78101840  +24203.89977891
        10 -900405.62125374  +17080.15976467
        11 -885913.62538279  +14491.99587095
        12 -868132.88781628  +17780.73756651
        13 -849492.51824060  +18640.36957569
        14 -831319.50335957  +18173.01488102
        15 -809907.24848594  +21412.25487363
        16 -786438.37283871  +23468.87564724
        17 -763658.68656419  +22779.68627452
        18 -743566.55028497  +20092.13627922
        19 -724683.85455570  +18882.69572926
        20 -705802.57596348  +18881.27859222
        21 -681787.20395718  +24015.37200630
        22 -646393.78541710  +35393.41854008
   

✅ Trained HMM for 'happy'
Training HMM for emotion 'neutral' on 615 sequences (107010 frames)...


         1 -6213087.46413713             +nan
         2 -5458791.10787645 +754296.35626068
         3 -3331657.11348529 +2127133.99439116
         4 -2420568.35049461 +911088.76299067
         5 -1245965.67363602 +1174602.67685859
         6 -105834.64714710 +1140131.02648892
         7  -63258.05354078  +42576.59360632
         8  -58632.92502431   +4625.12851647
         9  -55985.62041516   +2647.30460915
        10  -54153.22914496   +1832.39127020
        11  -52690.18053434   +1463.04861062
        12  -51523.81116348   +1166.36937086
        13  -50660.16749473    +863.64366876
        14  -50014.20178988    +645.96570484
        15  -49550.03869911    +464.16309078
        16  -49173.84534682    +376.19335228
        17  -48840.62806564    +333.21728118
        18  -48536.09173780    +304.53632784
        19  -48236.07740227    +300.01433553
        20  -47882.31184499    +353.76555728
        21  -47301.29098640    +581.02085859
        22  -46045.10141475   +1256.18957165
  

✅ Trained HMM for 'neutral'
Training HMM for emotion 'ps' on 685 sequences (119190 frames)...


         1 -6914911.38652126             +nan
         2 -6256878.55385639 +658032.83266488
         3 -4332401.77747473 +1924476.77638166
         4 -2718490.24363441 +1613911.53384032
         5 -1530104.85352871 +1188385.39010570
         6 -958160.21227154 +571944.64125717
         7 -936063.43754842  +22096.77472312
         8 -926310.87989822   +9752.55765020
         9 -915701.36003228  +10609.51986594
        10 -908992.08131888   +6709.27871339
        11 -904680.78839145   +4311.29292743
        12 -902161.30458348   +2519.48380798
        13 -900417.79101984   +1743.51356364
        14 -898982.73593481   +1435.05508503
        15 -897039.77353873   +1942.96239608
        16 -893119.82622652   +3919.94731222
        17 -884128.29009984   +8991.53612668
        18 -867692.03322857  +16436.25687127
        19 -859541.03375998   +8150.99946859
        20 -856259.20660459   +3281.82715539
        21 -853916.11938413   +2343.08722046
        22 -852639.44492841   +1276.67445572
  

✅ Trained HMM for 'ps'
Training HMM for emotion 'sad' on 702 sequences (122148 frames)...


         1 -7136860.92801597             +nan
         2 -6178531.97309290 +958328.95492307
         3 -2589606.08335382 +3588925.88973908
         4 -2294998.67132850 +294607.41202532
         5 -2256179.99882718  +38818.67250132
         6 -2194396.54704265  +61783.45178453
         7 -2154970.24760784  +39426.29943480
         8 -2126543.57180278  +28426.67580506
         9 -2103198.43419718  +23345.13760560
        10 -2080295.39104229  +22903.04315489
        11 -2059664.95145182  +20630.43959046
        12 -2040401.41740547  +19263.53404636
        13 -2024162.57826619  +16238.83913927
        14 -2008622.94529900  +15539.63296719
        15 -1987173.93980951  +21449.00548950
        16 -1955049.78348494  +32124.15632456
        17 -1903623.52401761  +51426.25946733
        18 -1828469.73654693  +75153.78747068
        19 -1753547.82052219  +74921.91602474
        20 -1662005.21636847  +91542.60415372
        21 -1574628.96405507  +87376.25231340
        22 -1560311.98990082  +14

✅ Trained HMM for 'sad'
Training HMM for emotion 'calm' on 158 sequences (27492 frames)...


         2 -1422173.73860085 +129452.28128406
         3 -1212904.09768432 +209269.64091653
         4 -1203492.12290739   +9411.97477693
         5 -1199203.25982797   +4288.86307942
         6 -1195560.64237921   +3642.61744876
         7 -1190018.99144307   +5541.65093614
         8 -1179196.50826801  +10822.48317507
         9 -1157593.35009990  +21603.15816811
        10 -1140812.06503167  +16781.28506823
        11 -1134888.38232994   +5923.68270173
        12 -1130534.41647666   +4353.96585328
        13 -1125536.18446879   +4998.23200787
        14 -1119285.61485452   +6250.56961427
        15 -1112533.82383442   +6751.79102010
        16 -1103905.79194916   +8628.03188526
        17 -1091820.84020724  +12084.95174193
        18 -1072254.90014421  +19565.94006303
        19 -1043838.84217380  +28416.05797041
        20 -1016903.00414791  +26935.83802589
        21 -985414.14863127  +31488.85551664
        22 -952848.28071459  +32565.86791668
        23 -928070.07197489  +24778.

✅ Trained HMM for 'calm'
💾 Saved HMM models to hmm_only_emotion_models.joblib
🧪 Evaluating on test set...


100%|██████████| 798/798 [00:01<00:00, 559.26it/s]

✅ HMM Test Accuracy: 38.97% on 798 sequences
Predicted Emotion: angry



