In [None]:
# Colab-specific setup: mount Google Drive at /content/drive so the project
# folders configured later (under /content/drive/MyDrive) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install librosa scikit-learn joblib



In [None]:
import os
import numpy as np
import librosa
import joblib

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Root folders on the mounted Drive: the dataset and the demo clips.
BASE_DIR = "/content/drive/MyDrive/LID_Project/data"
DEMO_DIR = "/content/drive/MyDrive/LID_Project/demo_audio"

# Per-split audio folders. build_dataset reads one sub-folder per language
# code (see LANGUAGES) inside each of them.
TRAIN_DIR = os.path.join(BASE_DIR, "final_train")
TEST_DIR  = os.path.join(BASE_DIR, "final_test")

# Folder where the fitted model and scaler are written; created if missing.
MODEL_DIR = os.path.join(BASE_DIR, "mfcc_model")
os.makedirs(MODEL_DIR, exist_ok=True)

# Language codes. A language's index in this list is its integer class label
# (build_dataset uses enumerate; predict_language indexes back into it).
LANGUAGES = ['en', 'es', 'ja', 'de', 'tr']

# Feature-extraction and dataset-size settings.
# SAMPLE_RATE is passed as `sr` to librosa.load and librosa.feature.mfcc.
SAMPLE_RATE = 22050
# DURATION is passed as `duration` to librosa.load.
DURATION = 3.0
# N_MFCC is passed as `n_mfcc` to librosa.feature.mfcc.
N_MFCC = 40
# Upper bound on the number of .wav files taken per language folder.
MAX_FILES_PER_LANG = 2000


In [None]:
def extract_mfcc(path):
    """Summarize one audio file as a fixed-length MFCC statistics vector.

    The file is loaded with ``librosa.load`` using ``sr=SAMPLE_RATE`` and
    ``duration=DURATION``. Three coefficient streams are computed from it:
    the MFCCs (``n_mfcc=N_MFCC``), their delta, and their second-order delta.
    Each stream is reduced along axis 1 to a mean vector and a standard
    deviation vector.

    Args:
        path: Path of the audio file to load.

    Returns:
        1-D array laid out as
        ``[mfcc mean, mfcc std, delta mean, delta std, delta2 mean, delta2 std]``
        (6 * N_MFCC values, i.e. 240 with N_MFCC = 40).
    """
    signal, _ = librosa.load(path, sr=SAMPLE_RATE, duration=DURATION)

    base = librosa.feature.mfcc(y=signal, sr=SAMPLE_RATE, n_mfcc=N_MFCC)
    streams = (
        base,
        librosa.feature.delta(base),
        librosa.feature.delta(base, order=2),
    )

    # For every stream emit its mean first, then its std, keeping stream order.
    stats = [
        reduce(stream, axis=1)
        for stream in streams
        for reduce in (np.mean, np.std)
    ]
    return np.hstack(stats)


In [None]:
def build_dataset(split_dir):
    """Build the feature matrix and label vector for one data split.

    For every language in ``LANGUAGES`` (its index in that list is the class
    label), up to ``MAX_FILES_PER_LANG`` ``.wav`` files are taken from
    ``<split_dir>/<lang>`` and converted to a feature vector with
    ``extract_mfcc``.

    Files whose feature extraction raises are skipped on a best-effort basis,
    but the number of skipped files and the first error are printed so that
    systematic failures do not go unnoticed. Only ``Exception`` is caught, so
    interrupting the kernel (``KeyboardInterrupt``) still stops the loop.

    Args:
        split_dir: Directory containing one sub-directory per language code.

    Returns:
        Tuple ``(X, y)``: ``X`` is the array of stacked feature vectors and
        ``y`` the array of integer labels, one entry per processed file.
    """
    X, y = [], []

    for label, lang in enumerate(LANGUAGES):
        lang_dir = os.path.join(split_dir, lang)
        files = [f for f in os.listdir(lang_dir) if f.endswith(".wav")]
        files = files[:MAX_FILES_PER_LANG]

        print(f"{lang}: {len(files)} files")

        skipped = 0
        first_error = None
        for f in files:
            path = os.path.join(lang_dir, f)
            try:
                feats = extract_mfcc(path)
            except Exception as exc:
                # Best effort: drop the unreadable file, but remember it.
                skipped += 1
                if first_error is None:
                    first_error = (path, exc)
                continue
            # Append features and label together so X and y stay aligned.
            X.append(feats)
            y.append(label)

        if skipped:
            bad_path, bad_exc = first_error
            print(
                f"  warning: skipped {skipped} file(s) for '{lang}'; "
                f"first failure: {bad_path}: {bad_exc!r}"
            )

    return np.array(X), np.array(y)


In [None]:
# Extract features and labels for each split (one build_dataset call per
# split directory).
X_train, y_train = build_dataset(TRAIN_DIR)
X_test, y_test = build_dataset(TEST_DIR)

# Sanity check: one row per processed clip, one column per feature.
print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


en: 2000 files
es: 2000 files
ja: 2000 files
de: 2000 files
tr: 2000 files
en: 2000 files
es: 2000 files
ja: 2000 files
de: 2000 files
tr: 2000 files
Train shape: (10000, 240)
Test shape: (10000, 240)


In [None]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied to the test split with those same fitted parameters.
# NOTE(review): X_train / X_test are rebound to their scaled versions, so
# re-running this cell without re-running the dataset cell would fit and
# apply the scaler on already-scaled data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Logistic-regression classifier over the standardized feature vectors.
# max_iter=1500 raises the solver's iteration limit — presumably to let it
# converge on this data; confirm against convergence warnings.
# n_jobs=-1 asks scikit-learn to use all available CPU cores.
model = LogisticRegression(
    max_iter=1500,
    n_jobs=-1
)

# Fit on the scaled training split; as the cell's last expression this also
# displays the fitted estimator.
model.fit(X_train, y_train)


In [None]:
# Score the fitted classifier on the held-out test split.
y_pred = model.predict(X_test)

# Overall accuracy as a percentage.
print(f"\nAccuracy: {accuracy_score(y_test, y_pred)*100:.2f}%\n")

# Per-language precision / recall / F1, labelled with the language codes.
print("Classification Report:")
report_text = classification_report(y_test, y_pred, target_names=LANGUAGES)
print(report_text)

# Raw confusion counts between true and predicted labels.
print("Confusion Matrix:")
confusion = confusion_matrix(y_test, y_pred)
print(confusion)



Accuracy: 49.47%

Classification Report:
              precision    recall  f1-score   support

          en       0.49      0.47      0.48      2000
          es       0.48      0.48      0.48      2000
          ja       0.53      0.53      0.53      2000
          de       0.50      0.54      0.52      2000
          tr       0.46      0.45      0.46      2000

    accuracy                           0.49     10000
   macro avg       0.49      0.49      0.49     10000
weighted avg       0.49      0.49      0.49     10000

Confusion Matrix:
[[ 943  264  222  330  241]
 [ 236  961  239  261  303]
 [ 197  221 1059  219  304]
 [ 288  232  195 1080  205]
 [ 246  307  290  253  904]]


In [None]:
# Persist the fitted classifier and scaler under MODEL_DIR.
# joblib.dump returns the list of file names it wrote, NOT the object that was
# saved. Its result must therefore not be assigned back to `model` / `scaler`,
# otherwise the fitted estimators in the kernel are replaced by lists of paths
# and any later `model.predict` / `scaler.transform` call fails.
model_path = os.path.join(MODEL_DIR, "mfcc_model.pkl")
scaler_path = os.path.join(MODEL_DIR, "scaler.pkl")

joblib.dump(model, model_path)
joblib.dump(scaler, scaler_path)

print("âœ… MFCC model and scaler saved")


âœ… MFCC model and scaler saved


In [None]:
def predict_language(wav_path):
    """Predict the language code of a single audio file.

    The file is converted to a feature vector with ``extract_mfcc``, scaled
    with the module-level ``scaler``, and classified by the module-level
    ``model``. The predicted class index is mapped back to its entry in
    ``LANGUAGES``.

    Args:
        wav_path: Path of the audio file to classify.

    Returns:
        The language code from ``LANGUAGES`` for the predicted class.
    """
    vector = extract_mfcc(wav_path)
    # The scaler and model work on 2-D input: one row holding this clip.
    scaled_row = scaler.transform(vector.reshape(1, -1))
    class_index = model.predict(scaled_row)[0]
    return LANGUAGES[class_index]


In [None]:
# NOTE(review): joblib is already imported in the imports cell near the top;
# this repeated import is redundant there but harmless.
import joblib

# Rebind `scaler` and `model` to the artifacts stored under MODEL_DIR, so that
# predict_language (which reads these two globals) uses the persisted versions.
# joblib.load unpickles the files: only load artifacts you created yourself.
scaler = joblib.load(os.path.join(MODEL_DIR, "scaler.pkl"))
model  = joblib.load(os.path.join(MODEL_DIR, "mfcc_model.pkl"))

print("âœ… MFCC model and scaler loaded correctly")


âœ… MFCC model and scaler loaded correctly


In [None]:
print("\nðŸŽ§ MFCC DEMO RESULTS\n")

# Keep only the .wav clips from the demo folder, in alphabetical order,
# then classify each one and print the predicted language code.
demo_wavs = sorted(name for name in os.listdir(DEMO_DIR) if name.endswith(".wav"))

for file in demo_wavs:
    pred_lang = predict_language(os.path.join(DEMO_DIR, file))
    print(f"File: {file:15s} â†’ Predicted Language: {pred_lang}")



ðŸŽ§ MFCC DEMO RESULTS

File: de_demo.wav     â†’ Predicted Language: ja
File: en_demo.wav     â†’ Predicted Language: es
File: es_demo.wav     â†’ Predicted Language: es
File: ja_demo.wav     â†’ Predicted Language: ja
File: tr_demo.wav     â†’ Predicted Language: tr


In [None]:
# Accuracy on the demo clips. The expected language of a clip is the part of
# its file name before the first underscore (e.g. "de_demo.wav" -> "de").
correct = 0
total = 0

for file in sorted(os.listdir(DEMO_DIR)):
    if not file.endswith(".wav"):
        continue

    true_lang = file.split("_")[0]
    pred_lang = predict_language(os.path.join(DEMO_DIR, file))

    if pred_lang == true_lang:
        correct += 1
    total += 1

# Guard against an empty demo folder: without any .wav file `total` stays 0
# and the percentage below would raise ZeroDivisionError.
if total:
    print(f"\nðŸ“Š MFCC DEMO ACCURACY: {100*correct/total:.2f}%")
else:
    print(f"\nNo .wav files found in {DEMO_DIR}; demo accuracy not computed")



ðŸ“Š MFCC DEMO ACCURACY: 60.00%
