In [1]:
import os
import pandas as pd
import numpy as np
import librosa
from transformers import pipeline
import language_tool_python
import torch
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, Dense, Concatenate
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import mean_absolute_error, r2_score

# Load and preprocess dataset
# The dataset root is defined once. It defaults to the original local path,
# but can be overridden with the GRAMMAR_DATASET_DIR environment variable so
# the notebook can run on another machine without editing the code.
DATASET_DIR = os.environ.get(
    "GRAMMAR_DATASET_DIR",
    r'C:\Users\niksh\Downloads\archive (1)\dataset',
)
AUDIO_TRAIN_DIR = os.path.join(DATASET_DIR, "audios_train")

train_df = pd.read_csv(os.path.join(DATASET_DIR, "train.csv"))

# Absolute path of every clip, built from the 'filename' column.
train_df['file_path'] = train_df['filename'].apply(
    lambda name: os.path.join(AUDIO_TRAIN_DIR, name)
)

# Rebind instead of mutating in place so re-running the cell is idempotent.
train_df = train_df.rename(columns={'label': 'grammar_score'})

# Audio feature extraction
def extract_audio_features(file_path):
    """Summarise one audio file as a flat vector of time-averaged spectral features.

    The clip is loaded through ``librosa.load`` with ``sr=16000``; MFCC
    (``n_mfcc=40``), chroma-STFT and spectral-contrast matrices are then
    computed, and each matrix is averaged along axis 1.

    Parameters
    ----------
    file_path : path of the audio file, passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D array: the MFCC means, followed by the chroma means, followed by
        the spectral-contrast means.
    """
    waveform, sample_rate = librosa.load(file_path, sr=16000)

    # Order matters: downstream code relies on MFCC -> chroma -> contrast.
    feature_matrices = (
        librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=40),
        librosa.feature.chroma_stft(y=waveform, sr=sample_rate),
        librosa.feature.spectral_contrast(y=waveform, sr=sample_rate),
    )

    per_feature_means = [matrix.mean(axis=1) for matrix in feature_matrices]
    return np.concatenate(per_feature_means)

# Transcription pipeline
# Whisper processes audio in 30-second windows. Without chunking, recordings
# longer than that are not fully transcribed, which would bias the grammar
# features computed from the transcript. chunk_length_s=30 enables the
# pipeline's chunked long-form transcription; clips of 30 s or less still fit
# in a single chunk.
asr_pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-medium",
    chunk_length_s=30,
    # Use the GPU when one is visible to torch, otherwise fall back to CPU.
    device="cuda" if torch.cuda.is_available() else "cpu",
)

def transcribe_audio(file_path):
    """Transcribe one audio file with the module-level ``asr_pipe``.

    Parameters
    ----------
    file_path : path of the audio file, handed to ``asr_pipe`` unchanged.

    Returns
    -------
    The value stored under the ``"text"`` key of the pipeline result.
    """
    asr_result = asr_pipe(file_path)
    return asr_result["text"]

# Grammar checker client. It is pointed at a LanguageTool server expected at
# http://localhost:8081 rather than one started by the library itself.
tool = language_tool_python.LanguageTool(
    'en-US',
    remote_server='http://localhost:8081',
)

# Grammar feature extraction
def grammar_features(text):
    """Turn a transcript into four simple grammar/length statistics.

    The text is checked with the module-level LanguageTool client ``tool``.

    Parameters
    ----------
    text : str
        Transcript to analyse.

    Returns
    -------
    list
        ``[issue_count, distinct_rule_count, char_count, word_count]`` where
        the first two come from the LanguageTool matches (distinctness is by
        each match's ``ruleId``) and the last two from the raw text
        (``len(text)`` and whitespace-split token count).
    """
    issues = tool.check(text)
    distinct_rule_ids = {issue.ruleId for issue in issues}

    issue_count = len(issues)
    distinct_rule_count = len(distinct_rule_ids)
    char_count = len(text)
    word_count = len(text.split())

    return [issue_count, distinct_rule_count, char_count, word_count]

# Extract and combine features
# For every training clip: compute the audio feature vector, transcribe the
# clip, and derive grammar statistics from the transcript. A clip contributes
# to the three parallel lists only if ALL three steps succeed, so the lists
# always stay aligned row-for-row.
audio_features_list, text_features_list, labels = [], [], []

for filename, path, score in zip(
    train_df['filename'], train_df['file_path'], train_df['grammar_score']
):
    try:
        audio_feat = extract_audio_features(path)
        transcript = transcribe_audio(path)
        grammar_feat = grammar_features(transcript)
    except Exception as e:
        # Best-effort: one unreadable or untranscribable clip must not abort
        # the whole (expensive) extraction run.
        print(f"Skipping {filename} due to error: {e}")
        continue

    audio_features_list.append(audio_feat)
    text_features_list.append(grammar_feat)
    labels.append(score)

# If every clip was skipped (for example because the LanguageTool server or the
# audio directory is unreachable), stop here with a clear message instead of
# failing later on empty arrays with a much less obvious error.
if not labels:
    raise RuntimeError(
        "Feature extraction produced no samples: every file was skipped. "
        "Check the audio paths, the ASR pipeline and the LanguageTool server."
    )

# Convert to numpy arrays
X_audio = np.array(audio_features_list)
X_text = np.array(text_features_list)
y = np.array(labels)

# Train-test split
# The split is done BEFORE scaling. Fitting the scalers on the full dataset
# would leak test-set statistics (mean/variance) into the training features
# and make the reported test metrics optimistic.
X_train_audio, X_test_audio, X_train_text, X_test_text, y_train, y_test = train_test_split(
    X_audio, X_text, y, test_size=0.2, random_state=42
)

# Normalize features
# Scalers learn their statistics from the training rows only; the test rows
# are transformed with those same statistics.
scaler_audio = StandardScaler()
scaler_text = StandardScaler()

X_train_audio = scaler_audio.fit_transform(X_train_audio)
X_train_text = scaler_text.fit_transform(X_train_text)
X_test_audio = scaler_audio.transform(X_test_audio)
X_test_text = scaler_text.transform(X_test_text)

# Keep the full matrices in scaled form as before (code below reads their
# shape), now using the train-fitted scalers.
X_audio = scaler_audio.transform(X_audio)
X_text = scaler_text.transform(X_text)

# Build the model
# Two-branch regressor: the audio features and the text (grammar) features
# each get their own 128-unit ReLU layer; the two branches are concatenated
# and mapped to a single linear output.
n_audio_features = X_audio.shape[1]
n_text_features = X_text.shape[1]

audio_input = Input(shape=(n_audio_features,))
text_input = Input(shape=(n_text_features,))

# Layers are created and applied in the same order as before so that
# automatic layer naming stays unchanged.
audio_dense = Dense(128, activation='relu')
x_audio = audio_dense(audio_input)
text_dense = Dense(128, activation='relu')
x_text = text_dense(text_input)

merged = Concatenate()([x_audio, x_text])
output = Dense(1, activation='linear')(merged)

model = Model(inputs=[audio_input, text_input], outputs=output)

# Mean squared error is the training loss; MAE is tracked as a metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
# Callbacks: stop once there has been no improvement for 10 epochs (restoring
# the best weights seen), and multiply the learning rate by 0.2 after 5 epochs
# without improvement. Both use their default monitored quantity.
early_stopping = EarlyStopping(patience=10, restore_best_weights=True)
lr_on_plateau = ReduceLROnPlateau(factor=0.2, patience=5)

history = model.fit(
    x=[X_train_audio, X_train_text],
    y=y_train,
    epochs=100,
    # 20% of the training rows are held out by Keras for validation.
    validation_split=0.2,
    callbacks=[early_stopping, lr_on_plateau],
)

# Evaluate the model
# Predict on the held-out test split; the model output is flattened to 1-D so
# it lines up with y_test. Metrics are computed and printed one after the
# other, MAE first, then R².
y_pred = model.predict([X_test_audio, X_test_text]).flatten()

for metric_label, metric_fn in (("MAE", mean_absolute_error), ("R²", r2_score)):
    print(f"{metric_label}: {metric_fn(y_test, y_pred):.2f}")





Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.





From C:\Users\niksh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend.py:1398: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.






From C:\Users\niksh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\optimizers\__init__.py:309: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



Epoch 1/100



From C:\Users\niksh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\tf_utils.py:492: The name tf.ragged.RaggedTensorValue is deprecated. Please use tf.compat.v1.ragged.RaggedTensorValue instead.






From C:\Users\niksh\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\base_layer_utils.py:384: The name tf.executing_eagerly_outside_functions is deprecated. Please use tf.compat.v1.executing_eagerly_outside_functions instead.



Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7