In [5]:
import numpy as np
import pandas as pd
import nltk
import re
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, cohen_kappa_score
from tensorflow.keras.models import load_model

# Load your .h5 models
lstm_model = load_model('final_lstm.h5')  # Replace with actual path
gru_model = load_model('final_gru.h5')    # Replace with actual path

# Preparing Dataset
df = pd.read_csv("Dataset/training_set_rel3.tsv", sep='\t', encoding='ISO-8859-1')
df.dropna(axis=1, inplace=True)
df.drop(columns=['domain1_score', 'rater1_domain1', 'rater2_domain1'], inplace=True, axis=1)

temp = pd.read_csv("Processed_data.csv")
temp.drop("Unnamed: 0", inplace=True, axis=1)
df['domain1_score'] = temp['final_score']

# Make Dataset
y = df['domain1_score']
df.drop('domain1_score', inplace=True, axis=1)
X = df
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# PREPROCESSING
train_e = X_train['essay'].tolist()
test_e = X_test['essay'].tolist()
train_sents = []
test_sents = []

stop_words = set(stopwords.words('english')) 

def sent2word(x):
    x = re.sub("[^A-Za-z]", " ", x)
    x.lower()
    filtered_sentence = [] 
    words = x.split()
    for w in words:
        if w not in stop_words: 
            filtered_sentence.append(w)
    return filtered_sentence

def essay2word(essay):
    essay = essay.strip()
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    raw = tokenizer.tokenize(essay)
    final_words = []
    for i in raw:
        if len(i) > 0:
            final_words.append(sent2word(i))
    return final_words

for i in train_e:
    train_sents += essay2word(i)

for i in test_e:
    test_sents += essay2word(i)

# Preparing WORD2VEC and Vectorizing the essays
num_features = 300
model = Word2Vec(train_sents, workers=4, vector_size=num_features, min_count=40, window=10, sample=1e-3)
model.init_sims(replace=True)

def makeVec(words, model, num_features):
    vec = np.zeros((num_features,), dtype="float32")
    noOfWords = 0.
    index2word_set = set(model.wv.index_to_key)
    for i in words:
        if i in index2word_set:
            noOfWords += 1
            vec = np.add(vec, model.wv[i])
    if noOfWords > 0:
        vec = np.divide(vec, noOfWords)
    return vec

def getVecs(essays, model, num_features):
    c = 0
    essay_vecs = np.zeros((len(essays), num_features), dtype="float32")
    for i in essays:
        essay_vecs[c] = makeVec(i, model, num_features)
        c += 1
    return essay_vecs

# Vectorizing the essays
clean_train = [sent2word(i) for i in train_e]
clean_test = [sent2word(i) for i in test_e]

training_vectors = getVecs(clean_train, model, num_features)
testing_vectors = getVecs(clean_test, model, num_features)

# Reshaping the vectors for LSTM and GRU
training_vectors = np.reshape(training_vectors, (training_vectors.shape[0], 1, training_vectors.shape[1]))
testing_vectors = np.reshape(testing_vectors, (testing_vectors.shape[0], 1, testing_vectors.shape[1]))

# Evaluate both models
lstm_predictions = lstm_model.predict(testing_vectors)
gru_predictions = gru_model.predict(testing_vectors)



  model.init_sims(replace=True)


[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m122/122[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [4]:
# Remove Cohen's Kappa and use regression metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Flatten the predictions to be 1D arrays
lstm_predictions = np.squeeze(lstm_predictions)
gru_predictions = np.squeeze(gru_predictions)

# Check the shape of the predictions
print("LSTM Predictions Shape:", lstm_predictions.shape)
print("GRU Predictions Shape:", gru_predictions.shape)

# Flatten y_test as well to ensure it's 1D
y_test = np.array(y_test).flatten()

# Calculate MAE and MSE
lstm_mae = np.mean(np.abs(lstm_predictions - y_test))
gru_mae = np.mean(np.abs(gru_predictions - y_test))

lstm_mse = mean_squared_error(y_test, lstm_predictions)
gru_mse = mean_squared_error(y_test, gru_predictions)

# Calculate R² score (coefficient of determination)
lstm_r2 = r2_score(y_test, lstm_predictions)
gru_r2 = r2_score(y_test, gru_predictions)

# Print metrics for comparison
print("LSTM Model Metrics:")
print(f"Mean Absolute Error: {lstm_mae}")
print(f"Mean Squared Error: {lstm_mse}")
print(f"R² Score: {lstm_r2}")

print("\nGRU Model Metrics:")
print(f"Mean Absolute Error: {gru_mae}")
print(f"Mean Squared Error: {gru_mse}")
print(f"R² Score: {gru_r2}")

# Select the best model based on MAE or other regression metrics
if gru_mae < lstm_mae:
    print("\nGRU model performs better and will be deployed to the frontend.")
    best_model = gru_model
else:
    print("\nLSTM model performs better and will be deployed to the frontend.")
    best_model = lstm_model

# Save the selected model for deployment
best_model.save('best_model_for_deployment.h5')




LSTM Predictions Shape: (3893,)
GRU Predictions Shape: (3893,)
LSTM Model Metrics:
Mean Absolute Error: 1.4317829102744732
Mean Squared Error: 3.4872927366571815
R² Score: 0.42001307454040815

GRU Model Metrics:
Mean Absolute Error: 1.4056211550132247
Mean Squared Error: 3.374278022683318
R² Score: 0.43880904649893904

GRU model performs better and will be deployed to the frontend.
