# Testing Neural Network Model

In [None]:
#imports
import pandas as pd 
import numpy as np 
import joblib 
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Locations of the saved artifacts, relative to this notebook.
model_path = "../models/nn_genre_model_1.h5"  # swap in a different NN model file here
tokenizer_path = "../models/tokenizer.pkl"
encoder_path = "../models/label_encoder.pkl"

# Load the Keras model first, then the two joblib-serialized preprocessing
# objects (tokenizer, then label encoder) in that order.
model = load_model(model_path)
tokenizer, label_encoder = (
    joblib.load(artifact_path) for artifact_path in (tokenizer_path, encoder_path)
)



# Test on a Single Example

In [3]:
# Fixed sequence length every input is padded/truncated to.
# NOTE(review): presumably this must equal the length used when the model
# was trained — confirm against the training notebook.
max_len = 200

sample_text = "A young boy discovers he has magical powers and attends a wizarding school."

# Tokenize the one-element batch and pad/truncate at the end to max_len.
padded = pad_sequences(
    tokenizer.texts_to_sequences([sample_text]),
    maxlen=max_len,
    padding="post",
    truncating="post",
)

# Take the highest-scoring class index for each row and map it back to its
# original label through the label encoder.
pred = model.predict(padded)
predicted_genre = label_encoder.inverse_transform(pred.argmax(axis=1))

print("Input:", sample_text)
print("Predicted genre:", predicted_genre[0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398ms/step
Input: A young boy discovers he has magical powers and attends a wizarding school.
Predicted genre: family


# Test on test.csv

In [4]:
# Read the test split, then pull out the text column (model input) and the
# genre column (target labels).
test_df = pd.read_csv("../clean_data/test.csv")
X_test, y_test = test_df["cleaned_synopsis"], test_df["genre"]

In [5]:
# Tokenize the test synopses with the loaded tokenizer, then pad/truncate
# each sequence at the end so every row has exactly max_len entries.
X_test_seq = tokenizer.texts_to_sequences(X_test)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding="post", truncating="post")

# One-hot encode the genre labels. num_classes is pinned to the encoder's
# full class list: without it, to_categorical infers the width from the
# largest label index present in y_test, so a test split that happens to
# lack the last class would yield a matrix narrower than the encoder's
# class count.
# NOTE(review): assumes the model's output width equals
# len(label_encoder.classes_) — confirm against the training notebook.
y_test_enc = to_categorical(
    label_encoder.transform(y_test),
    num_classes=len(label_encoder.classes_),
)

In [6]:
# Score the model on the padded test sequences against the one-hot labels.
# The result is unpacked as exactly two values: the loss and the accuracy.
eval_results = model.evaluate(X_test_pad, y_test_enc)
loss, accuracy = eval_results

# Report both metrics, one per line.
for caption, metric_value in zip(("Test Loss:", "Test Accuracy:"), eval_results):
    print(caption, metric_value)

[1m254/254[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.2258 - loss: 5.9391
Test Loss: 5.980488300323486
Test Accuracy: 0.2285185158252716


In [8]:
# Load the training split and separate its text and label columns.
train_df = pd.read_csv("../clean_data/train.csv")
X_train, y_train = train_df["cleaned_synopsis"], train_df["genre"]

# Print the relative frequency of each genre, train split first and then
# the test split, so the two class distributions can be compared.
for genre_labels in (y_train, y_test):
    print(genre_labels.value_counts(normalize=True))

genre
fantasy      0.100005
adventure    0.100005
thriller     0.100005
crime        0.100005
scifi        0.100005
horror       0.100005
romance      0.100005
action       0.100005
mystery      0.099979
family       0.099979
Name: proportion, dtype: float64
genre
scifi        0.1
mystery      0.1
crime        0.1
adventure    0.1
fantasy      0.1
romance      0.1
family       0.1
action       0.1
thriller     0.1
horror       0.1
Name: proportion, dtype: float64
