In [39]:
import pandas as pd
import ast

data_path = '../paired_data.csv'

data = pd.read_csv(data_path)
data["overview_embedding"] = data["overview_embedding"].apply(ast.literal_eval)

In [2]:
from sentence_transformers import SentenceTransformer
transformer = SentenceTransformer('all-MiniLM-L6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [44]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from joblib import dump

# Convert embedding column from list to numpy array
data["overview_embedding"] = data["overview_embedding"].apply(np.array)

# Split the data into features (X) and target (y)
X = np.vstack(data["overview_embedding"].values)
y = data['average_rating']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(X_train_scaled.shape)

# Define the neural network architecture
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.05)
loss_fn = tf.keras.losses.MeanSquaredError(reduction="sum_over_batch_size", name="mean_absolute_percentage_error")
model.compile(optimizer=optimizer, loss=loss_fn)

# Train the model
model.fit(X_train_scaled, y_train, epochs=30, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)

# Save the model
dump(scaler, 'scaler.joblib')
model.save('movie_rating_prediction_model.keras')

(5952, 384)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - loss: 21.9383 - val_loss: 0.3498
Epoch 2/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.4111 - val_loss: 0.3753
Epoch 3/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.3898 - val_loss: 0.3706
Epoch 4/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.4052 - val_loss: 0.3548
Epoch 5/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.3986 - val_loss: 0.3527
Epoch 6/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.3810 - val_loss: 0.3519
Epoch 7/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.3969 - val_loss: 0.3491
Epoch 8/30
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 0.3880 - val_loss: 0.3473
Epoch 9/30
[1m149/149[0m [32m━━━━━━━

In [38]:
from sklearn.preprocessing import StandardScaler
from joblib import load
import numpy as np

overview: str = input("Give your movie overview: ")
print(overview)

# Assuming scaler is your StandardScaler object
scaler = StandardScaler()

# Later, to load the scaler from the file
scaler = load('scaler.joblib')

# Now you can use the loaded scaler to transform new data
scaled_embed = scaler.transform(np.array(transformer.encode(overview)).reshape(1, -1))

predicted_rating = model.predict(scaled_embed)

print(f"prediction: {predicted_rating}")

The filmmaking team behind the hits "Scary Movie," "Date Movie," "Epic Movie" and "Meet The Spartans" this time puts its unique, inimitable stamp on one of the biggest and most bloated movie genres of all time -- the disaster film.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
prediction: [[3.089491]]


In [33]:
import pandas as pd
from sentence_transformers import SentenceTransformer

# Load the SentenceTransformer model
transformer = SentenceTransformer('all-MiniLM-L6-v2')

overview: str = input("Give your movie overview: ")
print(overview)

# Now you can use the loaded scaler to transform new data
scaled_embed = scaler.transform(np.array(transformer.encode(overview)).reshape(1, -1))

predicted_rating = model.predict(scaled_embed)

print(f"prediction: {predicted_rating}")




Imprisoned in the 1940s for the double murder of his wife and her lover, upstanding banker Andy Dufresne begins a new life at the Shawshank prison, where he puts his accounting skills to work for an amoral warden. During his long stretch in prison, Dufresne comes to be admired by the other inmates -- including an older prisoner named Red -- for his integrity and unquenchable sense of hope.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
prediction: [[3.339839]]
