In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
# Load the prepared games table; the path is relative to the notebook's working directory.
df_steam_games = pd.read_csv('Prepped_games_out.csv')

In [5]:
# Score = share of a game's reviews that are positive (added as a new column
# on the full games table).
df_steam_games["Score"] = df_steam_games["Positive Reviews"].div(df_steam_games['Total Reviews'])

# Modelling frame: description text + target. dropna() discards any row that
# is missing either one (e.g. a NaN score produced by 0 / 0 reviews). The
# surviving rows keep their original index labels from df_steam_games.
df_rec_data = df_steam_games[['prepped_description_lemm', 'Score']].dropna()

In [6]:
# Show the column labels of the games table, including the derived 'Score' column.
df_steam_games.columns

Index(['App ID', 'Name', 'Short Description', 'Positive Reviews',
       'Negative Reviews', 'Total Reviews', 'prepped_description_lemm',
       'Score'],
      dtype='object')

In [7]:
# Word-bigram TF-IDF features (ngram_range=(2, 2) means bigrams only, no
# unigrams). The custom token pattern also keeps single-character tokens,
# which scikit-learn's default pattern would drop. No stop-word filtering is
# applied here.
vectorizer = TfidfVectorizer(
    analyzer='word',
    token_pattern=r"(?u)\b\w+\b",
    ngram_range=(2, 2),
    stop_words=None,
)

# One sparse row per row of df_rec_data, in the same order.
descriptions_vec = vectorizer.fit_transform(df_rec_data['prepped_description_lemm'])

In [8]:
# Hold out 20% of the rows for evaluation. The split is seeded so that
# Restart Kernel -> Run All reproduces the same partition on every run;
# without random_state the train/test rows change each time.
descriptions_vec_train, descriptions_vec_test, score_train, score_test = train_test_split(
    descriptions_vec,
    df_rec_data['Score'],
    test_size=0.2,
    random_state=42,
)
print(descriptions_vec_train.shape, descriptions_vec_test.shape)
print(score_train.shape, score_test.shape)

(43967, 563318) (10992, 563318)
(43967,) (10992,)


In [9]:
# Shallow random forest (100 trees, depth 3) regressing Score on the TF-IDF
# features. random_state pins the forest's own randomness so that, for the
# same training data, repeated fits give the same model.
rf = RandomForestRegressor(
    n_estimators=100,
    max_depth=3,
    random_state=42,
)
rf.fit(descriptions_vec_train, score_train)

RandomForestRegressor(max_depth=3, random_state=42)

In [None]:
def recommend_movies(movie_name, num_recommendations=10):
    """Recommend the games whose descriptions are most similar to a given game.

    Despite the "movie" naming, this works on the games table: the query is
    matched against the 'App ID' column, candidates are ranked by cosine
    similarity of their TF-IDF description vectors, and each candidate gets
    a Score predicted by the fitted random forest ``rf``.

    Relies on the notebook globals ``df_steam_games``, ``df_rec_data``,
    ``descriptions_vec`` and ``rf``.

    Args:
        movie_name: Value to look up in the 'App ID' column.
        num_recommendations: Maximum number of similar games to return.
            Values below 1 produce an empty result.

    Returns:
        pandas.DataFrame with columns 'Movie' (the App ID of each similar
        game) and 'Rating' (the Score predicted by ``rf``), ordered from most
        to least similar and indexed by the games' row labels in
        ``df_steam_games``.

    Raises:
        IndexError: If no game with that App ID is among the rows that were
            vectorised (it is unknown, or it was removed by ``dropna``).
    """
    # descriptions_vec was fitted on df_rec_data (the rows that survived
    # dropna), so row i of the matrix is df_rec_data.iloc[i] -- NOT
    # df_steam_games.iloc[i]. Re-align the games table to that row order
    # before doing any positional lookups.
    games = df_steam_games.loc[df_rec_data.index]
    app_ids = games['App ID'].to_numpy()

    # Find the query game's row position in the vectorised matrix.
    matches = np.flatnonzero(games['App ID'].eq(movie_name).to_numpy())
    if matches.size == 0:
        raise IndexError(
            f"No game with App ID {movie_name!r} among the vectorised descriptions"
        )
    query_pos = int(matches[0])

    # Slice (rather than integer-index) so the query stays a 2-D 1 x n_features
    # matrix. cosine_similarity then returns shape (1, n_rows); flatten it so
    # argsort ranks the rows instead of sorting along a length-1 axis.
    similarities = cosine_similarity(
        descriptions_vec[query_pos:query_pos + 1], descriptions_vec
    ).ravel()

    # Rank by descending similarity and drop the query itself explicitly:
    # a game with an identical description could otherwise tie at 1.0 and
    # push the query out of rank 0.
    ranked = np.argsort(-similarities, kind='stable')
    limit = max(int(num_recommendations), 0)
    similar_movies = ranked[ranked != query_pos][:limit]

    if similar_movies.size == 0:
        # Nothing to score; rf.predict rejects an empty sample matrix.
        return pd.DataFrame({'Movie': [], 'Rating': []})

    # Use the Random Forest model to predict a Score for each similar game.
    similar_movies_ratings = rf.predict(descriptions_vec[similar_movies])

    # Positional selection on the aligned arrays; keep the original row labels
    # as the index so results can be joined back to df_steam_games.
    recommendations = pd.DataFrame(
        {'Movie': app_ids[similar_movies], 'Rating': similar_movies_ratings},
        index=games.index[similar_movies],
    )
    return recommendations