In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import lightgbm
import xgboost
from lightfm import LightFM
from lightfm.evaluation import precision_at_k

In [None]:
# Movie metadata tables, one CSV per entity, all read from ./data.
(
    df_actors,
    df_countries,
    df_directors,
    df_genres,
    df_imdb,
    df_locations,
    df_tags,
    df_rottem,
    df_movies,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/movie_actors.csv',
        './data/movie_countries.csv',
        './data/movie_directors.csv',
        './data/movie_genres.csv',
        './data/movie_imdb.csv',
        './data/movie_locations.csv',
        './data/movie_tags.csv',
        './data/movies_rt.csv',
        './data/movies.csv',
    )
)

# Ratings tables: labelled rows used for fitting, and the rows to predict
# for the submission.
df_ratings, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/ratings_train_mean_similarity.csv',
        './data/ratings_test_mean_similarity.csv',
    )
)

In [None]:
# Split the labelled ratings into a training part and a 30% hold-out part.
# 'rating' is the target and 'ID' is dropped from the features; the fixed
# random_state keeps the split identical across runs.
features = df_ratings.drop(columns=['rating', 'ID'])
target = df_ratings.rating
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Linear regression baseline: fit on the training split, then report the
# mean squared error and R2 on the hold-out split.
model = LinearRegression().fit(X_train, y_train)
y_predicted = model.predict(X_test)
scores = model.score(X_test, y_test)

holdout_mse = mean_squared_error(y_test, y_predicted)
holdout_r2 = r2_score(y_test, y_predicted)
print("Linear regression: ", holdout_mse)
print("Linear regression R2: ", holdout_r2)

In [None]:
# Refit the current model on every labelled rating and write the submission
# file for the rows in df_test.
#
# The feature columns are derived once from the training frame and used to
# select from both frames, so the model always predicts on the same columns,
# in the same order, that it was fitted on. This also removes the duplicated
# 'rating' entry from the old drop list and no longer requires df_test to
# carry a 'rating' column.
feature_columns = [
    column for column in df_ratings.columns if column not in ('rating', 'ID')
]
model.fit(df_ratings[feature_columns], df_ratings.rating)
prediction = model.predict(df_test[feature_columns])

# One row per test ID, predictions rounded to two decimals.
submission = pd.DataFrame(
        {'ID': df_test.ID, 'rating': np.around(prediction, 2)})
submission.to_csv('./submission_mean_similarity.csv', index=False)

In [None]:
# Upload submission_mean_similarity.csv to the Kaggle competition through the
# kaggle CLI (shell escape). The -m text is the submission message, in Spanish
# ("With similarity * 5").
# NOTE(review): this executes on every "Run All"; presumably each run counts
# as a new competition submission - confirm before re-running.
!kaggle competitions submit -c recomendacion-de-peliculas-fcen-2020 -f submission_mean_similarity.csv -m "Con similaridad * 5"

In [None]:
# LightGBM regressor with default hyper-parameters: fit on the training
# split, then report the mean squared error and R2 on the hold-out split.
lgbm = lightgbm.LGBMRegressor()
model = lgbm.fit(X_train, y_train)
y_predicted = model.predict(X_test)
scores = model.score(X_test, y_test)

# Label the metrics with the model that produced them so the printed output
# cannot be confused with the linear-regression baseline.
print("LightGBM MSE: ", mean_squared_error(y_test, y_predicted))
print("LightGBM R2: ", r2_score(y_test, y_predicted))

In [None]:
# Refit the current model on every labelled rating and write the submission
# file for the rows in df_test.
#
# The feature columns are derived once from the training frame and used to
# select from both frames, so the model always predicts on the same columns,
# in the same order, that it was fitted on. This also removes the duplicated
# 'rating' entry from the old drop list and no longer requires df_test to
# carry a 'rating' column.
feature_columns = [
    column for column in df_ratings.columns if column not in ('rating', 'ID')
]
model.fit(df_ratings[feature_columns], df_ratings.rating)
prediction = model.predict(df_test[feature_columns])

# One row per test ID, predictions rounded to two decimals.
submission = pd.DataFrame(
        {'ID': df_test.ID, 'rating': np.around(prediction, 2)})
submission.to_csv('./submission_mean_similarity_gbm.csv', index=False)

In [None]:
# Upload submission_mean_similarity_gbm.csv to the Kaggle competition through
# the kaggle CLI (shell escape). The -m text is the submission message, in
# Spanish ("With similarity gbm").
# NOTE(review): this executes on every "Run All"; presumably each run counts
# as a new competition submission - confirm before re-running.
!kaggle competitions submit -c recomendacion-de-peliculas-fcen-2020 -f submission_mean_similarity_gbm.csv -m "Con similaridad gbm"

In [None]:
# XGBoost regressor with default hyper-parameters: fit on the training
# split, then report the mean squared error and R2 on the hold-out split.
xgb = xgboost.XGBRegressor()
model = xgb.fit(X_train, y_train)
y_predicted = model.predict(X_test)
scores = model.score(X_test, y_test)

# Label the metrics with the model that produced them so the printed output
# cannot be confused with the linear-regression baseline.
print("XGBoost MSE: ", mean_squared_error(y_test, y_predicted))
print("XGBoost R2: ", r2_score(y_test, y_predicted))

In [None]:
# Refit the current model on every labelled rating and write the submission
# file for the rows in df_test.
#
# The feature columns are derived once from the training frame and used to
# select from both frames, so the model always predicts on the same columns,
# in the same order, that it was fitted on. This also removes the duplicated
# 'rating' entry from the old drop list and no longer requires df_test to
# carry a 'rating' column.
feature_columns = [
    column for column in df_ratings.columns if column not in ('rating', 'ID')
]
model.fit(df_ratings[feature_columns], df_ratings.rating)
prediction = model.predict(df_test[feature_columns])

# One row per test ID, predictions rounded to two decimals.
submission = pd.DataFrame(
        {'ID': df_test.ID, 'rating': np.around(prediction, 2)})
submission.to_csv('./submission_mean_similarity_xgb.csv', index=False)

In [None]:
# Upload submission_mean_similarity_xgb.csv to the Kaggle competition through
# the kaggle CLI (shell escape). The -m text is the submission message, in
# Spanish ("With similarity xgb").
# NOTE(review): this executes on every "Run All"; presumably each run counts
# as a new competition submission - confirm before re-running.
!kaggle competitions submit -c recomendacion-de-peliculas-fcen-2020 -f submission_mean_similarity_xgb.csv -m "Con similaridad xgb"

In [None]:
# NOTE(review): this import sits mid-notebook; the other imports live in the
# first cell - consider moving it there.
from lightfm.datasets import fetch_movielens
# Load LightFM's bundled MovieLens dataset. The result is indexed below with
# the 'train' and 'test' keys.
# NOTE(review): min_rating=5.0 presumably keeps only 5-star interactions -
# confirm against the LightFM docs.
data = fetch_movielens(min_rating=5.0)

In [None]:
# Build a LightFM model with the WARP loss and train it on the MovieLens
# training interactions (30 epochs, 2 threads).
model = LightFM(loss='warp')
model.fit(data['train'], epochs=30, num_threads=2)

# Score the trained model on the test interactions: precision@5 values,
# averaged into a single number.
precision_values = precision_at_k(model, data['test'], k=5)
test_precision = precision_values.mean()