# Just a try

In [7]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import diags

# Caricamento dei dati
movies  = pd.read_csv('./data/Netflix_Dataset_Movie.csv')
ratings = pd.read_csv('./data/Netflix_Dataset_Rating.csv')

# Merge dei dataset
merged_data = ratings.merge(movies, on='Movie_ID')

# Pre-elaborazione dei titoli dei film (unione di anno e titolo per arricchire il contenuto)
movies['Content'] = movies['Name'] + ' ' + movies['Year'].astype(str)

# Trasformazione TF-IDF sui contenuti dei film
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies['Content'])

def create_user_profile(user_id):
    # Filtra i film valutati dall'utente
    user_ratings = ratings[ratings['User_ID'] == user_id]
    if user_ratings.empty:
        raise ValueError(f"No ratings found for user ID {user_id}.")

    user_movies = movies[movies['Movie_ID'].isin(user_ratings['Movie_ID'])]

    # Merge per ottenere gli indici corrispondenti
    user_movies = user_movies.merge(user_ratings, on='Movie_ID')
    if user_movies.empty:
        raise ValueError(f"No matching movies found for user ID {user_id}.")

    # Costruzione della matrice diagonale dei rating
    rating_weights = diags(user_movies['Rating'].values)

    # Element-wise multiplication
    weighted_tfidf = rating_weights.dot(tfidf_matrix[user_movies.index])

    if weighted_tfidf.shape[0] == 0:
        raise ValueError(f"TF-IDF matrix has no valid rows for user ID {user_id}.")

    # Media ponderata
    user_profile = np.asarray(weighted_tfidf.mean(axis=0)).flatten()  # Convert to 1D numpy array
    return user_profile

# Funzione per fare raccomandazioni
def recommend_movies(user_id, num_recommendations=5):
    # Create the user profile
    user_profile = create_user_profile(user_id)

    # Reshape user_profile to be a 2D array
    user_profile = user_profile.reshape(1, -1)

    # Calculate cosine similarity
    similarity_scores = cosine_similarity(user_profile, tfidf_matrix)

    # Sort movies by similarity scores
    movie_indices = similarity_scores.argsort()[0][::-1]
    
    # Return top N recommended movies
    recommended_movies = movies.iloc[movie_indices[:num_recommendations]]
    return recommended_movies

# Esempio di utilizzo
user_id = 712664
recommendations = recommend_movies(user_id, num_recommendations=5)
print(recommendations)

       Movie_ID  Year               Name                 Content
12122     12123  2004           S.I.C.K.           S.I.C.K. 2004
2989       2990  2004         Go Further         Go Further 2004
9409       9410  2004               P.S.               P.S. 2004
5964       5965  2004  They Are Among Us  They Are Among Us 2004
365         366  2004              Still              Still 2004


In [14]:
merged_data[merged_data['User_ID'] == user_id].groupby('Year')['Rating'].count()

Year
1925     1
1929     1
1930     1
1931     1
1934     1
1939     1
1940     1
1941     1
1947     1
1949     1
1952     1
1954     1
1955     2
1956     1
1957     1
1959     2
1960     2
1961     2
1962     1
1963     1
1964     3
1966     1
1968     5
1969     1
1970     1
1972     4
1973     4
1974     2
1975     3
1976     4
1978     3
1979     4
1980     3
1981     8
1982     5
1983     7
1984    12
1985     6
1986     6
1987     8
1988    10
1989     4
1990     8
1991    11
1992    11
1993     9
1994    13
1995    16
1996    11
1997    11
1998    29
1999    23
2000    19
2001    15
2002    25
2003    20
2004    17
Name: Rating, dtype: int64