In [4]:
"""
This file arranges the data in the `all_data.csv` file in such a way that 
the content most similar to each user is placed at the top.
"""
import functools
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import streamlit as st

In [6]:
df = pd.read_csv('../data/all_data.csv')

In [20]:
# Gets similarity scores for all other movies
def get_similarities(movie_index):
    similarities = similarities_df.loc[movie_index]
    # similarities.drop(movie_index, inplace=True)
    return similarities.rename('similarity').to_frame()

# Scales relevant features to domain [0, 1]
def scale_features(movie_df):
    scaler = MinMaxScaler((0,1))
    scaler.fit(movie_df.similarity.to_frame())
    movie_df['similarity_scaled'] = scaler.transform(movie_df.similarity.to_frame())
    scaler.fit(movie_df.diversity.to_frame())
    movie_df['diversity_scaled'] = scaler.transform(movie_df.diversity.to_frame())
    return movie_df

# Calculates weighter average for relevant (scaled) features
def weighted_score(movie, similarity_weight, diversity_weight):
    sw = movie ['similarity_scaled'] * similarity_weight
    dw = movie['diversity_scaled'] * diversity_weight
    total_weights = similarity_weight + diversity_weight
    return (sw + dw) / total_weights

# recommmends the movies + uses function from above
def recommend_movies(movie_index, diversity_factor=0.3, similarity_factor=1):
    # Filter movies with the sentiement thats is given in streamlit
    filtered_df = df[df['vader_sentiment'] == "Positive"] 
    similarities = get_similarities(movie_index)
    # DataFrame with relevant features for filtered movies
    movie_df = filtered_df.join(similarities)
    movie_df = scale_features(movie_df)
    # Calculate the weighted score
    weight_func = functools.partial(weighted_score, 
                                    similarity_weight=similarity_factor,
                                    diversity_weight=diversity_factor)
    movie_df['recommender_score'] = movie_df.apply(weight_func, axis='columns')
    return movie_df.sort_values('recommender_score', ascending=False)

In [21]:
# Calculate the diversity score
similarities_df = pd.read_csv('../data/all_data_similarity.csv')
similarity_avg = similarities_df.mean(axis=1)

In [22]:
ratings = pd.read_csv('../data/ratings/' + 'asha' + '_ratings.csv')
highest_rated_row = ratings[ratings['rating']==ratings['rating'].max()]
asha = recommend_movies(highest_rated_row['showId'].iloc[0])

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


In [23]:
asha.head(3)

Unnamed: 0.1,Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,...,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity,similarity,similarity_scaled,diversity_scaled,recommender_score
2,2,CBBC,Shaun the Sheep - Series 4: 6. The Smelly Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 4: 6...",U,clean,"Shaun, Bitzer and the flock decide to give the...",,7 mins,425,...,1.0,0.30144,0.351305,0.401035,0.550135,0.978893,,,1.0,
16,16,CBBC,Malory Towers - Series 2: 13. The Lost Treasure,"BBC, iPlayer, TV, Malory Towers, Series 2: 13....",U,clean,"Darrell and Sally make a dramatic discovery, b...",3.0,25 mins,1495,...,1.0,0.6939,0.743765,0.793495,0.84448,0.923508,,,0.943306,
19,19,CBBC,Shaun the Sheep - Series 5: 20. Sheep Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 5: 2...",U,clean,Timmy sees a particularly grumpy farmer giving...,,7 mins,420,...,1.0,0.69814,0.748005,0.797735,0.84766,0.907088,,,0.926497,


In [24]:
ratings1 = pd.read_csv('../data/ratings/' + 'zang' + '_ratings.csv')
highest_rated_row1 = ratings1[ratings1['rating']==ratings1['rating'].max()]

In [25]:
zang = recommend_movies(highest_rated_row1['showId'].iloc[0])
zang.head(3)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Unnamed: 0.1,Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,...,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity,similarity,similarity_scaled,diversity_scaled,recommender_score
2,2,CBBC,Shaun the Sheep - Series 4: 6. The Smelly Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 4: 6...",U,clean,"Shaun, Bitzer and the flock decide to give the...",,7 mins,425,...,1.0,0.30144,0.351305,0.401035,0.550135,0.978893,,,1.0,
16,16,CBBC,Malory Towers - Series 2: 13. The Lost Treasure,"BBC, iPlayer, TV, Malory Towers, Series 2: 13....",U,clean,"Darrell and Sally make a dramatic discovery, b...",3.0,25 mins,1495,...,1.0,0.6939,0.743765,0.793495,0.84448,0.923508,,,0.943306,
19,19,CBBC,Shaun the Sheep - Series 5: 20. Sheep Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 5: 2...",U,clean,Timmy sees a particularly grumpy farmer giving...,,7 mins,420,...,1.0,0.69814,0.748005,0.797735,0.84766,0.907088,,,0.926497,


In [26]:
ratings1 = pd.read_csv('../data/ratings/' + 'sine' + '_ratings.csv')
highest_rated_row1 = ratings1[ratings1['rating']==ratings1['rating'].max()]
sine = recommend_movies(highest_rated_row1['showId'].iloc[0])
sine.head(3)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Unnamed: 0.1,Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,...,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity,similarity,similarity_scaled,diversity_scaled,recommender_score
2,2,CBBC,Shaun the Sheep - Series 4: 6. The Smelly Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 4: 6...",U,clean,"Shaun, Bitzer and the flock decide to give the...",,7 mins,425,...,1.0,0.30144,0.351305,0.401035,0.550135,0.978893,,,1.0,
16,16,CBBC,Malory Towers - Series 2: 13. The Lost Treasure,"BBC, iPlayer, TV, Malory Towers, Series 2: 13....",U,clean,"Darrell and Sally make a dramatic discovery, b...",3.0,25 mins,1495,...,1.0,0.6939,0.743765,0.793495,0.84448,0.923508,,,0.943306,
19,19,CBBC,Shaun the Sheep - Series 5: 20. Sheep Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 5: 2...",U,clean,Timmy sees a particularly grumpy farmer giving...,,7 mins,420,...,1.0,0.69814,0.748005,0.797735,0.84766,0.907088,,,0.926497,


In [27]:
ratings2 = pd.read_csv('../data/ratings/' + 'michelle' + '_ratings.csv')
highest_rated_row2 = ratings2[ratings2['rating']==ratings2['rating'].max()]
michelle = recommend_movies(highest_rated_row2['showId'].iloc[0])
michelle.head(3)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Unnamed: 0.1,Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,...,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity,similarity,similarity_scaled,diversity_scaled,recommender_score
2,2,CBBC,Shaun the Sheep - Series 4: 6. The Smelly Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 4: 6...",U,clean,"Shaun, Bitzer and the flock decide to give the...",,7 mins,425,...,1.0,0.30144,0.351305,0.401035,0.550135,0.978893,,,1.0,
16,16,CBBC,Malory Towers - Series 2: 13. The Lost Treasure,"BBC, iPlayer, TV, Malory Towers, Series 2: 13....",U,clean,"Darrell and Sally make a dramatic discovery, b...",3.0,25 mins,1495,...,1.0,0.6939,0.743765,0.793495,0.84448,0.923508,,,0.943306,
19,19,CBBC,Shaun the Sheep - Series 5: 20. Sheep Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 5: 2...",U,clean,Timmy sees a particularly grumpy farmer giving...,,7 mins,420,...,1.0,0.69814,0.748005,0.797735,0.84766,0.907088,,,0.926497,


In [28]:
ratings1 = pd.read_csv('../data/ratings/' + 'zane' + '_ratings.csv')
highest_rated_row1 = ratings1[ratings1['rating']==ratings1['rating'].max()]
zane = recommend_movies(highest_rated_row1['showId'].iloc[0])
zane.head(3)

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Unnamed: 0.1,Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,...,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity,similarity,similarity_scaled,diversity_scaled,recommender_score
2,2,CBBC,Shaun the Sheep - Series 4: 6. The Smelly Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 4: 6...",U,clean,"Shaun, Bitzer and the flock decide to give the...",,7 mins,425,...,1.0,0.30144,0.351305,0.401035,0.550135,0.978893,,,1.0,
16,16,CBBC,Malory Towers - Series 2: 13. The Lost Treasure,"BBC, iPlayer, TV, Malory Towers, Series 2: 13....",U,clean,"Darrell and Sally make a dramatic discovery, b...",3.0,25 mins,1495,...,1.0,0.6939,0.743765,0.793495,0.84448,0.923508,,,0.943306,
19,19,CBBC,Shaun the Sheep - Series 5: 20. Sheep Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 5: 2...",U,clean,Timmy sees a particularly grumpy farmer giving...,,7 mins,420,...,1.0,0.69814,0.748005,0.797735,0.84766,0.907088,,,0.926497,
