In [1]:
"""
This file arranges the data in the `all_data.csv` file in such a way that 
the content most similar to each user is placed at the top.
"""
import functools
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import streamlit as st

In [2]:
df = pd.read_csv('../data/all_data.csv', index_col=0)

In [3]:
df.head(2)

Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,first_broadcast,...,synopsis,synopsis_nostopwords,unsuitable_ratio,unsuitable_ratio2,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity
0,CBBC,Operation Ouch! - Series 9: 11. Blink and You’...,"BBC, iPlayer, TV, Operation Ouch!, Series 9: 1...",PG,"injury detail, rude humour, threat",Why is Dr Xand dressed as a bird in the lab? F...,3.0,28 mins,1692,5pm 17 Mar 2021,...,Why is Dr Xand dressed as a bird in the lab? F...,dr xand dressed bird lab? find blink today ’ e...,-0.0,-0.2,0.71,0.18152,0.13669,0.00653,-0.22292,0.989698
1,CBBC,The Worst Witch - Series 4: 1. The Three Impos...,"BBC, iPlayer, TV, The Worst Witch, Series 4: 1...",PG,threat,A botched potions experiment leaves Mildred wi...,3.0,28 mins,1693,27 Jan 2020,...,When a botched potions experiment leaves Mildr...,botched potion experiment leaf mildred whole n...,-0.0,-0.0,0.71,0.21404,0.164435,0.029725,-0.204455,0.985583


In [4]:
# Gets similarity scores for all other movies
def get_similarities(movie_index):
    similarities = similarities_df.loc[movie_index]
    # similarities.drop(movie_index, inplace=True)
    return similarities.rename('similarity').to_frame()

# Scales relevant features to domain [0, 1]
def scale_features(movie_df):
    scaler = MinMaxScaler((0,1))
    scaler.fit(movie_df.similarity.to_frame())
    movie_df['similarity_scaled'] = scaler.transform(movie_df.similarity.to_frame())
    scaler.fit(movie_df.diversity.to_frame())
    movie_df['diversity_scaled'] = scaler.transform(movie_df.diversity.to_frame())
    return movie_df

# Calculates weighter average for relevant (scaled) features
def weighted_score(movie, similarity_weight, diversity_weight):
    sw = movie ['similarity_scaled'] * similarity_weight
    dw = movie['diversity_scaled'] * diversity_weight
    total_weights = similarity_weight + diversity_weight
    return (sw + dw) / total_weights

# recommmends the movies + uses function from above
def recommend_movies(movie_index, diversity_factor=0.5, similarity_factor=1):
    # Filter movies with the sentiement thats is given in streamlit
    filtered_df = df[df['vader_sentiment'] == "Positive"] 
    similarities = get_similarities(movie_index)
    # DataFrame with relevant features for filtered movies
    movie_df = filtered_df.join(similarities)
    movie_df = scale_features(movie_df)
    # Calculate the weighted score
    weight_func = functools.partial(weighted_score, 
                                    similarity_weight=similarity_factor,
                                    diversity_weight=diversity_factor)
    movie_df['recommender_score'] = movie_df.apply(weight_func, axis='columns')
    return movie_df.sort_values('recommender_score', ascending=False)

In [5]:
# Calculate the diversity score
similarities_df = pd.read_csv('../data/all_data_similarity.csv')

In [6]:
one = recommend_movies(1)
one.head()

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,first_broadcast,...,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity,similarity,similarity_scaled,diversity_scaled,recommender_score
2,CBBC,Shaun the Sheep - Series 4: 6. The Smelly Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 4: 6...",U,clean,"Shaun, Bitzer and the flock decide to give the...",,7 mins,425,3:50pm 10 Feb 2014,...,1.0,0.30144,0.351305,0.401035,0.550135,0.987138,,,0.553257,
16,CBBC,Malory Towers - Series 2: 13. The Lost Treasure,"BBC, iPlayer, TV, Malory Towers, Series 2: 13....",U,clean,"Darrell and Sally make a dramatic discovery, b...",3.0,25 mins,1495,22 Nov 2021,...,1.0,0.6939,0.743765,0.793495,0.84448,0.989856,,,0.749886,
19,CBBC,Shaun the Sheep - Series 5: 20. Sheep Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 5: 2...",U,clean,Timmy sees a particularly grumpy farmer giving...,,7 mins,420,3:50pm 18 Nov 2016,...,1.0,0.69814,0.748005,0.797735,0.84766,0.985867,,,0.461284,
22,CBBC,Danger Mouse - Series 1: 24. Escape from Big Head,"BBC, iPlayer, TV, Danger Mouse, Series 1: 24. ...",U,very mild fantasy action,The Big Head base computer locks up everyone i...,,11 mins,664,7:25am 24 Feb 2016,...,1.0,0.29602,0.24606,0.09614,-0.152705,0.987758,,,0.598063,
32,Films,The Keeper,"BBC, iPlayer, TV, The Keeper",15,strong language,"At the end of WWII, Bert Trautmann stays in th...",1.0,109 mins,6554,2018,...,0.21,0.3816,0.4427,0.5419,0.6399,0.988112,,,0.623697,


In [9]:
two = recommend_movies(140)
two.head()

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


Unnamed: 0,category,title,tags,age_rating,rating_description,description,representation,duration_txt,duration_sec,first_broadcast,...,age_rating_numeric,cas48,cas911,cas1214,cas1517,diversity,similarity,similarity_scaled,diversity_scaled,recommender_score
2,CBBC,Shaun the Sheep - Series 4: 6. The Smelly Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 4: 6...",U,clean,"Shaun, Bitzer and the flock decide to give the...",,7 mins,425,3:50pm 10 Feb 2014,...,1.0,0.30144,0.351305,0.401035,0.550135,0.987138,,,0.553257,
16,CBBC,Malory Towers - Series 2: 13. The Lost Treasure,"BBC, iPlayer, TV, Malory Towers, Series 2: 13....",U,clean,"Darrell and Sally make a dramatic discovery, b...",3.0,25 mins,1495,22 Nov 2021,...,1.0,0.6939,0.743765,0.793495,0.84448,0.989856,,,0.749886,
19,CBBC,Shaun the Sheep - Series 5: 20. Sheep Farmer,"BBC, iPlayer, TV, Shaun the Sheep, Series 5: 2...",U,clean,Timmy sees a particularly grumpy farmer giving...,,7 mins,420,3:50pm 18 Nov 2016,...,1.0,0.69814,0.748005,0.797735,0.84766,0.985867,,,0.461284,
22,CBBC,Danger Mouse - Series 1: 24. Escape from Big Head,"BBC, iPlayer, TV, Danger Mouse, Series 1: 24. ...",U,very mild fantasy action,The Big Head base computer locks up everyone i...,,11 mins,664,7:25am 24 Feb 2016,...,1.0,0.29602,0.24606,0.09614,-0.152705,0.987758,,,0.598063,
32,Films,The Keeper,"BBC, iPlayer, TV, The Keeper",15,strong language,"At the end of WWII, Bert Trautmann stays in th...",1.0,109 mins,6554,2018,...,0.21,0.3816,0.4427,0.5419,0.6399,0.988112,,,0.623697,


In [None]:
ratings = pd.read_csv('../data/ratings/' + 'asha' + '_ratings.csv')
highest_rated_row = ratings[ratings['rating']==ratings['rating'].max()]

In [None]:
highest_rated_row

In [None]:
highest_rated_row['showId'].iloc[0]

In [None]:
asha = recommend_movies(1)
asha.head(2)

In [None]:
ratings1 = pd.read_csv('../data/ratings/' + 'zang' + '_ratings.csv')
highest_rated_row1 = ratings1[ratings1['rating']==ratings1['rating'].max()]

In [None]:
highest_rated_row1

In [None]:
zang = recommend_movies(40)
zang.head(2)

In [None]:
ratings1 = pd.read_csv('../data/ratings/' + 'sine' + '_ratings.csv')
highest_rated_row1 = ratings1[ratings1['rating']==ratings1['rating'].max()]
sine = recommend_movies(highest_rated_row1['showId'].iloc[0])
sine.head(3)

In [None]:
ratings2 = pd.read_csv('../data/ratings/' + 'michelle' + '_ratings.csv')
highest_rated_row2 = ratings2[ratings2['rating']==ratings2['rating'].max()]
michelle = recommend_movies(highest_rated_row2['showId'].iloc[0])
michelle.head(3)

In [None]:
ratings1 = pd.read_csv('../data/ratings/' + 'zane' + '_ratings.csv')
highest_rated_row1 = ratings1[ratings1['rating']==ratings1['rating'].max()]
zane = recommend_movies(highest_rated_row1['showId'].iloc[0])
zane.head(3)