In [11]:
import pandas as pd
import numpy as np
import json

In [2]:
def collect_master_df():
    df = pd.read_csv('movielens_data_genome/movie_dataset_public_final/scores/glmer.csv')
    titles = df['item_id'].unique()
    return df, titles

In [144]:
def generate_random_watch_history(titles, lines, n = 20):
    titles = np.random.choice(titles,n)
    sample_user = {}
    for line in lines:
        if line['item_id'] in titles:
            sample_user[line['item_id']] = line['title']
    return sample_user

In [78]:
def generate_user_history_tag(sample_user, df):
    sample_user_matrix = []

    for movie in sample_user.keys():
        #skip if for some reason the movie does not contain all labels
        if len(df[df.item_id == movie]) != 1084:
            print(f'error {movie}')
        #if the movie contains all label values, obtain the tag vector from the main df
        else:
            matrix = list(df[df.item_id == movie].score.values)
            sample_user_matrix.append(matrix)

    #turning the list of lists into a matrix as a 2D array
    mat = np.array(sample_user_matrix)

    return mat

In [138]:
def generate_user_pref_vector(mat, f = '2-norm'):

    if f == '2-norm':
        matsq = mat**2
        agg = np.average(matsq, axis = 0)
        aggroot = agg**0.5

        return aggroot
    
    else:
        agg = []

        for col in mat.T:
            val = f(col)
            agg.append(val)
        
        return agg

In [18]:
def generate_master_matrix(df, titles):
    master_mat = []

    for movie in titles:
        if len(df[df.item_id == movie]) != 1084:
            print(f'error {movie}')

        else:
            matrix = list(df[df.item_id == movie].score.values)
            master_mat.append(matrix)
    master_mat = np.array(master_mat)

    return master_mat

In [20]:
def generate_user_movie_interest(master_mat, user_tag_interest, g = '2-norm'):
    if g == '2-norm':
        rows, _ = master_mat.shape
        dist = []
        for r in range(rows):
            d = np.linalg.norm(user_tag_interest-master_mat[r])
            dist.append(d)
        return dist
    
    else:
        dist = []
        for i in master_mat:
            val = g(user_tag_interest,i)
            dist.append(val)
        return dist


In [12]:
def read_metadata():
    lines = []
    for line in open('movielens_data_genome\\movie_dataset_public_final\\raw\\metadata_updated.json', 'r'):
        lines.append(json.loads(line))
    return lines

In [181]:
def generate_recommendation(dists, lines, titles, n_recs = 10, users = 1):
    if users == 1:
        dfresult = pd.DataFrame(titles, columns = ['movie_id'])
        dfresult['score1'] = dists[0]
        dfresult = dfresult.sort_values(by = ['score1'], ascending = False)
        dfrecommend = dfresult[0:n_recs]
        
        movie_titles = {}
        for line in lines:
            if line['item_id'] in dfrecommend.movie_id.values:

                movie_titles[line['item_id']] = line['title']
        
        dfrecommend['movie_title'] = dfrecommend['movie_id'].map(movie_titles)
        dfrecommend.reset_index(inplace = True)
        return dfrecommend
    
    if users == 2:
        dfresult = pd.DataFrame(titles, columns = ['movie_id'])
        dfresult['score1'] = dists[0]
        dfresult['score2'] = dists[1]

        dfresult['average'] = (dfresult['score1'] + dfresult['score2'])/2
        dfresult['dev'] = ((dfresult['score1']-dfresult['average'])**2) + ((dfresult['score2'] - dfresult['average'])**2)
        dfresult = dfresult.sort_values(by = ['average'], ascending = False)

        dfrecommend = dfresult[0:2*n_recs]
        dfrecommend = dfrecommend.sort_values(by= ['dev'], ascending = True)
        dfrecommend = dfrecommend[0:n_recs]

        movie_titles = {}
        for line in lines:
            if line['item_id'] in dfrecommend.movie_id.values:

                movie_titles[line['item_id']] = line['title']
        
        dfrecommend['movie_title'] = dfrecommend['movie_id'].map(movie_titles)
        dfrecommend.reset_index(inplace = True)
        return dfrecommend

In [143]:
df, titles = collect_master_df()
master_mat = generate_master_matrix(df, titles)
lines = read_metadata()

In [167]:
sample_user1 = generate_random_watch_history(titles, lines,10)
mat1 = generate_user_history_tag(sample_user1, df)
agg1 = generate_user_pref_vector(mat1, f = f)
dist1 = generate_user_movie_interest(master_mat, agg1)
dfrecommend1 = generate_recommendation([dist1], lines, titles, n_recs = 20, users = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfrecommend['movie_title'] = dfrecommend['movie_id'].map(movie_titles)


In [168]:
sample_user2 = generate_random_watch_history(titles, lines,10)
mat2 = generate_user_history_tag(sample_user2, df)
agg2 = generate_user_pref_vector(mat2, f = f)
dist2 = generate_user_movie_interest(master_mat, agg2)
dfrecommend2 = generate_recommendation([dist2], lines, titles, n_recs = 20, users = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfrecommend['movie_title'] = dfrecommend['movie_id'].map(movie_titles)


In [169]:
sample_user3 = generate_random_watch_history(titles, lines,10)
mat3 = generate_user_history_tag(sample_user3, df)
agg3 = generate_user_pref_vector(mat3, f = '2-norm')
dist3 = generate_user_movie_interest(master_mat, agg3)
dfrecommend3 = generate_recommendation([dist3], lines, titles, n_recs = 20, users = 1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfrecommend['movie_title'] = dfrecommend['movie_id'].map(movie_titles)


In [170]:
def f(a):
    result = 0
    for val in a:
        result+= val**0.5
    
    result = result/np.size(a)
    result = result**2
    return result

In [171]:
dfrecommend1

Unnamed: 0,index,movie_id,score1,movie_title
0,2614,2959,9.549704,Fight Club (1999)
1,977,1089,9.394981,Reservoir Dogs (1992)
2,2267,2571,9.271626,"Matrix, The (1999)"
3,280,296,9.269817,Pulp Fiction (1994)
4,1059,1196,9.187138,Star Wars: Episode V - The Empire Strikes Back...
5,5149,6016,9.085729,City of God (Cidade de Deus) (2002)
6,302,318,9.047456,"Shawshank Redemption, The (1994)"
7,2037,2329,9.005067,American History X (1998)
8,245,260,8.998636,Star Wars: Episode IV - A New Hope (1977)
9,6140,7415,8.995307,"Late Show, The (1977)"


In [172]:
dfrecommend2

Unnamed: 0,index,movie_id,score1,movie_title
0,2267,2571,9.600046,"Matrix, The (1999)"
1,2614,2959,9.558086,Fight Club (1999)
2,1059,1196,9.52717,Star Wars: Episode V - The Empire Strikes Back...
3,977,1089,9.38192,Reservoir Dogs (1992)
4,245,260,9.346555,Star Wars: Episode IV - A New Hope (1977)
5,280,296,9.180485,Pulp Fiction (1994)
6,302,318,9.034652,"Shawshank Redemption, The (1994)"
7,6140,7415,9.034473,"Late Show, The (1977)"
8,1072,1210,9.028406,Star Wars: Episode VI - Return of the Jedi (1983)
9,338,356,8.968013,Forrest Gump (1994)


In [173]:
dfrecommend3

Unnamed: 0,index,movie_id,score1,movie_title
0,1059,1196,8.89358,Star Wars: Episode V - The Empire Strikes Back...
1,2267,2571,8.822033,"Matrix, The (1999)"
2,245,260,8.786159,Star Wars: Episode IV - A New Hope (1977)
3,2614,2959,8.703877,Fight Club (1999)
4,280,296,8.488547,Pulp Fiction (1994)
5,1072,1210,8.484241,Star Wars: Episode VI - Return of the Jedi (1983)
6,977,1089,8.462659,Reservoir Dogs (1992)
7,338,356,8.379094,Forrest Gump (1994)
8,302,318,8.254676,"Shawshank Redemption, The (1994)"
9,1061,1198,8.250301,Raiders of the Lost Ark (Indiana Jones and the...


In [182]:
dfrecommend12 = generate_recommendation([dist1,dist2], lines, titles, n_recs = 10, users = 2)
dfrecommend12

Unnamed: 0,index,movie_id,score1,score2,average,dev,movie_title
0,2614,2959,9.549704,9.558086,9.553895,3.5e-05,Fight Club (1999)
1,302,318,9.047456,9.034652,9.041054,8.2e-05,"Shawshank Redemption, The (1994)"
2,977,1089,9.394981,9.38192,9.388451,8.5e-05,Reservoir Dogs (1992)
3,338,356,8.93753,8.968013,8.952771,0.000465,Forrest Gump (1994)
4,6140,7415,8.995307,9.034473,9.01489,0.000767,"Late Show, The (1977)"
5,1075,1213,8.713398,8.66188,8.687639,0.001327,Goodfellas (1990)
6,280,296,9.269817,9.180485,9.225151,0.00399,Pulp Fiction (1994)
7,2037,2329,9.005067,8.909047,8.957057,0.00461,American History X (1998)
8,4294,4878,8.89336,8.79415,8.843755,0.004921,Donnie Darko (2001)
9,3716,4226,8.979253,8.870184,8.924718,0.005948,Memento (2000)


In [183]:
dfrecommend13 = generate_recommendation([dist1,dist3], lines, titles, n_recs = 10, users = 2)
dfrecommend13

Unnamed: 0,index,movie_id,score1,score2,average,dev,movie_title
0,1072,1210,8.596752,8.484241,8.540496,0.006329,Star Wars: Episode VI - Return of the Jedi (1983)
1,245,260,8.998636,8.786159,8.892397,0.022573,Star Wars: Episode IV - A New Hope (1977)
2,1059,1196,9.187138,8.89358,9.040359,0.043088,Star Wars: Episode V - The Empire Strikes Back...
3,1061,1198,8.611472,8.250301,8.430887,0.065222,Raiders of the Lost Ark (Indiana Jones and the...
4,8313,58559,8.585096,8.157315,8.371205,0.091498,"Dark Knight, The (2008)"
5,2267,2571,9.271626,8.822033,9.04683,0.101067,"Matrix, The (1999)"
6,338,356,8.93753,8.379094,8.658312,0.155925,Forrest Gump (1994)
7,501,527,8.870564,8.20994,8.540252,0.218212,Schindler's List (1993)
8,280,296,9.269817,8.488547,8.879182,0.305191,Pulp Fiction (1994)
9,302,318,9.047456,8.254676,8.651066,0.314251,"Shawshank Redemption, The (1994)"


In [184]:
dfrecommend23 = generate_recommendation([dist2,dist3], lines, titles, n_recs = 10, users = 2)
dfrecommend23

Unnamed: 0,index,movie_id,score1,score2,average,dev,movie_title
0,501,527,8.681455,8.20994,8.445697,0.111163,Schindler's List (1993)
1,1100,1240,8.727016,8.19025,8.458633,0.144059,"Terminator, The (1984)"
2,1072,1210,9.028406,8.484241,8.756324,0.148058,Star Wars: Episode VI - Return of the Jedi (1983)
3,554,589,8.632875,8.07956,8.356217,0.153079,Terminator 2: Judgment Day (1991)
4,245,260,9.346555,8.786159,9.066357,0.157022,Star Wars: Episode IV - A New Hope (1977)
5,338,356,8.968013,8.379094,8.673554,0.173413,Forrest Gump (1994)
6,1059,1196,9.52717,8.89358,9.210375,0.200718,Star Wars: Episode V - The Empire Strikes Back...
7,1061,1198,8.940016,8.250301,8.595159,0.237853,Raiders of the Lost Ark (Indiana Jones and the...
8,280,296,9.180485,8.488547,8.834516,0.239389,Pulp Fiction (1994)
9,2522,2858,8.681158,7.98708,8.334119,0.240873,American Beauty (1999)
