In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import networkx as nx
import utility as util

In [None]:
class Netflix_Ratings(object):
    '''
    Film-title lookup tables plus a sparse user-by-film utility matrix of ratings.

    Attributes set by __init__:
        ratings_dir, film_title_csv : the constructor arguments, stored as given.
        names_df   : the title table with the combined 'Name' column.
        ID_to_name : dict mapping film ID -> lower-cased title.
        name_to_ID : dict mapping lower-cased title -> film ID.

    Attributes set by create_utility_matrix:
        um             : scipy.sparse CSR matrix, rows = users, columns = films.
        title_to_index : dict mapping film title -> column index.
        index_to_title : dict mapping column index -> film title.
        user_to_index  : dict mapping user id (string, as read from file) -> row index.
        index_to_user  : dict mapping row index -> user id.
    '''

    def __init__(self,ratings_dir,film_title_csv):
        '''
        Read the film-title table and build the ID <-> title mappings.

        ratings_dir    : directory holding the per-film rating files
                         (named mv_<7-digit id>.txt, see create_utility_matrix).
        film_title_csv : path to the title table. Despite the name it is read
                         with pandas.read_excel, so it must be an Excel file.
        '''
        import pandas as pd
        self.ratings_dir = ratings_dir
        self.film_title_csv = film_title_csv

        names_df = pd.read_excel(self.film_title_csv)
        # The extra columns are joined onto 'Name' by combine(); missing cells
        # are blanked so combine() can compare against ''.
        # NOTE(review): presumably these hold title fragments that were split
        # on commas when the table was produced -- confirm against the data.
        for col in ('Name2', 'Name3', 'Unnamed: 5'):
            names_df[col] = names_df[col].fillna('')
        names_df['Name'] = names_df.apply(self.combine, axis=1)
        self.names_df = names_df

        self.ID_to_name, self.name_to_ID = self.create_mappings(self.names_df)

    def combine(self,row):
        '''
        Join 'Name' with every non-empty extra column ('Name2', 'Name3',
        'Unnamed: 5'), separated by ', ', and return the result.

        The row is not modified. Pieces are passed through str() before
        joining so a numeric cell cannot raise TypeError. When there is
        nothing to join, row['Name'] is returned unchanged.
        '''
        name = row['Name']
        for col in ('Name2', 'Name3', 'Unnamed: 5'):
            extra = row[col]
            if extra != '':
                name = str(name) + ', ' + str(extra)
        return name

    def create_mappings(self,names_df):
        '''
        Build (ID_to_name, name_to_ID) from the 'ID' and 'Name' columns.

        Names are converted with str() and lower-cased, so a title stored as
        a number is still usable as a key (a missing name becomes 'nan').
        If two rows share a lower-cased name, the later row wins in name_to_ID.
        '''
        IDs = list(names_df['ID'])
        names = [str(name).lower() for name in names_df['Name']]
        ID_to_name = dict(zip(IDs,names))
        name_to_ID = dict(zip(names,IDs))
        return ID_to_name, name_to_ID

    def create_utility_matrix(self,G):
        '''
        Given a network G, this method will construct the utility matrix for the movies present in
        the nodeset of G that are also within the ratings listed here.

        G must provide nodes(), yielding film titles that are keys of
        self.name_to_ID; an unknown title raises KeyError before any file is read.

        For each title the file <ratings_dir>/mv_<ID zero-padded to 7>.txt is read
        as CSV. The first line is skipped and, on every other line, column 0 is
        taken as the user id and column 1 as the integer rating. Lines with fewer
        than two fields are ignored.

        Nothing is returned; the results are stored on self as um,
        title_to_index, index_to_title, user_to_index and index_to_user.
        Users are indexed in order of first appearance, so the layout is
        reproducible from run to run.
        '''
        import csv
        import os
        import scipy.sparse as ss

        titles_in_matrix = list(G.nodes())
        # Resolve every title first so a bad title fails fast.
        ids_in_matrix = [self.name_to_ID[x] for x in titles_in_matrix]

        # ratings_dict[title][user] -> rating; user_to_index grows as new
        # users are encountered (dicts preserve insertion order).
        ratings_dict = {}
        user_to_index = {}
        for title, film_id in zip(titles_in_matrix, ids_in_matrix):
            # os.path.join works whether or not ratings_dir ends with a separator.
            filename = os.path.join(self.ratings_dir, f"mv_{int(film_id):07}.txt")
            ratings = {}
            with open(filename,'r',newline='') as f:
                reader = csv.reader(f)
                next(reader, None)  # skip the first line of the file
                for row in reader:
                    if len(row) < 2:
                        continue  # blank or malformed line
                    user = row[0]
                    user_to_index.setdefault(user, len(user_to_index))
                    ratings[user] = int(row[1])
            ratings_dict[title] = ratings

        # Make mappings for the movie title and user to index
        title_to_index = {title: i for i, title in enumerate(titles_in_matrix)}
        index_to_title = dict(enumerate(titles_in_matrix))
        index_to_user = {j: user for user, j in user_to_index.items()}

        # Build the utility matrix. [j,i] where j is user and i is movie.
        # Allocate it sparse from the start: a dense users x films array
        # would be needlessly large.
        um = ss.lil_matrix((len(user_to_index), len(titles_in_matrix)), dtype=int)
        for title, ratings in ratings_dict.items():
            i = title_to_index[title]
            for user, rating in ratings.items():
                um[user_to_index[user], i] = rating
        um = um.tocsr()

        self.um = um
        self.index_to_title = index_to_title
        self.index_to_user = index_to_user
        self.title_to_index = title_to_index
        self.user_to_index = user_to_index

    
# Load the network from 'DirectorG.net' via the project's utility module, then
# build the utility matrix for the films that appear as its nodes.
G = util.parse_nodes_edge_file('DirectorG.net')
ratings = Netflix_Ratings(
    ratings_dir='training_set/',
    film_title_csv='Netflix-Dataset/movie_titles_test.xls',
)
ratings.create_utility_matrix(G)

In [None]:
# TODO: other methods to add -- build edge lists. Alternatively, just write out one file per user?

In [2]:
# Scratch cell: check an exact type comparison on an int literal.
a = 9
a_is_exact_int = type(a) is int  # identity test: subclasses such as bool would not match
if a_is_exact_int:
    print('yep')

yep
