In [1]:
import numpy as np
import pandas as pd
from collections import Counter



In [2]:
# Read the two CSV inputs from the working directory in one pass:
# movies.csv becomes `item_list`, ratings.csv becomes `transaction_list`.
item_list, transaction_list = (
    pd.read_csv(csv_name) for csv_name in ('movies.csv', 'ratings.csv')
)

## Item Feature Table

In [6]:
def create_item_feature_table(item_list, encode_genre=False):
    """
    Creates the item feature table from the MovieLens movies.csv DataFrame.

    The input is not modified. The result is indexed by movie id (index
    renamed to 'item_id') and carries the original 'title', a 'genres'
    column holding a list of genre names per movie, and a 'year' column.

    Parameters:
        item_list (pd.DataFrame)      : MovieLens movies.csv DataFrame containing
                                        the columns movieId, title, and genres
                                        (genres as a '|'-separated string)
        encode_genre (bool)           : Append one 0/1 column per genre, in
                                        alphabetical order (default: False)

    Returns:
        item_feature_table (pd.DataFrame) : DataFrame indexed by item_id with
                                        title, genres (list), year (4-digit
                                        string, NaN when the title has no year)
                                        and, optionally, the 1-hot genre columns
    """

    # set_index returns a new frame, so the caller's DataFrame stays untouched.
    item_table = item_list.set_index('movieId').rename_axis('item_id')

    # Take the four digits directly before the closing parenthesis at the end
    # of the title. Anchoring on ')' and allowing trailing whitespace avoids the
    # garbage a fixed positional slice produces for titles such as
    # "Some Movie (2015) " or titles that carry no year at all (those get NaN).
    item_table['year'] = item_table['title'].str.extract(
        r'(\d{4})\)\s*$', expand=False
    )

    # Keep the raw '|'-separated strings around for the encoder before the
    # column is replaced by its list form.
    genre_strings = item_table['genres']
    item_table['genres'] = genre_strings.str.split('|')

    if encode_genre:
        # Vectorised 1-hot encoding: columns come back sorted by genre name and
        # rows with a missing genre string are encoded as all zeros.
        genre_flags = genre_strings.str.get_dummies(sep='|').astype('int64')
        item_table = pd.concat([item_table, genre_flags], axis=1)

    return item_table
    
    

In [4]:
# Build the genre-encoded item feature table, then preview its first five rows.
item_feature = create_item_feature_table(
    item_list=item_list,
    encode_genre=True,
)
item_feature.head(5)

Unnamed: 0_level_0,movieId,title,genres,year,(no genres listed),Action,Adventure,Animation,Children,Comedy,...,Film-Noir,Horror,IMAX,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
item_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1995,0,0,1,1,1,1,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1995,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",1995,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
4,5,Father of the Bride Part II (1995),[Comedy],1995,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Sanity check: (number of rows, number of columns) of the item feature table.
item_feature.shape

(9742, 24)