In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm
import numpy as np

  from pandas.core import datetools


In [2]:
user_columns = ['user_id', 'age', 'sex', 'occupation', 'zip code']
users = pd.read_csv('ml-100k/u.user', sep = '|', names=user_columns, encoding='latin-1')
users.head()

Unnamed: 0,user_id,age,sex,occupation,zip code
0,1,24,M,technician,85711
1,2,53,F,other,94043
2,3,23,M,writer,32067
3,4,24,M,technician,43537
4,5,33,F,other,15213


In [3]:
movie_columns = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url']
movies = pd.read_csv('ml-100k/u.item', sep = '|', names=movie_columns, encoding='latin-1', usecols = range(5))
movies = movies.drop('video_release_date', axis = 1)
movies.head()

Unnamed: 0,movie_id,title,release_date,imdb_url
0,1,Toy Story (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Toy%20Story%2...
1,2,GoldenEye (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?GoldenEye%20(...
2,3,Four Rooms (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Four%20Rooms%...
3,4,Get Shorty (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Get%20Shorty%...
4,5,Copycat (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Copycat%20(1995)


In [4]:
ratings_columns = ['user_id','movie_id','rating']
ratings = pd.read_csv('ml-100k/u.data', sep = '\t', names=ratings_columns, encoding='latin-1', usecols = range(3))
ratings.head()

Unnamed: 0,user_id,movie_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [5]:
# Two approaches
# 1. similar characteristics of the movies
# 2. other users that like the same movies - use correlation
# We only have the data to do #2 here

users.to_csv('data/users.csv', sep=',', encoding='utf-8')
movies.to_csv('data/movies.csv', sep=',', encoding='utf-8')
ratings.to_csv('data/ratings.csv', sep=',', encoding='utf-8')

In [6]:
movies = pd.read_csv('data/movies.csv')
movies.head()

Unnamed: 0.1,Unnamed: 0,movie_id,title,release_date,imdb_url
0,0,1,Toy Story (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Toy%20Story%2...
1,1,2,GoldenEye (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?GoldenEye%20(...
2,2,3,Four Rooms (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Four%20Rooms%...
3,3,4,Get Shorty (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Get%20Shorty%...
4,4,5,Copycat (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Copycat%20(1995)


In [7]:
def make_clickable(val):
    return '<a href="{}">{}</a>'.format(val, val)


In [8]:
# testing
make_clickable('www.wvu.edu')

'<a href="www.wvu.edu">www.wvu.edu</a>'

In [9]:
movies.style.format({'imdb_url':make_clickable})

Unnamed: 0.1,Unnamed: 0,movie_id,title,release_date,imdb_url
0,0,1,Toy Story (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)
1,1,2,GoldenEye (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?GoldenEye%20(1995)
2,2,3,Four Rooms (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995)
3,3,4,Get Shorty (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995)
4,4,5,Copycat (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Copycat%20(1995)
5,5,6,Shanghai Triad (Yao a yao yao dao waipo qiao) (1995),01-Jan-1995,http://us.imdb.com/Title?Yao+a+yao+yao+dao+waipo+qiao+(1995)
6,6,7,Twelve Monkeys (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Twelve%20Monkeys%20(1995)
7,7,8,Babe (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Babe%20(1995)
8,8,9,Dead Man Walking (1995),01-Jan-1995,http://us.imdb.com/M/title-exact?Dead%20Man%20Walking%20(1995)
9,9,10,Richard III (1995),22-Jan-1996,http://us.imdb.com/M/title-exact?Richard%20III%20(1995)


In [10]:
my_ratings = ratings.where(ratings.user_id==196).dropna()
# my_ratings.count()

test_ratings = my_ratings[:15]

my_ratings = my_ratings[~my_ratings.movie_id.isin(test_ratings.movie_id)]
my_ratings.count()

user_id     24
movie_id    24
rating      24
dtype: int64

In [11]:

for movie in my_ratings.movie_id:
    other_ratings = ratings.where(ratings.movie_id==movie)
    

In [None]:
other_ratings