# Movies processing
Processing the `ratings.csv.` dataset so that each row of the Dataframe is an user and each column is a movie with the rating of the movie from the user.

In [1]:
import pandas as pd
import numpy as np

ratings_dataset = pd.read_csv('ratings.csv')
movies_dataset = pd.read_csv('movies.csv')
movies_ids = movies_dataset['movieId'].to_list()


def procress_dataset():
  proccessed_dataset = pd.DataFrame([], columns=movies_ids)
  users = ratings_dataset['userId'].unique()

  for userId in users:
    print(userId)
    user_ratings = ratings_dataset[ratings_dataset['userId'] == userId]
    user_ratings = user_ratings[['movieId', 'rating']]
    user_ratings.set_index('movieId',inplace=True)
    user_ratings = user_ratings.T
    
    new_user = []
    def adjust_rating(rating):
      return ((10 * rating) - 30)

    for movie in movies_ids:
      try:
        new_user.append(adjust_rating(user_ratings.iloc[0][movie]))
      except:
        new_user.append(0)

    proccessed_dataset.loc[len(proccessed_dataset.index)] = new_user
    
  compression_opts = dict(method='zip', archive_name='processed.csv')  
  proccessed_dataset.to_csv('processed.zip', index=False, compression=compression_opts)
  proccessed_dataset.to_pickle('processed.pkl')
  proccessed_dataset.reset_index()

  return proccessed_dataset

# df = procress_dataset()
df = pd.read_pickle('processed.pkl')

The column names are the movie ids.
An adjustment to the movie rating was made such that a score of 3 is mapped to 0 (zero) and the rating was then. Scores below 3 and negative scores.

In [2]:
display(df)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
0,10.0,0.0,10.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
605,-5.0,0.0,0.0,0.0,0.0,0.0,-5.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
606,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,-5.0,-10.0,-10.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Nearest neighbors
For the recomendation system the nearest neighbor model was used to provide an user with new movies to watch. These new movies are from the account of the user that is closest to the user asking for recomendations based on their reviews. The movies are then sorted by the rating the user that being used as a recomendation gave the movies. 

In [3]:
def nn_recomendations(user):
  from sklearn.neighbors import NearestNeighbors
  nbrs = NearestNeighbors(n_neighbors=5).fit(df)
  _, indices = nbrs.kneighbors(user)
  nearest_user = indices[0][0]

  display('Nearest', indices)
  
  def get_recs():
    recs = []
    for near_user in indices[0]:
      for movie in movies_ids:
        if (df.iloc[near_user].at[movie] > 0):
          recs.append((movie, df.iloc[nearest_user].at[movie]))
    return recs
    
  recs = get_recs()
  sorted_recs = sorted(recs, key=lambda x: x[1], reverse=True)
        
  return sorted_recs


In [4]:
user = df.sample()
display(df.sample())
recs = nn_recomendations(df.sample())

for rec in recs[:10]:
  title = movies_dataset[movies_dataset['movieId'] == rec[0]]['title']
  display(str(title))

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
583,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


'Nearest'

array([[573,  53, 129,  25, 133]])

'43    Seven (a.k.a. Se7en) (1995)\nName: title, dtype: object'

'97    Braveheart (1995)\nName: title, dtype: object'

'197    Dumb & Dumber (Dumb and Dumber) (1994)\nName: title, dtype: object'

'277    Shawshank Redemption, The (1994)\nName: title, dtype: object'

'302    Ace Ventura: Pet Detective (1994)\nName: title, dtype: object'

'506    Aladdin (1992)\nName: title, dtype: object'

'510    Silence of the Lambs, The (1991)\nName: title, dtype: object'

'277    Shawshank Redemption, The (1994)\nName: title, dtype: object'

'510    Silence of the Lambs, The (1991)\nName: title, dtype: object'

'97    Braveheart (1995)\nName: title, dtype: object'

In [5]:
zero = df.sample()
for col in zero.columns:
  zero[col] = 0

# Artsy

In [6]:
import random

black_swan = 81591
twelve_angry_men = 1203
ikiru = 6669
american_history_x = 2329
gattaca = 1653
a_clockwork_orange = 1206
donnie_darko = 4878
harakiri = 26082
silence_of_the_lamb = 593
beautiful_mind = 4995
rain_man = 1961
whiplash = 112552
let_the_right_one_in = 61240
goodnight_mommy = 139655
run_lola_run = 2692
pans_labyrinth = 48394

artsy = zero.copy()
artsy[black_swan] = random.randint(-30, 30)
artsy[twelve_angry_men] = random.randint(-30, 30)
artsy[ikiru] = random.randint(-30, 30)
artsy[american_history_x] = random.randint(-30, 30)
artsy[gattaca] = random.randint(-30, 30)
artsy[a_clockwork_orange] = random.randint(-30, 30)
artsy[donnie_darko] = random.randint(-30, 30)
artsy[harakiri] = random.randint(-30, 30)
artsy[silence_of_the_lamb] = random.randint(-30, 30)
artsy[beautiful_mind] = random.randint(-30, 30)
artsy[rain_man] = random.randint(-30, 30)
artsy[whiplash] = random.randint(-30, 30)
artsy[let_the_right_one_in] = random.randint(-30, 30)
artsy[goodnight_mommy] = random.randint(-30, 30)
artsy[run_lola_run] = random.randint(-30, 30)
artsy[pans_labyrinth] = random.randint(-30, 30)

display(artsy)

recs = nn_recomendations(artsy)

for rec in recs:
  title = movies_dataset[movies_dataset['movieId'] == rec[0]]['title']
  display(str(title))


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
142,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


'Nearest'

array([[213,  53, 193, 162, 608]])

"809    William Shakespeare's Romeo + Juliet (1996)\nName: title, dtype: object"

'277    Shawshank Redemption, The (1994)\nName: title, dtype: object'

'314    Forrest Gump (1994)\nName: title, dtype: object'

'510    Silence of the Lambs, The (1991)\nName: title, dtype: object'

'140    First Knight (1995)\nName: title, dtype: object'

'274    Specialist, The (1994)\nName: title, dtype: object'

"300    Muriel's Wedding (1994)\nName: title, dtype: object"

'484    Three Musketeers, The (1993)\nName: title, dtype: object'

'792    Sound of Music, The (1965)\nName: title, dtype: object'

'829    Platoon (1986)\nName: title, dtype: object'

'951    Chinatown (1974)\nName: title, dtype: object'

'987    This Is Spinal Tap (1984)\nName: title, dtype: object'

'1076    Sneakers (1992)\nName: title, dtype: object'

'1146    Grosse Pointe Blank (1997)\nName: title, dtype: object'

'1243    Gattaca (1997)\nName: title, dtype: object'

'1806    Romancing the Stone (1984)\nName: title, dtype: object'

'2027    Arachnophobia (1990)\nName: title, dtype: object'

'3673    M*A*S*H (a.k.a. MASH) (1970)\nName: title, dtype: object'

'4795    Last Samurai, The (2003)\nName: title, dtype: object'

'1191    George of the Jungle (1997)\nName: title, dtype: object'

'1209    Air Force One (1997)\nName: title, dtype: object'

'1224    Game, The (1997)\nName: title, dtype: object'

'1284    Good Will Hunting (1997)\nName: title, dtype: object'

'1322    As Good as It Gets (1997)\nName: title, dtype: object'

'9    GoldenEye (1995)\nName: title, dtype: object'

'217    Interview with the Vampire: The Vampire Chroni...\nName: title, dtype: object'

'257    Pulp Fiction (1994)\nName: title, dtype: object'

'277    Shawshank Redemption, The (1994)\nName: title, dtype: object'

'314    Forrest Gump (1994)\nName: title, dtype: object'

'398    Fugitive, The (1993)\nName: title, dtype: object'

'508    Dances with Wolves (1990)\nName: title, dtype: object'

"591    Heaven's Prisoners (1996)\nName: title, dtype: object"

'869    Return of Martin Guerre, The (Retour de Martin...\nName: title, dtype: object'

'872    Tin Drum, The (Blechtrommel, Die) (1979)\nName: title, dtype: object'