In [2]:
import sys

sys.executable

'/Users/marcelc/Documents/10_sem/pisr/pro/pisr_venv/bin/python'

In [3]:
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from surprise import Dataset, SVD, Reader
from surprise.model_selection import cross_validate

pd.set_option('display.max_columns', None)

In [70]:
credits = pd.read_csv('../data/credits.csv')  # 45_476
keywords = pd.read_csv('../data/keywords.csv')  # 46_419
links_small = pd.read_csv('../data/links_small.csv')  # 9_125
# links = pd.read_csv('../data/links.csv')  # 45_843
movies = pd.read_csv('../data/movies_metadata.csv')  # 45_466
ratings_small = pd.read_csv('../data/ratings_small.csv')  # 100_004
# ratings = pd.read_csv('../data/ratings.csv')  # 26_024_289

### Credits dataframe preparation

In [71]:
credits['cast'] = credits['cast']\
    .apply(literal_eval)\
    .apply(lambda x: [i['name'].replace(' ', '_').lower() for i in x[:10]])

credits['crew'] = credits['crew']\
    .apply(literal_eval)\
    .apply(lambda x: [i['name'].replace(' ', '_').lower() for i in x[:10]])

credits = credits.rename(columns={'id': 'tmdb_id'})

In [72]:
print(credits.info())
credits.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45476 entries, 0 to 45475
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   cast     45476 non-null  object
 1   crew     45476 non-null  object
 2   tmdb_id  45476 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 1.0+ MB
None


Unnamed: 0,cast,crew,tmdb_id
0,"[tom_hanks, tim_allen, don_rickles, jim_varney...","[john_lasseter, joss_whedon, andrew_stanton, j...",862
1,"[robin_williams, jonathan_hyde, kirsten_dunst,...","[larry_j._franco, jonathan_hensleigh, james_ho...",8844
2,"[walter_matthau, jack_lemmon, ann-margret, sop...","[howard_deutch, mark_steven_johnson, mark_stev...",15602
3,"[whitney_houston, angela_bassett, loretta_devi...","[forest_whitaker, ronald_bass, ronald_bass, ez...",31357
4,"[steve_martin, diane_keaton, martin_short, kim...","[alan_silvestri, elliot_davis, nancy_meyers, n...",11862


### Keywords dataframe preparation

In [73]:
keywords['keywords'] = keywords['keywords']\
    .apply(literal_eval)\
    .apply(lambda x: [i['name'].replace(' ', '_').lower() for i in x[:10]])

keywords = keywords.rename(columns={'id': 'tmdb_id'})

In [74]:
print(keywords.info())
keywords.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46419 entries, 0 to 46418
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   tmdb_id   46419 non-null  int64 
 1   keywords  46419 non-null  object
dtypes: int64(1), object(1)
memory usage: 725.4+ KB
None


Unnamed: 0,tmdb_id,keywords
0,862,"[jealousy, toy, boy, friendship, friends, riva..."
1,8844,"[board_game, disappearance, based_on_children'..."
2,15602,"[fishing, best_friend, duringcreditsstinger, o..."
3,31357,"[based_on_novel, interracial_relationship, sin..."
4,11862,"[baby, midlife_crisis, confidence, aging, daug..."


### Links dataframe preparation

In [76]:
links_small.columns = ['movie_id', 'imbd_id', 'tmdb_id']
links_small = links_small.dropna()

links_small['tmdb_id'] = links_small['tmdb_id'].astype('int')
links_small['movie_id'] = links_small['movie_id'].astype('int')

In [77]:
print(links_small.info())
links_small.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9112 entries, 0 to 9124
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   movie_id  9112 non-null   int64
 1   imbd_id   9112 non-null   int64
 2   tmdb_id   9112 non-null   int64
dtypes: int64(3)
memory usage: 284.8 KB
None


Unnamed: 0,movie_id,imbd_id,tmdb_id
0,1,114709,862
1,2,113497,8844
2,3,113228,15602
3,4,114885,31357
4,5,113041,11862


### Movies dataframe preparation

In [78]:
to_drop = list(movies[
    movies['id'].isnull()
].index)

movies = movies.drop(to_drop)
movies = movies.drop([19730, 29503, 35587])

movies['id'] = movies['id'].astype('int')

movies = movies.rename(columns={'id': 'tmdb_id'})

movies['genres'] = movies['genres']\
    .apply(literal_eval)\
    .apply(lambda x: [i['name'] for i in x])

movies['year'] = movies['release_date']\
    .apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)

movies['tagline'] = movies['tagline'].fillna('')
movies['overview'] = movies['overview'].fillna('')

movies['description'] = movies['overview'] + movies['tagline']
movies['description'] = movies['description'].fillna('')

movies = pd.merge(movies, credits, on='tmdb_id')
movies = pd.merge(movies, keywords, on='tmdb_id')

movies = movies.drop_duplicates('tmdb_id').shape

# metadata['belongs_to_collection'].fillna('{\'name\': \'\'}', inplace=True)
# metadata['collection'] = metadata.progress_apply(lambda row: literal_eval(row['belongs_to_collection'])['name'], axis=1)

In [142]:
print(movies.info())
movies.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 45432 entries, 0 to 46627
Data columns (total 29 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   adult                  45432 non-null  object 
 1   belongs_to_collection  4488 non-null   object 
 2   budget                 45432 non-null  object 
 3   genres                 45432 non-null  object 
 4   homepage               7774 non-null   object 
 5   tmdb_id                45432 non-null  int64  
 6   imdb_id                45415 non-null  object 
 7   original_language      45421 non-null  object 
 8   original_title         45432 non-null  object 
 9   overview               45432 non-null  object 
 10  popularity             45429 non-null  object 
 11  poster_path            45046 non-null  object 
 12  production_companies   45429 non-null  object 
 13  production_countries   45429 non-null  object 
 14  release_date           45345 non-null  object 
 15  re

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,tmdb_id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,year,description,cast,crew,keywords
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[Animation, Comedy, Family]",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-10-30,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,1995,"Led by Woody, Andy's toys live happily in his ...","[tom_hanks, tim_allen, don_rickles, jim_varney...","[john_lasseter, joss_whedon, andrew_stanton, j...","[jealousy, toy, boy, friendship, friends, riva..."
1,False,,65000000,"[Adventure, Fantasy, Family]",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-15,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,1995,When siblings Judy and Peter discover an encha...,"[robin_williams, jonathan_hyde, kirsten_dunst,...","[larry_j._franco, jonathan_hensleigh, james_ho...","[board_game, disappearance, based_on_children'..."
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[Romance, Comedy]",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,1995,A family wedding reignites the ancient feud be...,"[walter_matthau, jack_lemmon, ann-margret, sop...","[howard_deutch, mark_steven_johnson, mark_stev...","[fishing, best_friend, duringcreditsstinger, o..."
3,False,,16000000,"[Comedy, Drama, Romance]",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,"[{'iso_3166_1': 'US', 'name': 'United States o...",1995-12-22,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,1995,"Cheated on, mistreated and stepped on, the wom...","[whitney_houston, angela_bassett, loretta_devi...","[forest_whitaker, ronald_bass, ronald_bass, ez...","[based_on_novel, interracial_relationship, sin..."
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,[Comedy],,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...","[{'iso_3166_1': 'US', 'name': 'United States o...",1995-02-10,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,1995,Just when George Banks has recovered from his ...,"[steve_martin, diane_keaton, martin_short, kim...","[alan_silvestri, elliot_davis, nancy_meyers, n...","[baby, midlife_crisis, confidence, aging, daug..."


In [146]:
movies.head().transpose()

Unnamed: 0,0,1,2,3,4
adult,False,False,False,False,False
belongs_to_collection,"{'id': 10194, 'name': 'Toy Story Collection', ...",,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",,"{'id': 96871, 'name': 'Father of the Bride Col..."
budget,30000000,65000000,0,16000000,0
genres,"[Animation, Comedy, Family]","[Adventure, Fantasy, Family]","[Romance, Comedy]","[Comedy, Drama, Romance]",[Comedy]
homepage,http://toystory.disney.com/toy-story,,,,
tmdb_id,862,8844,15602,31357,11862
imdb_id,tt0114709,tt0113497,tt0113228,tt0114885,tt0113041
original_language,en,en,en,en,en
original_title,Toy Story,Jumanji,Grumpier Old Men,Waiting to Exhale,Father of the Bride Part II
overview,"Led by Woody, Andy's toys live happily in his ...",When siblings Judy and Peter discover an encha...,A family wedding reignites the ancient feud be...,"Cheated on, mistreated and stepped on, the wom...",Just when George Banks has recovered from his ...


### Ratings dataframe preparation

In [147]:
ratings_small.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

In [148]:
print(ratings_small.info())
ratings_small.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100004 entries, 0 to 100003
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   user_id    100004 non-null  int64  
 1   movie_id   100004 non-null  int64  
 2   rating     100004 non-null  float64
 3   timestamp  100004 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.1 MB
None


Unnamed: 0,user_id,movie_id,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


### Simple recomendation

IMDB's weighted rating formula:

$WR = (\frac{v}{v + m} . R) + (\frac{m}{v + m} . C)$
```
where,
    v is the number of votes for the movie
    m is the minimum votes required to be listed in the chart
    R is the average rating of the movie
    C is the mean vote across the whole report
```

In [149]:
# v
vote_counts = movies[movies['vote_count'].notnull()]['vote_count'].astype('int')

# R
vote_averages = movies[movies['vote_average'].notnull()]['vote_average']

# C
C = vote_averages.mean()

# m
m = vote_counts.quantile(0.9)

# print(f'v: {vote_counts}')
# print(f'R: {vote_averages}')
print(f'C: {C}')
print(f'm: {m}')

C: 5.61845297056946
m: 160.0


In [150]:
to_recommend = movies[
    (movies['vote_count'] >= m) & 
    (movies['vote_count'].notnull()) & 
    (movies['vote_average'].notnull())
][['title', 'year', 'original_language','vote_count', 
   'vote_average', 'popularity', 'genres']]

to_recommend.rename(columns={'original_language': 'language'}, inplace=True)

to_recommend['vote_count'] = to_recommend['vote_count'].astype('int')
to_recommend.shape

(4551, 7)

In [151]:
def weighted_rating(x):
    v = x['vote_count']
    R = x['vote_average']
    wr = (v/(v+m) * R) + (m/(m+v) * C)
    return wr


to_recommend['weighted_rating'] = to_recommend.apply(weighted_rating, axis=1)

to_recommend = to_recommend.sort_values('weighted_rating', ascending=False)

In [152]:
to_recommend.head(20)

Unnamed: 0,title,year,language,vote_count,vote_average,popularity,genres,weighted_rating
314,The Shawshank Redemption,1994,en,8358,8.5,51.645403,"[Drama, Crime]",8.445874
841,The Godfather,1972,en,6024,8.5,41.109264,"[Drama, Crime]",8.425445
10397,Dilwale Dulhania Le Jayenge,1995,hi,661,9.1,34.457024,"[Comedy, Drama, Romance]",8.421501
12589,The Dark Knight,2008,en,12269,8.3,123.167259,"[Drama, Action, Crime, Thriller]",8.26548
2870,Fight Club,1999,en,9678,8.3,63.869599,[Drama],8.256389
292,Pulp Fiction,1994,en,8670,8.3,140.950236,"[Thriller, Crime]",8.25141
522,Schindler's List,1993,en,4436,8.3,41.725123,"[Drama, History, War]",8.206648
23868,Whiplash,2014,en,4376,8.3,64.29999,[Drama],8.205413
5529,Spirited Away,2001,ja,3968,8.3,41.048867,"[Fantasy, Adventure, Animation, Family]",8.196064
2231,Life Is Beautiful,1997,it,3643,8.3,39.39497,"[Comedy, Drama]",8.187182


### Collaborative Filtering

In [153]:
# reader = Reader(rating_scale=(0.5, 5)) 
reader = Reader()

ratings_df = Dataset.load_from_df(ratings_small[['user_id', 'movie_id', 'rating']], reader)

svd = SVD()

# Run 5-fold cross-validation and print results
cross_validate(svd, ratings_df, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8926  0.8982  0.8939  0.9047  0.8902  0.8959  0.0051  
MAE (testset)     0.6875  0.6944  0.6861  0.6959  0.6856  0.6899  0.0044  
Fit time          5.00    4.92    4.96    5.01    5.06    4.99    0.05    
Test time         0.14    0.13    0.13    0.14    0.13    0.13    0.00    


{'test_rmse': array([0.89255349, 0.89818893, 0.89390678, 0.90469343, 0.8901895 ]),
 'test_mae': array([0.68751276, 0.69436022, 0.6860504 , 0.69594614, 0.68555857]),
 'fit_time': (5.0014848709106445,
  4.924375057220459,
  4.9615113735198975,
  5.008440971374512,
  5.059353828430176),
 'test_time': (0.13767337799072266,
  0.12867379188537598,
  0.13202404975891113,
  0.13726019859313965,
  0.13040566444396973)}

In [154]:
trainset = ratings_df.build_full_trainset()

svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13e1fc130>

### Content-based

In [155]:
movies_in_links = movies[
    movies['tmdb_id'].isin(links_small['tmdb_id'])
]

movies_in_links.shape

(9082, 29)

In [163]:
movies_in_links = movies_in_links.reset_index()
titles = movies_in_links['title']
indices = pd.Series(movies_in_links.index, index=movies_in_links['title'])

indices.head()

title
Toy Story                      0
Jumanji                        1
Grumpier Old Men               2
Waiting to Exhale              3
Father of the Bride Part II    4
dtype: int64

### C-B using movie overviews and taglines (description)

In [157]:
tf = TfidfVectorizer(
    analyzer='word', 
    ngram_range=(1, 2), 
    min_df=0, 
    stop_words='english'
)

tfidf_matrix = tf.fit_transform(movies_in_links['description'])

In [158]:
tfidf_matrix.shape

(9082, 268124)

In [164]:
# http://scikit-learn.org/stable/modules/metrics.html#linear-kernel
cosine_sim_desc = linear_kernel(tfidf_matrix, tfidf_matrix)

cosine_sim_desc.shape

(9082, 9082)

### C-B using movie cast and crew

In [161]:
movies_in_links['cast_crew'] = (movies_in_links['cast'] + movies_in_links['crew']).apply(lambda x: ' '.join(x))

In [169]:
tf = TfidfVectorizer(
    analyzer='word', 
    ngram_range=(1, 2), 
    min_df=0, 
    stop_words='english'
)

tfidf_matrix = tf.fit_transform(movies_in_links['cast_crew'])

In [170]:
cosine_sim_cc = linear_kernel(tfidf_matrix, tfidf_matrix)

cosine_sim_cc.shape

(9082, 9082)

### Hybrid recommendation system

In [171]:
id_map = links_small[['movie_id', 'tmdb_id']]
id_map = id_map.merge(movies_in_links[['title', 'tmdb_id']], on='tmdb_id').set_index('title')

In [172]:
indices_map = id_map.set_index('tmdb_id')

In [173]:
# cosine_sim = cosine_sim_desc
cosine_sim = cosine_sim_cc

def hybrid(user_id, title):
    idx = indices[title]
    tmdb_id = id_map.loc[title]['tmdb_id']
    movie_id = id_map.loc[title]['movie_id']

    sim_scores = list(enumerate(cosine_sim[int(idx)]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:30]

    movie_indices = [i[0] for i in sim_scores]
    movies = movies_in_links.iloc[movie_indices][['title', 'genres', 'vote_count', 'vote_average', 'release_date', 'tmdb_id']]
    movies['est'] = movies['tmdb_id'].apply(lambda x: svd.predict(user_id, indices_map.loc[x]['movie_id']).est)
    movies = movies.sort_values('est', ascending=False)

    return movies.head(10)

In [186]:
hybrid(1, 'Toy Story')[['title', 'genres']]

Unnamed: 0,title,genres
2501,Toy Story 2,"[Animation, Comedy, Family]"
3798,"Monsters, Inc.","[Animation, Comedy, Family]"
7531,Toy Story 3,"[Animation, Family, Comedy]"
6961,WALL·E,"[Animation, Family]"
4602,Finding Nemo,"[Animation, Family]"
6175,Serenity,"[Science Fiction, Action, Adventure, Thriller]"
4294,My Dinner with André,"[Comedy, Drama]"
7216,Up,"[Animation, Comedy, Family, Adventure]"
7163,Dr. Horrible's Sing-Along Blog,"[Adventure, Action, Comedy, Science Fiction, M..."
1862,A Bug's Life,"[Adventure, Animation, Comedy, Family]"


In [183]:
hybrid(7, 'Toy Story')[['title', 'genres']]

Unnamed: 0,title,genres
6175,Serenity,"[Science Fiction, Action, Adventure, Thriller]"
7531,Toy Story 3,"[Animation, Family, Comedy]"
7163,Dr. Horrible's Sing-Along Blog,"[Adventure, Action, Comedy, Science Fiction, M..."
2501,Toy Story 2,"[Animation, Comedy, Family]"
6961,WALL·E,"[Animation, Family]"
4602,Finding Nemo,"[Animation, Family]"
7359,Partly Cloudy,"[Animation, Family]"
3798,"Monsters, Inc.","[Animation, Comedy, Family]"
7216,Up,"[Animation, Comedy, Family, Adventure]"
2070,Goodbye Lover,"[Thriller, Comedy, Crime, Mystery]"


In [182]:
hybrid(1, 'Avatar')[['title', 'genres']]

Unnamed: 0,title,genres
8600,Guardians of the Galaxy,"[Action, Science Fiction, Adventure]"
8749,Avengers: Age of Ultron,"[Action, Adventure, Science Fiction]"
953,Aliens,"[Horror, Action, Thriller, Science Fiction]"
901,The Abyss,"[Adventure, Action, Thriller, Science Fiction]"
522,Terminator 2: Judgment Day,"[Action, Thriller, Science Fiction]"
990,The Terminator,"[Action, Thriller, Science Fiction]"
4288,Galaxy of Terror,"[Action, Adventure, Horror, Science Fiction]"
7864,The Avengers,"[Science Fiction, Action, Adventure]"
6427,Pirates of the Caribbean: Dead Man's Chest,"[Adventure, Fantasy, Action]"
3338,Battle Beyond the Stars,[Science Fiction]


In [181]:
hybrid(7, 'Avatar')[['title', 'genres']]

Unnamed: 0,title,genres
8600,Guardians of the Galaxy,"[Action, Science Fiction, Adventure]"
990,The Terminator,"[Action, Thriller, Science Fiction]"
953,Aliens,"[Horror, Action, Thriller, Science Fiction]"
522,Terminator 2: Judgment Day,"[Action, Thriller, Science Fiction]"
7864,The Avengers,"[Science Fiction, Action, Adventure]"
8749,Avengers: Age of Ultron,"[Action, Adventure, Science Fiction]"
6427,Pirates of the Caribbean: Dead Man's Chest,"[Adventure, Fantasy, Action]"
901,The Abyss,"[Adventure, Action, Thriller, Science Fiction]"
3338,Battle Beyond the Stars,[Science Fiction]
6315,The Cutting Edge: The Magic of Movie Editing,[Documentary]


In [50]:
to_recommend.head(20)

Unnamed: 0,title,year,language,vote_count,vote_average,popularity,genres,weighted_rating
314,The Shawshank Redemption,1994,en,8358,8.5,51.645403,"[Drama, Crime]",8.445869
834,The Godfather,1972,en,6024,8.5,41.109264,"[Drama, Crime]",8.425439
10309,Dilwale Dulhania Le Jayenge,1995,hi,661,9.1,34.457024,"[Comedy, Drama, Romance]",8.421453
12481,The Dark Knight,2008,en,12269,8.3,123.167259,"[Drama, Action, Crime, Thriller]",8.265477
2843,Fight Club,1999,en,9678,8.3,63.869599,[Drama],8.256385
292,Pulp Fiction,1994,en,8670,8.3,140.950236,"[Thriller, Crime]",8.251406
522,Schindler's List,1993,en,4436,8.3,41.725123,"[Drama, History, War]",8.206639
23673,Whiplash,2014,en,4376,8.3,64.29999,[Drama],8.205404
5481,Spirited Away,2001,ja,3968,8.3,41.048867,"[Fantasy, Adventure, Animation, Family]",8.196055
2211,Life Is Beautiful,1997,it,3643,8.3,39.39497,"[Comedy, Drama]",8.187171
