## Movie Sequel ratings

In [None]:
# Packages:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from matplotlib.colors import LinearSegmentedColormap, to_hex
import re
import plotly.figure_factory as ff

# Display options: raise pandas' console width and row/column caps
# so wide or long frames are shown with less truncation.
for option_name, option_value in {
    "display.width": 1200,
    "display.max_columns": 300,
    "display.max_rows": 300,
}.items():
    pd.set_option(option_name, option_value)

## Data

In [2]:
# Load the franchise listing: each row pairs a Franchise name with one Movie title.
df_franchises = pd.read_csv(
    filepath_or_buffer="Movie_Franchises.csv",
)

In [3]:
# Preview the first ten franchise/movie rows.
df_franchises.iloc[:10]

Unnamed: 0,Franchise,Movie
0,Mad Max,Mad Max
1,Mad Max,The Road Warrior
2,Mad Max,Mad Max Beyond Thunderdome
3,Mad Max,Mad Max: Fury Road
4,The Lord of the Rings,The Lord of the Rings: The Fellowship of the Ring
5,The Lord of the Rings,The Lord of the Rings: The Two Towers
6,The Lord of the Rings,The Lord of the Rings: The Return of the King
7,The Hobbit,The Hobbit: An Unexpected Journey
8,The Hobbit,The Hobbit: The Desolation of Smaug
9,The Hobbit,The Hobbit: The Battle of the Five Armies


In [4]:
# Load the ratings table; read_table parses tab-separated files by default.
df_rating = pd.read_table("title.ratings.tsv")

In [5]:
# Display the ratings table (columns: tconst, averageRating, numVotes).
df_rating

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,1887
1,tt0000002,5.9,250
2,tt0000003,6.5,1676
3,tt0000004,5.8,163
4,tt0000005,6.2,2495
...,...,...,...
1252703,tt9916690,6.5,6
1252704,tt9916720,5.3,223
1252705,tt9916730,8.4,6
1252706,tt9916766,6.7,20


In [6]:
# Load the title table and clean the release year.
df_titles = pd.read_csv("title.basics_filtered.csv")
# Convert the whole column in one vectorised call instead of calling
# pd.to_numeric once per row through Series.apply; entries that cannot be
# parsed as a number become NaN (errors="coerce").
df_titles["movie_year"] = pd.to_numeric(df_titles["movie_year"], errors = "coerce")
# dropna() without `subset` removes a row when ANY column is missing, so rows
# lacking a title are discarded here as well as rows with an unparsable year.
# NOTE(review): if only the year should be required, use subset=["movie_year"] - confirm intent.
df_titles = df_titles.dropna()

In [7]:
# Display the title table after the year conversion and the dropna() filter.
df_titles

Unnamed: 0,movie_id,movie_title_main,movie_title_original,movie_year
0,tt0000502,Bohemios,Bohemios,1905.0
1,tt0000574,The Story of the Kelly Gang,The Story of the Kelly Gang,1906.0
2,tt0000591,The Prodigal Son,L'enfant prodigue,1907.0
3,tt0000615,Robbery Under Arms,Robbery Under Arms,1907.0
4,tt0000630,Hamlet,Amleto,1908.0
...,...,...,...,...
279282,tt9916270,Il talento del calabrone,Il talento del calabrone,2020.0
279283,tt9916362,Coven,Akelarre,2020.0
279284,tt9916428,The Secret of China,Hong xing zhao yao Zhong guo,2019.0
279285,tt9916538,Kuambil Lagi Hatiku,Kuambil Lagi Hatiku,2019.0


In [8]:
# Attach ratings to every title. how="left" keeps titles that have no rating
# row (their rating columns are NaN). The two id columns are only needed as
# join keys, so they are removed afterwards.
titles_with_ratings = df_titles.merge(
    df_rating,
    how = "left",
    left_on = "movie_id",
    right_on = "tconst",
)
df_movies = titles_with_ratings.drop(columns = ["tconst", "movie_id"], errors = "ignore")
del titles_with_ratings

In [9]:
# Display the titles joined with their ratings (join-key columns already removed).
df_movies

Unnamed: 0,movie_title_main,movie_title_original,movie_year,averageRating,numVotes
0,Bohemios,Bohemios,1905.0,4.5,14
1,The Story of the Kelly Gang,The Story of the Kelly Gang,1906.0,6.0,772
2,The Prodigal Son,L'enfant prodigue,1907.0,4.5,18
3,Robbery Under Arms,Robbery Under Arms,1907.0,4.5,23
4,Hamlet,Amleto,1908.0,3.9,25
...,...,...,...,...,...
279243,Il talento del calabrone,Il talento del calabrone,2020.0,5.8,1340
279244,Coven,Akelarre,2020.0,6.4,4697
279245,The Secret of China,Hong xing zhao yao Zhong guo,2019.0,3.8,14
279246,Kuambil Lagi Hatiku,Kuambil Lagi Hatiku,2019.0,8.3,6


In [10]:
# Enrich each franchise entry with year, rating and vote count by matching the
# franchise's Movie title against movie_title_main. how="left" keeps franchise
# rows that find no match (their added columns are NaN).
# NOTE(review): the join key is the title text only, so a Movie title that
# occurs more than once in df_movies yields one output row per match - confirm
# whether later cells de-duplicate.
df_franchises = pd.merge(
    left = df_franchises,
    right = df_movies,
    how = "left",
    left_on = "Movie",
    right_on = "movie_title_main",
)
# The title columns from df_movies duplicate "Movie" and are no longer needed.
df_franchises = df_franchises.drop(
    columns = ["movie_title_main", "movie_title_original"],
    errors = "ignore",
)

In [11]:
# Display the franchise list with the merged movie_year, averageRating and numVotes columns.
df_franchises

Unnamed: 0,Franchise,Movie,movie_year,averageRating,numVotes
0,Mad Max,Mad Max,1979.0,6.8,204547.0
1,Mad Max,The Road Warrior,1981.0,7.6,178116.0
2,Mad Max,Mad Max Beyond Thunderdome,1985.0,6.2,135391.0
3,Mad Max,Mad Max: Fury Road,2015.0,8.1,973185.0
4,The Lord of the Rings,The Lord of the Rings: The Fellowship of the Ring,2001.0,8.8,1804967.0
...,...,...,...,...,...
336,John Wick,John Wick: Chapter 3 - Parabellum,2019.0,7.4,328069.0
337,The Matrix,The Matrix,1999.0,8.7,1864956.0
338,The Matrix,The Matrix Reloaded,2003.0,7.2,586141.0
339,The Matrix,The Matrix Revolutions,2003.0,6.7,505906.0
