In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the TMDB credits table; its `movie_id` column is renamed to `id`
# further down so it can be joined with the movies table.
credits = pd.read_csv(
    '../input/tmdb-movie-metadata/tmdb_5000_credits.csv',
)

In [None]:
# Load the TMDB movies table (votes, popularity, titles, ...), keyed by `id`.
movies_df = pd.read_csv(
    '../input/tmdb-movie-metadata/tmdb_5000_movies.csv',
)

In [None]:
# Peek at the first five credit records.
credits.head(n=5)

In [None]:
# Peek at the first five movie records.
movies_df.head(n=5)

In [None]:
# (rows, columns) of each raw table, one per line: credits first, then movies.
print(credits.shape, movies_df.shape, sep='\n')

Combining the dataframes

In [None]:
# Align the join key: credits calls it `movie_id`, movies calls it `id`.
# `index=str` additionally turns the row labels of the renamed frame into
# strings; the column-based merge below does not use those labels.
credits_column_renamed = credits.rename(columns={'movie_id': 'id'}, index=str)

# Inner join (the pandas default) keeps only ids present in both tables.
movies_df_merge = movies_df.merge(
    right=credits_column_renamed,
    how='inner',
    on='id',
)
movies_df_merge.head(n=5)

In [None]:
# Discard columns that are not used in the rest of the analysis. `title_x` and
# `title_y` are the suffixed copies of `title` created by the merge.
movies_cleaned_df = movies_df_merge.drop(
    ['homepage', 'title_x', 'title_y', 'status', 'production_countries'],
    axis='columns',
)
movies_cleaned_df.head(n=5)

In [None]:
# Print the dtype and non-null count of every remaining column.
movies_cleaned_df.info()

In [None]:
# Ingredients of the weighted rating computed in the next cell:
#   v - number of votes each movie received
#   R - average vote of each movie
#   C - mean of the average votes across all movies
#   m - vote-count threshold, set at the 70th percentile of the vote counts
v = movies_cleaned_df['vote_count']
R = movies_cleaned_df['vote_average']
C = R.mean()
m = v.quantile(q=0.70)

In [None]:
# Weighted rating: each movie's own average R counts with weight v (its vote
# count) and the global mean C counts with weight m, so movies with few votes
# are pulled toward C.
movies_cleaned_df['weighted_average'] = (R * v + C * m) / (v + m)

In [None]:
# Confirm the new `weighted_average` column is present.
movies_cleaned_df.head(n=5)

In [None]:
# Rank every movie from highest to lowest weighted average.
movies_sorted_ranking = movies_cleaned_df.sort_values(
    by='weighted_average',
    ascending=False,
)

# Show the top 20 with the columns relevant to the ranking.
movies_sorted_ranking.head(20)[
    ['original_title', 'vote_count', 'vote_average', 'weighted_average', 'popularity']
]

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Horizontal bar chart of the ten titles with the highest weighted average.
plt.figure(figsize=(12, 6))
axis1 = sns.barplot(
    data=movies_sorted_ranking.head(10),
    x='weighted_average',
    y='original_title',
)
# Start the x-axis at 4 rather than 0 and end it at 10.
axis1.set_xlim(4, 10)
axis1.set_title('Best Movies by average votes', weight='bold')
axis1.set_xlabel('Weighted Average Score', weight='bold')
axis1.set_ylabel('Movie Title', weight='bold')
plt.show()

In [None]:
# Re-rank by raw popularity and chart the ten most popular titles.
popularity_sorted = movies_sorted_ranking.sort_values(by='popularity', ascending=False)

plt.figure(figsize=(12, 6))
axis2 = sns.barplot(
    data=popularity_sorted.head(10),
    x='popularity',
    y='original_title',
)
axis2.set_title('Best Movies by Popularity', weight='bold')
axis2.set_xlabel('Popularity', weight='bold')
axis2.set_ylabel('Movie Title', weight='bold')
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the two ranking signals so they share a common range before
# they are blended into a single score.
scaling = MinMaxScaler()
movies_scaled_df = pd.DataFrame(
    scaling.fit_transform(movies_cleaned_df[['weighted_average', 'popularity']]),
    columns=['weighted_average', 'popularity'],
    # The scaler output carries no row labels, so without this the frame would
    # get a fresh 0..n-1 index. The scaled columns are later copied back into
    # movies_cleaned_df with a DataFrame-to-DataFrame assignment, which matches
    # rows by index label; reusing the source index keeps every scaled value
    # attached to its own movie even if the frame is ever filtered or reordered.
    index=movies_cleaned_df.index,
)

In [None]:
# Inspect the first five rows of the scaled values.
movies_scaled_df.head(n=5)

In [None]:
# Copy the scaled values back next to the raw ones, one column at a time.
# Rows are matched on index label.
movies_cleaned_df['normalized_weighted_average'] = movies_scaled_df['weighted_average']
movies_cleaned_df['normalized_popularity'] = movies_scaled_df['popularity']

In [None]:
# Confirm the two normalized columns were added.
movies_cleaned_df.head(n=5)

In [None]:
# Blend the two normalized signals with equal weight (50% rating, 50% popularity).
movies_cleaned_df['score'] = (
    0.5 * movies_cleaned_df['normalized_weighted_average']
    + 0.5 * movies_cleaned_df['normalized_popularity']
)

# Rank by the blended score and show the 20 best titles.
movies_scored_df = movies_cleaned_df.sort_values(by='score', ascending=False)
movies_scored_df.head(20)[
    ['original_title', 'normalized_weighted_average', 'normalized_popularity', 'score']
]

In [None]:
# Rank by the blended score and chart the ten highest-scoring titles.
scored_df = movies_cleaned_df.sort_values('score', ascending=False)

plt.figure(figsize=(16,6))

# Slice the top ten once and let seaborn look the columns up by name.
ax = sns.barplot(x='score', y='original_title', data=scored_df.head(10), palette='deep')

plt.title('Best Rated & Most Popular Blend', weight='bold')
plt.xlabel('Score', weight='bold')
plt.ylabel('Movie Title', weight='bold')
# Render explicitly, as the other chart cells do, so the cell's output is the
# figure rather than the repr of the last pyplot call.
plt.show()
