# Getting Started

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Text-processing and plotting setup for the notebook.
import nltk
# The 'stopwords' corpus is read later via nltk.corpus.stopwords.
nltk.download('stopwords')
# NOTE(review): 'wordnet' is presumably needed by WordNetLemmatizer — confirm it is still used.
nltk.download('wordnet')

from nltk.tokenize import word_tokenize
from nltk import WordNetLemmatizer

from nltk.stem import PorterStemmer
# Shared module-level Porter stemmer instance.
stemmer = PorterStemmer()

from textblob import TextBlob
from wordcloud import WordCloud

import matplotlib.pyplot as plt
# Programmatic form of the `%matplotlib inline` line magic.
get_ipython().run_line_magic('matplotlib', 'inline')
import re

# Metacritic Reviews Overview

In [None]:
# Review dates and individual scores only, with incomplete rows removed and
# the parsed review date used as the index.
df_review = (
    pd.read_csv('metacritic_reviews.csv')[['review_date', 'individual_meta_score']]
    .dropna(axis=0, how='any')
    .assign(review_date=lambda frame: pd.to_datetime(frame['review_date']))
    .set_index('review_date')
)

In [None]:
# Bar chart of the number of reviews per calendar year of the review date.
graph_1 = (
    df_review
    .groupby(df_review.index.year)
    .agg({'individual_meta_score': ['count']})
    .plot.bar(color='#7393BC', width=0.5)
)

graph_1.legend(["Review", ""])

# Data Cleaning 

In [None]:
# Load the review table again, this time keeping every column, as the working frame `df`.
df= pd.read_csv('metacritic_reviews.csv')

In [None]:
# Discard every row that has at least one missing value
# (dropna defaults: axis=0, how='any').
df = df.dropna()

In [None]:
# Mean individual score and review count for each (media, critic_name) pair.
df_critics = (
    df.groupby(['media', 'critic_name'])[['individual_meta_score']]
    .agg(['mean', 'count'])
)

In [None]:
# Normalise the review text: lower-case it, then collapse every run of
# whitespace into a single space.
# The pattern is a raw string so that `\s` reaches the regex engine as written
# instead of being an invalid Python string escape (which triggers a warning).
df['text'] = df['text'].str.lower().str.replace(r'\s+', ' ', regex=True)

In [None]:
# Remove every character that is neither a word character nor whitespace.
# Raw string: `\w` / `\s` are regex escapes, not (invalid) Python string escapes.
df['text'] = df['text'].str.replace(r'[^\w\s]', '', regex=True)

In [None]:
from nltk.corpus import stopwords

# NLTK's English stop words plus additional words excluded from the analysis.
stop = stopwords.words('english')+['movie','make','film','one','even','films','movies','way','makes','made','thats']
# Membership is tested once per word of every review, so look words up in a
# set (O(1)) rather than scanning the list; `stop` itself stays a list.
stop_set = set(stop)
# NOTE(review): the text has already had punctuation stripped, so any stop word
# that contains an apostrophe cannot match its stripped form — confirm whether
# that is intended.
df['text'] = df['text'].apply(lambda text: " ".join(word for word in text.split() if word not in stop_set))

In [None]:
# Parse review_date into datetimes, then display the cleaned frame.
df = df.assign(review_date=pd.to_datetime(df['review_date']))
df

# Word Cloud

In [None]:

#df['release_date'] = pd.to_datetime(df['release_date'])
# Reviews dated after 1/1/2009 and no later than 1/1/2019.
review_window = (df['review_date'] > '1/1/2009') & (df['review_date'] <= '1/1/2019')
# Within that window: metascore above 70, below 30, and strictly between 30 and 70.
mask1 = review_window & (df['metascore'] > 70)
mask2 = review_window & (df['metascore'] < 30)
mask3 = review_window & (df['metascore'] > 30) & (df['metascore'] < 70)

In [None]:
# Word cloud built from the text of all reviews selected by mask1.
wc1 = WordCloud(width=1600, height=800, max_words=2000, background_color="white")
wc1.generate(' '.join(df.loc[mask1, 'text']))

# Render the cloud as an image without axes.
plt.figure(figsize=(12, 6))
plt.imshow(wc1, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# Word cloud built from the text of all reviews selected by mask2.
wc2 = WordCloud(width=1600, height=800, max_words=800, background_color="white")
wc2.generate(' '.join(df.loc[mask2, 'text']))

# Render the cloud as an image without axes.
plt.figure(figsize=(12, 6))
plt.imshow(wc2, interpolation='bilinear')
plt.axis("off")
plt.show()

In [None]:
# wc3 = WordCloud(background_color="white", max_words=2000, width=800, height=400)
# # generate word cloud
# wc.generate(' '.join(df.loc[mask3]['text']))
# plt.figure(figsize=(12, 6))
# plt.imshow(wc3, interpolation='bilinear')
# plt.axis("off")
# plt.show()

# Sentiment Analysis 

In [None]:
# Number of reviews sampled from each group for the sentiment comparison.
sample_size = 2000
# Reviews with an individual score above 70 / below 30.
df_positive = df[df['individual_meta_score'] > 70]
df_negative = df[df['individual_meta_score'] < 30]

def sentiment_func(x):
    """Return a copy of review row ``x`` with TextBlob sentiment scores added.

    Parameters
    ----------
    x : row-like (e.g. the Series handed over by ``DataFrame.apply(axis=1)``)
        Must hold the review text under the key ``'text'`` and support ``.copy()``.

    Returns
    -------
    A copy of ``x`` with two extra entries, ``'polarity'`` and
    ``'subjectivity'``, both read from ``TextBlob(x['text'])``.
    """
    sentiment = TextBlob(x['text'])
    # Work on a copy: pandas documents mutating the object passed to apply()
    # as unsupported, and callers' rows should not change as a side effect.
    scored = x.copy()
    scored['polarity'] = sentiment.polarity
    scored['subjectivity'] = sentiment.subjectivity
    return scored

# Score a random sample of each group with sentiment_func.
# - random_state makes the samples (and every plot built on them) reproducible
#   across kernel restarts.
# - min(...) keeps .sample() from raising ValueError when a group holds fewer
#   than `sample_size` reviews; in that case the whole group is used.
sample_1 = (
    df_positive
    .sample(n=min(sample_size, len(df_positive)), random_state=42)
    .apply(sentiment_func, axis=1)
)
sample_2 = (
    df_negative
    .sample(n=min(sample_size, len(df_negative)), random_state=42)
    .apply(sentiment_func, axis=1)
)

# Polarity against individual score for the high-score sample.
sample_1.plot.scatter('individual_meta_score', 'polarity')

In [None]:
# Polarity against individual score for the low-score sample.
sample_2.plot.scatter(x='individual_meta_score', y='polarity')

In [None]:
import plotly.figure_factory as ff

# Overlay the polarity distributions of the two samples.
fig = ff.create_distplot(
    hist_data=[sample_1['polarity'], sample_2['polarity']],
    group_labels=['Metascore>70', 'Metascore<30'],
    bin_size=.15,
)
fig.show()

# Media 

In [None]:
%matplotlib inline 
# pandas, numpy and pyplot are already imported at the top of the notebook;
# these repeat imports rebind the same names.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Switch matplotlib to its 'ggplot' style sheet.
plt.style.use('ggplot')

In [None]:
# Mean individual score and review count per (review year, media) pair.
review_year = df['review_date'].dt.year
df_graph_1 = (
    df.groupby([review_year, 'media'])
    .agg({'individual_meta_score': ['mean', 'count']})
)
df_graph_1

In [None]:
# Outer index level (the review year) of every (year, media) group in
# df_graph_1, one entry per group, so years repeat.
years = df_graph_1.index.get_level_values(0).tolist()

In [None]:
# Sorted list of the distinct review years.
# The original read these from `df_date`, which is not defined anywhere in the
# visible notebook (NameError on a fresh kernel); the same information is taken
# from df['review_date'], which has already been converted to datetime.
# NOTE(review): confirm `df_date` was meant to be the review table indexed by date.
year_list = sorted(df['review_date'].dt.year.unique().tolist())
year_list

# Critic's Review Quantity

In [None]:
# !pip install plotly_express
# !pip install --upgrade plotly
# Plotly setup for the interactive charts in the following sections.
import plotly
# NOTE(review): presumably enables notebook rendering with plotly.js loaded
# from a CDN (connected=True) — confirm against the plotly docs.
plotly.offline.init_notebook_mode(connected=True)
import plotly.graph_objects as go
from plotly.graph_objs import Figure, Histogram, Layout
import plotly.express as px

In [None]:
# Review count and mean individual score per critic.
df_graph_3 = (
    df.groupby('critic_name')[['individual_meta_score']]
    .agg(['count', 'mean'])
)

In [None]:
# Keep only the critic name and the individual score columns.
df_temp = df.loc[:, ['critic_name', 'individual_meta_score']]

In [None]:
# Per-critic count and mean of every remaining column of df_temp.
res1 = (
    df_temp
    .groupby(by='critic_name')
    .aggregate(['count', 'mean'])
)

In [None]:
# Mean individual score per critic, as a Series named 'mean_score'.
df_mean = res1[('individual_meta_score', 'mean')].rename('mean_score')

In [None]:
# Number of reviews per critic, as a Series named 'count'.
df_count = pd.Series(res1.unstack()['individual_meta_score']['count'], name = 'count')
# Fix: keep the concatenated frame. The original discarded the result of
# pd.concat and then plotted `pd_combined`, which was never assigned.
pd_combined = pd.concat([df_mean, df_count], axis=1)
# Mean score against number of reviews, one point per critic.
pd_combined.plot.scatter(x='count', y='mean_score')

In [None]:
# Same per-critic scatter on a larger canvas, with the review count on a log axis.
plt.figure(figsize=(16, 8))
plt.scatter(x=pd_combined['count'], y=pd_combined['mean_score'])
plt.xscale('log')
# plt.xlim(0, 500)
# plt.ylim(-1, 1)

# Review Date Analysis

In [None]:
import plotly.express as px

# All reviews of 'Joker' in chronological order. The original also called
# `df_joker.groupby(df['review_date'])` and discarded the result; that dead
# statement has been removed.
df_joker = df.loc[df.movie_title == 'Joker', :].sort_values('review_date')
# Cell output: mean individual score across the Joker reviews.
df_joker['individual_meta_score'].mean()

In [None]:
# Individual score over review date for Joker, with marginal histograms on
# both axes and marker size following the score.
fig = px.scatter(
    df_joker,
    x="review_date",
    y="individual_meta_score",
    marginal_x="histogram",
    marginal_y="histogram",
    size='individual_meta_score',
)
fig.update_layout(title='Joker')
fig.show()

In [None]:
import random

# Pick the movie of one randomly chosen review (the title list has one entry
# per review, so movies with more reviews are more likely to be picked).
random_name = random.choice(df.movie_title.tolist())
print(random_name)

# Fix: filter on the title that was just printed. The original drew a second
# random title here, so the plotted data did not match the printed name or the
# figure title. It also stored the result as an attribute on `df` (`df.ramdom`);
# a plain variable is used instead.
df_random = df.loc[df.movie_title == random_name, :]
fig = px.scatter(
    df_random,
    x="review_date",
    y="individual_meta_score",
    marginal_x="histogram",
    marginal_y="histogram",
    size='individual_meta_score',
)

fig.update_layout(title = random_name)
fig.show()

In [None]:
# Reviews with an individual score below 20; rows containing the 'TBA'
# placeholder or any missing value are removed before release_date is parsed.
df_100 = (
    df[df['individual_meta_score'] < 20]
    .replace('TBA', np.nan)
    .dropna(axis=0, how='any')
)
df_100['release_date'] = pd.to_datetime(df_100['release_date'])

In [None]:
# Boolean flag: True where the review is dated before the film's release date.
df_100['date'] = df_100['review_date'] < df_100['release_date']
# Share of such reviews. The mean of a boolean column is the fraction of True
# values, and it yields NaN instead of raising ZeroDivisionError when df_100
# is empty.
print(df_100['date'].mean())