## Importando Bibliotecas

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from plotnine import *
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot
from plotly.subplots import make_subplots
import warnings
warnings.simplefilter(action='ignore')

## Leitura de Dados e Visualização de Conteúdo

In [None]:
# Load the raw games table from 'games.csv' (path is relative to the working directory).
df = pd.read_csv('games.csv')

In [None]:
# Preview the first rows of the loaded table.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded table.
df.shape

In [None]:
# Inspect the dtype pandas inferred for each column before any conversion.
df.dtypes

In [None]:
# Convert both score columns to floats.
df['metascore'] = pd.to_numeric(df['metascore'], downcast='float')
# errors='coerce' maps any non-numeric placeholder (such as 'tbd') to NaN. It also works
# when the column is already numeric, where the `.str` accessor would raise.
# The result is multiplied by 10 -- presumably user_score is on a 0-10 scale and this
# aligns it with metascore; confirm against the dataset.
# NOTE: the scaling is not idempotent; reload the CSV before re-running this cell.
df['user_score'] = pd.to_numeric(df['user_score'], errors='coerce', downcast='float') * 10

In [None]:
# Parse the release date, then derive the release year and the age of each game.
release_dates = pd.to_datetime(df['release_date'])
df['release_date'] = release_dates
df['year'] = release_dates.dt.year
# Age is measured against the fixed reference year 2023.
df['age'] = 2023 - release_dates.dt.year

In [None]:
# Save the dataframe to a CSV file (the index is not written).
df.to_csv('games_analiseexpl.csv', index=False)

## Análise Exploratória de Dados / Visualização de Dados

Distribuição de valores

In [None]:
# 2x2 grid of histograms: critic scores, user scores, games per platform and game age.
fig = make_subplots(rows=2, cols=2, subplot_titles=(
    "MetaScore", "UserScore", "Plataforma", "Idade"))

fig.add_trace(
    go.Histogram(x=df['metascore'], name="nota críticos"),
    row=1, col=1
)

fig.add_trace(
    go.Histogram(x=df['user_score'], name="nota players"),
    row=1, col=2
)

fig.add_trace(
    go.Histogram(x=df['platform'], name="plataforma", texttemplate="%{y}", textfont_size=8),
    row=2, col=1
)
# Order the platform bars by count. The update is restricted to the platform subplot;
# without row/col it would be applied to the x axis of every subplot.
fig.update_xaxes(categoryorder='total descending', row=2, col=1)

# The age axis is numeric, so no category ordering is requested for it.
fig.add_trace(
    go.Histogram(x=df['age'].astype(np.float64), name="idade (anos)"),
    row=2, col=2
)

fig.update_layout(height=600, width=1200, title_text="Distribuição de notas")
fig.show()


Top 10 Jogos - Notas de Críticos

In [None]:
# Top 10 games ranked by critic score (metascore).
# The ranking column itself is displayed, with the user score kept alongside for comparison.
top_metascore = df.sort_values(['metascore'], ascending=False)[:10]
top_metascore[['title', 'metascore', 'user_score', 'platform']]

Top 10 Jogos - Notas de Usuários

In [None]:
# Top 10 games ranked by user score.
top_user_score = df.sort_values(by=['user_score'], ascending=False).iloc[:10]
top_user_score.loc[:, ['title', 'user_score', 'platform']]

Geração com os melhores jogos

In [None]:
# Total critic score accumulated per release year, highest total first.
# NOTE: this is a sum, so years with more releases rank higher regardless of the
# average quality of their games.
# 'release_date' and 'year' are already prepared when the dates are parsed, so they are
# not recomputed here; intermediate expressions whose results were discarded are gone.
era3 = (
    df.groupby('year')['metascore']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)
era3.columns = ["year", "Total"]

In [None]:
# Bar chart of the per-year metascore totals stored in era3.
sns.catplot(data=era3, x="year", y="Total", kind="bar", height=6, aspect=3)
plt.xlabel('Year')
plt.ylabel("Popularity of Video Games ")
plt.title("Year that had the best video games ( Popularity ) ")
plt.grid(True)

Média de MetaScore por Plataforma

In [None]:
# Average critic score per platform, highest average first, rounded to 2 decimals.
mean_by_platform = (
    df.groupby('platform', as_index=False)
    .agg({'metascore': 'mean'})
    .rename(columns={'metascore': 'avg_meta_score'})
    .sort_values('avg_meta_score', ascending=False)
)
mean_by_platform['avg_meta_score'] = mean_by_platform['avg_meta_score'].round(2)

# One bar per platform, labelled with its rounded average.
platform_bars = go.Bar(
    x=mean_by_platform['platform'],
    y=mean_by_platform['avg_meta_score'],
    text=mean_by_platform['avg_meta_score'],
    marker=dict(color='#A6ACEC'),
    textposition='outside',
    textfont=dict(color='black', size=8, family='Arial'),
)
fig = go.Figure(data=[platform_bars])

fig.update_layout(
    title=dict(
        y=0.93,
        text='Média de nota de críticos por plataforma',
        font=dict(color='black', size=26, family='Arial'),
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False,
)

fig.update_xaxes(
    showgrid=False,
    showline=True,
    color='black',
    tickangle=-90,
    linecolor='black',
    tickfont=dict(color='black', family='Arial'),
)

# The value axis is hidden; the text on each bar carries the number.
fig.update_yaxes(visible=False)

fig.show()

In [None]:
# Mean critic score per release year.
mean_by_year = (
    df.groupby('year', as_index=False)
    .agg({'metascore': 'mean'})
    .rename(columns={'metascore': 'mean'})
)

# Rounded average of the yearly means, drawn as a dotted horizontal reference line
# spanning the first to the last year.
overall_mean = round(mean_by_year['mean'].mean())
first_year = mean_by_year['year'].min()
last_year = mean_by_year['year'].max()

yearly_trace = go.Scatter(
    x=mean_by_year['year'],
    y=mean_by_year['mean'],
    mode='lines+markers',
    marker=dict(color='black', size=10),
    line=dict(color='#A6ACEC', width=3),
)
fig = go.Figure(data=yearly_trace)

fig.add_shape(
    type='line',
    x0=first_year, y0=overall_mean,
    x1=last_year, y1=overall_mean,
    line=dict(color='crimson', width=2, dash='dot'),
)

# Shaded band over 1999-2009, explained by the annotation placed inside it.
fig.add_vrect(
    x0=1999,
    x1=2009,
    y1=0.92,
    fillcolor='crimson',
    opacity=0.1,
    layer='below',
    line_width=0,
)

fig.add_annotation(
    showarrow=False,
    x=2004,
    y=96,
    text=' Período em que houve crescimento de número de jogos lançados por ano',
    font=dict(size=13, color='black', family='Arial'),
    bordercolor='black',
    align='left',
    borderwidth=0,
    borderpad=1,
    bgcolor='white',
    opacity=0.8,
)

fig.update_layout(
    title=dict(
        text='Média de nota de críticos por ano de lançamento',
        font=dict(color='black', size=26, family='Arial'),
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False,
)

# Both axes share the same grid/line/tick styling; only the y axis is forced to start at zero.
shared_axis_style = dict(
    showgrid=True,
    gridwidth=1,
    gridcolor='#DCDCDC',
    showline=True,
    color='black',
    linecolor='black',
    tickfont=dict(color='black', family='Arial'),
)
fig.update_xaxes(**shared_axis_style)
fig.update_yaxes(rangemode='tozero', **shared_axis_style)
fig.show()

In [None]:
# Scatter plot of user score (x) against critic score (y) with an OLS trend line.
fig = px.scatter(df, x=df['user_score'], y=df['metascore'], trendline='ols')

# Restyle the second trace of the figure -- presumably the trend line added by
# trendline='ols'; confirm.
# NOTE(review): the marker colour is set on this trace, not on fig.data[0] (the points);
# check whether colouring the points was the intent.
second_trace = fig.data[1]
second_trace.marker.color = '#A6ACEC'
second_trace.line.color = 'crimson'
second_trace.line.width = 3

fig.update_layout(
    title=dict(
        text='Razão de nota de jogadores vs nota de críticos',
        font=dict(color='black', size=26, family='Arial'),
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False,
)

# Both axes share the same grid/line/tick styling; they differ in title and the y axis
# is additionally forced to start at zero.
shared_axis_style = dict(
    showgrid=True,
    gridwidth=1,
    gridcolor='#DCDCDC',
    showline=True,
    color='black',
    linecolor='black',
    tickfont=dict(color='black', family='Arial'),
)
fig.update_xaxes(title='nota de jogadores', **shared_axis_style)
fig.update_yaxes(title='nota de críticos', rangemode='tozero', **shared_axis_style)
fig.show()

In [None]:
# Timeline figure illustrating the five "eras" of release years.
fig = go.Figure()

# Horizontal baseline from 1995 to 2025.
fig.add_trace(go.Scatter(
    x=[1995, 2025],
    y=[1, 1],
    mode='lines',
    marker=dict(color='#A6ACEC'),
))
# Markers at the years that separate one era from the next.
fig.add_trace(go.Scatter(
    x=[2000, 2005, 2013, 2020],
    y=[1, 1, 1, 1],
    mode='markers',
    marker=dict(color='crimson', size=10),
))

# (x position, label) of each era annotation. All annotations share the same styling,
# so they are generated in a loop instead of five copy-pasted calls.
era_label_positions = [
    (1997.5, 'era_1'),
    (2002.5, 'era_2'),
    (2008.5, 'era_3'),
    (2016.5, 'era_4'),
    (2022.5, 'era_5'),
]
for label_x, label_text in era_label_positions:
    fig.add_annotation(
        x=label_x,
        y=1,
        text=label_text,
        yanchor='bottom',
        arrowhead=5,
        arrowwidth=1,
        arrowcolor='black',
        ax=0,
        ay=-20,
        font=dict(size=12, color='black', family='Arial'),
        bordercolor='black',
        align='center',
        borderwidth=1,
        borderpad=4,
        bgcolor='#badbda',
    )

fig.update_layout(
    title=dict(
        text='As eras dos vídeo-games',
        font=dict(color='black', size=26, family='Arial'),
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False,
)
# One tick per year on the x axis, rotated for legibility.
fig.update_xaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='#DCDCDC',
    showline=True,
    color='black',
    linecolor='black',
    dtick=1,
    tickangle=-60,
    tickfont=dict(color='black', family='Arial'),
)
# The y tick labels are drawn in white on the white background, so they are not visible.
fig.update_yaxes(
    showgrid=True,
    gridwidth=1,
    gridcolor='#DCDCDC',
    showline=True,
    color='black',
    linecolor='black',
    tickfont=dict(color='white', family='Arial'),
)
fig.show()

In [None]:
def era(data):
    """Map a release year to its era label.

    Eras:
        1995-1999 -> 'era_1'
        2000-2004 -> 'era_2'
        2005-2012 -> 'era_3'
        2013-2019 -> 'era_4'
        2020 on   -> 'era_5'

    Args:
        data: release year as a number (may be NaN for a missing date).

    Returns:
        The era label, or None when the year is missing (NaN) or falls outside
        every era (before 1995), so such rows are not silently counted as 'era_5'.
    """
    if 1995 <= data <= 1999:
        return 'era_1'
    if 2000 <= data <= 2004:
        return 'era_2'
    if 2005 <= data <= 2012:
        return 'era_3'
    if 2013 <= data <= 2019:
        return 'era_4'
    if data >= 2020:
        return 'era_5'
    # NaN fails every comparison above and ends up here, as do years before 1995.
    return None
# Label every game with its era, then average the critic score per era (2 decimals).
df['era'] = df['year'].apply(era)

era_avg = (
    df.groupby('era', as_index=False)
    .agg({'metascore': 'mean'})
    .rename(columns={'metascore': 'avg_meta_score'})
)
era_avg['avg_meta_score'] = era_avg['avg_meta_score'].round(2)

# One bar per era, labelled with its rounded average.
era_bars = go.Bar(
    x=era_avg['era'],
    y=era_avg['avg_meta_score'],
    text=era_avg['avg_meta_score'],
    marker=dict(color='#A6ACEC'),
    textposition='outside',
    textfont=dict(color='black', size=12, family='Arial'),
)
fig = go.Figure(data=[era_bars])

fig.update_layout(
    title=dict(
        text='Média de nota de críticos por era',
        font=dict(color='black', size=26, family='Arial'),
    ),
    plot_bgcolor='white',
    paper_bgcolor='white',
    showlegend=False,
)

fig.update_xaxes(
    showgrid=False,
    showline=True,
    color='black',
    linecolor='black',
    tickfont=dict(color='black', family='Arial'),
    dtick=1,
)

# The value axis is hidden; the text on each bar carries the number.
fig.update_yaxes(visible=False)

fig.show()


## Preparação de Dados

Correlação de Variáveis

In [None]:
# Correlation matrix of the numeric columns only. The text and date columns are excluded
# explicitly: DataFrame.corr() raises on non-numeric columns in pandas >= 2.0.
corr = df.select_dtypes(include='number').corr()
sns.set_context("notebook", font_scale=1.0, rc={"lines.linewidth": 2.5})
plt.figure(figsize=(6,3))
# Boolean mask that hides the strict upper triangle (it mirrors the lower one).
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask, 1)] = True
a = sns.heatmap(corr, mask=mask, annot=True, fmt='.2f')
# Rotate the tick labels so the column names stay readable.
rotx = a.set_xticklabels(a.get_xticklabels(), rotation=90)
roty = a.set_yticklabels(a.get_yticklabels(), rotation=30)

Checagem de duplicados

In [None]:
# Count how many 'id' values repeat an earlier one (0 means every id is unique).
df['id'].duplicated().sum()

Remoção de Colunas

In [None]:
# Remove the listed columns from df in place.
columns_to_discard = ['release_date', 'sort_no', 'summary', 'title', 'id']
df.drop(columns=columns_to_discard, inplace=True)

Remoção de NaNs

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Rows without a user score are kept: their metascore is copied into user_score.
# The result is assigned back to the column instead of calling fillna(inplace=True) on a
# column selection, which is not guaranteed to update df under pandas copy-on-write.
df['user_score'] = df['user_score'].fillna(df['metascore'])
df.isna().sum()

In [None]:
# Number of games per platform.
df['platform'].value_counts()

In [None]:
# Bucket metascore into four classes: 1 = up to 40, 2 = (40, 60], 3 = (60, 80], 4 = (80, 100].
# labels=False yields 0-based bin codes, hence the +1.
# include_lowest=True keeps a metascore of exactly 0 in the first class instead of
# turning it into NaN.
df['binned_metascore'] = pd.cut(
    df['metascore'],
    bins=[0, 40, 60, 80, 100],
    right=True,
    labels=False,
    include_lowest=True,
) + 1

In [None]:
# One-hot encode the categorical 'platform' column into numeric 'platform_*' columns.
# drop_first=True omits the indicator column of the first platform category.
df = pd.get_dummies(df, columns=['platform'], prefix='platform', drop_first=True)

In [None]:
# Save the prepared dataframe (binned metascore + platform dummies) without the index.
df.to_csv('games_com_notabinada.csv', index=False)

## Engenharia de Recursos

In [None]:
# List the column names currently in df.
list(df.columns)

In [None]:
# Columns of the dataframe used as model inputs: user score, age and the platform dummies.
feature_columns = [
    'user_score',
    'age',
    'platform_DS',
    'platform_Dreamcast',
    'platform_Game Boy Advance',
    'platform_GameCube',
    'platform_Nintendo 64',
    'platform_PC',
    'platform_PSP',
    'platform_PlayStation',
    'platform_PlayStation 2',
    'platform_PlayStation 3',
    'platform_PlayStation 4',
    'platform_PlayStation 5',
    'platform_PlayStation Vita',
    'platform_Stadia',
    'platform_Switch',
    'platform_Wii',
    'platform_Wii U',
    'platform_Xbox',
    'platform_Xbox 360',
    'platform_Xbox One',
    'platform_Xbox Series X',
]
X = pd.DataFrame(data=df, columns=feature_columns)

In [None]:
# Column of the dataframe used as the model target.
target_columns = ['binned_metascore']
y = pd.DataFrame(data=df, columns=target_columns)

In [None]:
#importando o pacote de divisão dos dados em treinamento e teste
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only and the
# same fitted scaler is then applied to both splits.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler().fit(X_train)
X_train = sc_X.transform(X_train)
X_test = sc_X.transform(X_test)

## Machine Learning

In [None]:
# Import, configure and train the logistic-regression classifier, then predict the test split.
from sklearn.linear_model import LogisticRegression
logit = LogisticRegression(max_iter=1000, verbose=0)
# The target is a one-column frame; flatten it to the 1-D array that fit() expects.
train_labels = np.ravel(y_train)
logit.fit(X_train, train_labels)
y_pred = logit.predict(X_test)

In [None]:
# Inspect the predicted values.
y_pred

In [None]:
# Import the metrics package and compute the confusion matrix of the test predictions.
from sklearn import metrics
cnf_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cnf_matrix)

In [None]:
# Check how many rows fall into each 'binned_metascore' category.
df['binned_metascore'].value_counts()

In [None]:
# Final metrics: per-class report for the four metascore bins.
# `labels` pins the four bins explicitly so they always line up with `target_names`,
# even if one of the bins is absent from the test split.
print(metrics.classification_report(
    y_test,
    y_pred,
    labels=[1, 2, 3, 4],
    target_names=['1', '2', '3', '4'],
))

In [None]:
#importação do pacote para salvar o modelo
import pickle

In [None]:
# File path where the trained model will be saved.
modelo_treinado = 'modelo_games.sav'

In [None]:
# Save the trained model. The context manager closes (and flushes) the file even if
# pickling fails.
# NOTE: only the classifier is stored; the fitted scaler sc_X is not part of this file.
with open(modelo_treinado, 'wb') as model_file:
    pickle.dump(logit, model_file)

In [None]:
# Load the trained model back from disk; the context manager closes the file afterwards.
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
with open(modelo_treinado, 'rb') as model_file:
    modelo_carregado = pickle.load(model_file)

In [None]:
# Inspect the contents of one test vector (the first row of the scaled test split).
X_test[0]

In [None]:
# Predict the class of a single row with the reloaded model.
# NOTE(review): row 222 of the test split is used here, while the preceding cell displays
# row 0 -- confirm which row is meant.
modelo_carregado.predict([X_test[222]])