# Regressão Multiplas Saídas - Base Video Game

Base de dados: https://www.kaggle.com/datasets/gregorut/videogamesales

## Importando bibliotecas

In [1]:
!pip install -q tensorflow==2.16.1

In [2]:
# Importacao desta lib para desativar erro no TensorFlow
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
import pandas as pd
import tensorflow as tf
import sklearn

2025-01-09 17:18:57.222084: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-09 17:18:57.468613: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-09 17:18:58.232774: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
pd.__version__,tf.__version__,sklearn.__version__

('2.2.2', '2.16.1', '1.4.2')

In [5]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

## Importando base de dados

In [6]:
base = pd.read_csv('games.csv')
base

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


## Pre-processamento da base de dados

### Apagando dados irrelevantes

In [7]:
base = base.drop('Rank',axis=1)
base = base.drop('Other_Sales',axis=1)
base = base.drop('Global_Sales',axis=1)

In [8]:
base.shape

(16598, 8)

### Verificar se tem dados nulos

base.isnull().sum()

Neste caso, como é uma base de teste, podemos deletar as colunas nulas, porém em um projeto real deveria-se entender o motivo disto e corrigir

In [9]:
base = base.dropna(axis=0)

In [10]:
base.shape

(16291, 8)

In [12]:
base.isnull().sum()

Name         0
Platform     0
Year         0
Genre        0
Publisher    0
NA_Sales     0
EU_Sales     0
JP_Sales     0
dtype: int64

### Verificar a repeticao do campo de name

In [13]:
base['Name'].value_counts()

Name
Need for Speed: Most Wanted    12
FIFA 14                         9
Ratatouille                     9
LEGO Marvel Super Heroes        9
Cars                            8
                               ..
PGA Tour 96                     1
Game & Wario                    1
Angry Birds                     1
Shadow Hearts: Covenant         1
Know How 2                      1
Name: count, Length: 11325, dtype: int64

Baseado na quantidade de nomes, e do total de registros, o campo de name não é relevante para a Rede Neural aprender algo com ele, por conta disto pode-se deletar esta coluna

In [14]:
base = base.drop('Name',axis=1)

In [15]:
base.shape

(16291, 7)

In [16]:
base.head(5)

Unnamed: 0,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales
0,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77
1,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81
2,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79
3,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28
4,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22


### Divisão de previsores e alvo

In [17]:
base.columns

Index(['Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales', 'EU_Sales',
       'JP_Sales'],
      dtype='object')

In [18]:
X = base.iloc[:,[0,1,2,3]].values
X

array([['Wii', 2006.0, 'Sports', 'Nintendo'],
       ['NES', 1985.0, 'Platform', 'Nintendo'],
       ['Wii', 2008.0, 'Racing', 'Nintendo'],
       ...,
       ['PS2', 2008.0, 'Racing', 'Activision'],
       ['DS', 2010.0, 'Puzzle', '7G//AMES'],
       ['GBA', 2003.0, 'Platform', 'Wanadoo']], dtype=object)

In [23]:
## Alvos baseados em cada região
y_na = base.iloc[:,4].values
y_ue = base.iloc[:,5].values
y_jp = base.iloc[:,6].values

In [24]:
y_na

array([4.149e+01, 2.908e+01, 1.585e+01, ..., 0.000e+00, 0.000e+00,
       1.000e-02])

In [25]:
y_ue

array([2.902e+01, 3.580e+00, 1.288e+01, ..., 0.000e+00, 1.000e-02,
       0.000e+00])

In [26]:
y_jp

array([3.77, 6.81, 3.79, ..., 0.  , 0.  , 0.  ])

### Converter dados categóricos nominais em ordinais com OneHotEncoder

In [27]:
# PS2 1 0 0 0 0 ...
# PS3 0 1 0 0 0 ...
base['Platform'].value_counts()

Platform
DS      2131
PS2     2127
PS3     1304
Wii     1290
X360    1234
PSP     1197
PS      1189
PC       938
XB       803
GBA      786
GC       542
3DS      499
PSV      410
PS4      336
N64      316
SNES     239
XOne     213
SAT      173
WiiU     143
2600     116
NES       98
GB        97
DC        52
GEN       27
NG        12
SCD        6
WS         6
3DO        3
TG16       2
GG         1
PCFX       1
Name: count, dtype: int64

In [28]:
base.columns

Index(['Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales', 'EU_Sales',
       'JP_Sales'],
      dtype='object')

In [29]:
onehotencoder = ColumnTransformer(transformers=[("OneHot", OneHotEncoder(), [0,2,3])], remainder='passthrough')

In [30]:
X = onehotencoder.fit_transform(X).toarray()
X.shape

(16291, 620)

In [31]:
X[0]

array([0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
       0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 

## Estrutura da Rede Neural

In [32]:
## Calcular quantidade de neuronios camada oculta
## somar neuronios de entrada + de saide e dividir por 2
( 620 + 3 ) / 2

311.5

In [33]:
## Aqui as camadas são conectas desta forma - Que é diferente do Sequential
camada_entrada = Input(shape=(620,))
camada_oculta1 = Dense(units = 312, activation='relu')(camada_entrada)
camada_oculta2 = Dense(units = 312, activation='relu')(camada_oculta1)
camada_saida1 = Dense(units = 1, activation='linear')(camada_oculta2)
camada_saida2 = Dense(units = 1, activation='linear')(camada_oculta2)
camada_saida3 = Dense(units = 1, activation='linear')(camada_oculta2)

In [34]:
regressor = Model(inputs = camada_entrada, outputs = [camada_saida1,camada_saida2,camada_saida3] )

In [35]:
regressor.compile(optimizer='adam',loss='mse')

In [36]:
regressor.fit(X,[y_na,y_ue,y_jp],epochs=500,batch_size=100)

Epoch 1/500
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - dense_2_loss: 907.9509 - dense_3_loss: 19.4139 - dense_4_loss: 313.3368 - loss: 1240.7037
Epoch 2/500
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - dense_2_loss: 0.6639 - dense_3_loss: 0.2473 - dense_4_loss: 0.1154 - loss: 1.0266
Epoch 3/500
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - dense_2_loss: 0.7508 - dense_3_loss: 0.5692 - dense_4_loss: 0.1903 - loss: 1.5102
Epoch 4/500
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - dense_2_loss: 0.9051 - dense_3_loss: 1.3864 - dense_4_loss: 0.4131 - loss: 2.7047
Epoch 5/500
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - dense_2_loss: 2.0445 - dense_3_loss: 3.1672 - dense_4_loss: 2.3821 - loss: 7.5938
Epoch 6/500
[1m163/163[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - dense_2_loss: 1.8796 - dense_3_loss: 2.1079 - de

<keras.src.callbacks.history.History at 0x726fb4fce210>

In [37]:
previsao_na, previsao_eu, previsao_jp = regressor.predict(X)

[1m510/510[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [38]:
previsao_na, previsao_na.mean()

(array([[2.0004206 ],
        [2.6410708 ],
        [2.288507  ],
        ...,
        [0.5264826 ],
        [0.16004458],
        [0.19905242]], dtype=float32),
 0.32618183)

In [39]:
y_na, y_na.mean()

(array([4.149e+01, 2.908e+01, 1.585e+01, ..., 0.000e+00, 0.000e+00,
        1.000e-02]),
 0.26564667607881653)

In [40]:
from sklearn.metrics import mean_absolute_error

In [41]:
mean_absolute_error(y_na,previsao_na)

0.29275244625418473

In [42]:
previsao_eu, previsao_eu.mean()

(array([[0.95681274],
        [1.0658588 ],
        [1.053056  ],
        ...,
        [0.30513513],
        [0.09754215],
        [0.11882943]], dtype=float32),
 0.18863016)

In [43]:
y_ue, y_ue.mean()

(array([2.902e+01, 3.580e+00, 1.288e+01, ..., 0.000e+00, 1.000e-02,
        0.000e+00]),
 0.14773126266036463)

In [44]:
mean_absolute_error(y_ue,previsao_eu)

0.19293249122181488

In [45]:
previsao_jp, previsao_jp.mean()

(array([[0.822733  ],
        [1.0148048 ],
        [0.91108406],
        ...,
        [0.06782256],
        [0.07005881],
        [0.06622669]], dtype=float32),
 0.080956064)

In [46]:
y_jp, y_jp.mean()

(array([3.77, 6.81, 3.79, ..., 0.  , 0.  , 0.  ]), 0.0788330980295869)

In [47]:
mean_absolute_error(y_jp,previsao_jp)

0.10707540538171825