In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only input directory and print the full path of every file found
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        full_path = os.path.join(root, fname)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

First of all, we need to install the **neurolab** library ([https://pypi.org/project/neurolab/](https://pypi.org/project/neurolab/)). It will simplify working with the neural network.

In [None]:
pip install neurolab

Now we can import all the libraries we need.

In [None]:
import random
import numpy as np
import pandas as pd
import neurolab as nl
import matplotlib.pyplot as plt

These are the lists that will contain the data we'll work with.

In [None]:
# Containers for the complete training file (names, labels, feature rows)
names_t1, labels_t1, data_t1 = [], [], []

# Containers for the complete simulation file (names, labels, feature rows)
names_t2, labels_t2, data_t2 = [], [], []

# Containers for the sample actually used to train the network
names_t, labels_t, data_t = [], [], []

# Containers for the sample actually used to simulate the network
names_s, labels_s, data_s = [], [], []

The first thing we need to do is read the CSV training file and split every row into name, data and label.

In [None]:
# Load the training dataset
dataset = pd.read_csv("/kaggle/input/video-games-rating-by-esrb/Video_games_esrb_rating.csv")

# Division of the dataset into names, data and labels.
# The split is done column-wise in one pass instead of reading every single cell
# through dataset.iloc[i][j], and the lists are rebuilt from scratch rather than
# appended to, so re-running this cell cannot duplicate the rows.
names_t1 = dataset["title"].tolist()
labels_t1 = dataset["esrb_rating"].tolist()

# Every remaining column is a feature: one list per row, in column order
data_t1 = dataset.drop(columns=["title", "esrb_rating"]).values.tolist()

Then, we have to modify the labels, replacing every value with a list.

In [None]:
# Changing the values of the labels
# Being strings, they must be encoded with a list (one-hot encoding)
LABEL_ENCODING = {
    "E": [1, 0, 0, 0],
    "ET": [0, 1, 0, 0],
    "T": [0, 0, 1, 0],
    "M": [0, 0, 0, 1],
}

list_labels = []
for element in labels_t1:
    if isinstance(element, list):
        # Already encoded by a previous run of this cell: keep it unchanged
        list_labels.append(element)
    elif element in LABEL_ENCODING:
        # Append a copy so that every row owns its own list
        list_labels.append(list(LABEL_ENCODING[element]))
    else:
        # Silently skipping the value would shift all the following labels
        # and misalign them with the rows of data_t1
        raise ValueError("Unexpected esrb_rating value: {!r}".format(element))
labels_t1 = list_labels

The next thing we need to do is select a pool of data to use for the training. We can display the selected rows to look at the data.

In [None]:
# Select a random sample of (at most) N rows to use for the training
N = 100
# random.sample raises ValueError when asked for more items than are available,
# so the sample size is limited to the number of rows
samples_idx = random.sample(range(len(data_t1)), min(N, len(data_t1)))

# The containers are rebuilt instead of appended to: after the first run
# data_t and labels_t are ndarrays (which have no append method), so appending
# would make this cell fail when it is executed again
names_t = [names_t1[i] for i in samples_idx]

# Convert from list to ndarray
data_t = np.array([data_t1[i] for i in samples_idx])
labels_t = np.array([labels_t1[i] for i in samples_idx])

# Show the selected rows
dataset.iloc[samples_idx]

The next step is to create and train the neural network (we'll use the gradient descent algorithm).

In [None]:
# Definition of a multilayer feed-forward network with one input per feature column
n_inputs = data_t.shape[1]

# First argument of newff: one [minimum, maximum] pair for every input value
input_l = [[0, 1] for _ in range(n_inputs)]

# Second argument of newff: the size of every layer
# (the last one is the output layer, one unit per position of the encoded label)
layer_sizes = [10, 10, 10, 10, 4]
multilayer_net = nl.net.newff(input_l, layer_sizes)

# Select gradient descent as the training algorithm
multilayer_net.trainf = nl.train.train_gd

# Train the network; lr is the learning rate
error = multilayer_net.train(data_t, labels_t, epochs=2000, show=100, lr=0.01)

We can also plot the error values of the neural network through the epochs.

In [None]:
# Chart of the training error returned by the training step
fig, ax = plt.subplots()
ax.plot(error)
ax.set_xlabel("Number of epochs")
ax.set_ylabel("Training error")
ax.set_title("Progress of the training error")
ax.grid()
plt.show()

Now that we've created a neural network, we need to test it on some values. To do that, we can use the CSV simulation file, which we need to read and transform in the same way as before.

In [None]:
# Load the simulation dataset
dataset = pd.read_csv("/kaggle/input/video-games-rating-by-esrb/test_esrb.csv")

# Division of the dataset into names, data and labels.
# The split is done column-wise in one pass instead of reading every single cell
# through dataset.iloc[i][j], and the lists are rebuilt from scratch rather than
# appended to, so re-running this cell cannot duplicate the rows.
names_t2 = dataset["title"].tolist()
labels_t2 = dataset["esrb_rating"].tolist()

# Every remaining column is a feature: one list per row, in column order
data_t2 = dataset.drop(columns=["title", "esrb_rating"]).values.tolist()

# Changing the values of the labels
# Being strings, they must be encoded with a list (one-hot encoding)
LABEL_ENCODING = {
    "E": [1, 0, 0, 0],
    "ET": [0, 1, 0, 0],
    "T": [0, 0, 1, 0],
    "M": [0, 0, 0, 1],
}

# An unknown label must not be skipped silently: dropping it would shift all the
# following labels and misalign them with the rows of data_t2
unknown_labels = set(labels_t2) - set(LABEL_ENCODING)
if unknown_labels:
    raise ValueError("Unexpected esrb_rating value(s): {!r}".format(unknown_labels))

# Append copies so that every row owns its own list
list_labels = [list(LABEL_ENCODING[element]) for element in labels_t2]
labels_t2 = list_labels

And, just like before, we select a pool of data to use for the simulation (we can also print those data).

In [None]:
# Select a random sample of (at most) N rows to use for the simulation
N = 300
# random.sample raises ValueError when asked for more items than are available,
# so the sample size is limited to the number of rows
samples_idx = random.sample(range(len(data_t2)), min(N, len(data_t2)))

# The containers are rebuilt instead of appended to: after the first run
# data_s and labels_s are ndarrays (which have no append method), so appending
# would make this cell fail when it is executed again
names_s = [names_t2[i] for i in samples_idx]

# Convert from list to ndarray
data_s = np.array([data_t2[i] for i in samples_idx])
labels_s = np.array([labels_t2[i] for i in samples_idx])

# Show the selected rows
dataset.iloc[samples_idx]

Now we just need to simulate the neural network with those values. To obtain the predicted rating from the network, we look at the position of the largest output element for each sample and convert that index back to the corresponding rating.

In [None]:
# Run the trained network on the simulation sample
results = multilayer_net.sim(data_s)

# Rating associated with each position of the encoded label
ratings = ("E", "ET", "T", "M")

# Per-rating counters: c_v counts the samples of each actual rating,
# c_cv counts those whose rating was predicted correctly
c_cv = np.zeros(4, dtype=int)
c_v = np.zeros(4, dtype=int)

# Print the results, with their actual value
for i, output in enumerate(results):
    # The position of the largest element identifies the rating
    idx1 = np.argmax(output)
    idx2 = np.argmax(labels_s[i])
    pv = ratings[idx1]
    ev = ratings[idx2]
    print("Predicted value: ", pv, "; correct value: ", ev)
    c_v[idx2] += 1
    if pv == ev:
        c_cv[idx1] += 1

We can also calculate the percentage of correctly predicted values, overall and for each rating.

In [None]:
# Calculate the ratio of correctly predicted values to total values,
# overall and for each rating.
# A rating that does not appear in the sample (or an empty sample) would turn
# the ratio into a division by zero, so that case is reported explicitly.
total_values = np.sum(c_v)
if total_values > 0:
    print("Percentage of correctly predicted values: ", (np.sum(c_cv)/total_values)*100, "%")
else:
    print("Percentage of correctly predicted values: ", "n/a (no samples)")

# Positions follow the label encoding: 0 = E, 1 = ET, 2 = T, 3 = M
for k, rating in enumerate(("E", "ET", "T", "M")):
    message = "Percentage of correctly " + rating + " predicted values: "
    if c_v[k] > 0:
        print(message, (c_cv[k]/c_v[k])*100, "%")
    else:
        print(message, "n/a (no samples with this rating)")