In [1]:
%matplotlib inline

In [2]:
from random import Random, shuffle  # `shuffle` stays imported for any cell that still uses it

# Fixed seed so the train/test partition split is identical after every kernel
# restart; without it each run would evaluate on a different test set.
SPLIT_SEED = 42
N_PARTS = 862        # number of partition indices to split
N_TEST_PARTS = 100   # partitions held out for testing

parts_i = list(range(N_PARTS))
# A private Random instance shuffles in place without touching the global
# random state used elsewhere.
Random(SPLIT_SEED).shuffle(parts_i)

test_parts = parts_i[:N_TEST_PARTS]
train_parts = parts_i[N_TEST_PARTS:]

In [3]:
import matplotlib.pyplot as plt

def show_loss_chart(history, output):
    """Plot the training and validation loss curves of one model output.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by the Keras fit calls).
        output: name of the output whose ``<output>_loss`` and
            ``val_<output>_loss`` series are drawn.

    Raises:
        KeyError: if either series is missing from ``history.history``.
    """
    records = history.history
    train_curve = records[output + '_loss']
    val_curve = records['val_' + output + '_loss']

    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(train_curve) + 1)

    # Training loss as blue dots, validation loss as a blue line.
    for curve, fmt, legend_text in (
        (train_curve, 'bo', 'Training loss'),
        (val_curve, 'b', 'Validation loss'),
    ):
        plt.plot(epoch_axis, curve, fmt, label=legend_text)

    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss - ' + output)
    plt.legend()
    plt.show()
    
def show_accuracy_chart(history, output):
    """Plot the training and validation mean absolute error of one output.

    Despite the function name, the plotted quantity is the mean absolute
    error metric, not an accuracy.

    Args:
        history: object whose ``history`` attribute maps metric names to
            per-epoch value lists (as returned by the Keras fit calls).
        output: name of the output whose MAE series are drawn.

    Raises:
        KeyError: if no MAE series for ``output`` is present in
            ``history.history``.
    """
    records = history.history

    # The metric is looked up under '<output>_mean_absolute_error' first.
    # Keras releases that report metrics under the exact string passed to
    # compile() store it as '<output>_mae' instead, so fall back to that.
    key = output + '_mean_absolute_error'
    if key not in records:
        key = output + '_mae'

    acc = records[key]
    val_acc = records['val_' + key]
    epochs = range(1, len(acc) + 1)  # epochs are numbered from 1

    plt.plot(epochs, acc, 'bo', label='Training mean_absolute_error')
    plt.plot(epochs, val_acc, 'b', label='Validation mean_absolute_error')
    # Title names the metric actually plotted (it previously said "accuracy").
    plt.title('Training and validation mean absolute error')
    plt.xlabel('Epochs')
    plt.ylabel('Mean Abs Error - ' + output)
    plt.legend()
    plt.show()

In [4]:
import pandas as pd
import numpy as np

# Directory that holds the CSV partitions (relative to the working directory).
base_dir = 'ENEM_2016_NORMALIZED'

# File-name template appended to ``base_dir``; the partition index is
# zero-padded to four digits.
part_format = '/part{:04d}.csv'

def part_reader(parts):
    """Endlessly yield ``(features, targets)`` arrays, one CSV partition at a time.

    The partitions are read in the given order and the sequence restarts once
    it is exhausted, so the generator never terminates on its own.

    Args:
        parts: iterable of partition indices; each index is formatted with
            ``part_format`` and appended to ``base_dir`` to locate the CSV.
            It is copied into a list once, on the first ``next()`` call.

    Yields:
        tuple: ``(x, y)`` where ``x`` holds every column except the
        ``target_labels`` columns and ``'Unnamed: 0'``, and ``y`` holds the
        ``target_labels`` columns (one column per target).

    Raises:
        ValueError: if ``parts`` is empty (the loop would otherwise spin
            forever without yielding anything).
    """
    parts = list(parts)
    if not parts:
        raise ValueError("part_reader needs at least one partition index")

    while True:
        for p in parts:
            data = pd.read_csv(base_dir + part_format.format(p))
            # NOTE(review): the sample frame displayed in this notebook also
            # shows an 'Unnamed: 0.1' column, which is NOT dropped here and
            # therefore stays in the features - confirm that is intended.
            yield (data.drop(target_labels + ['Unnamed: 0'], axis=1).values, data[target_labels].values)

def part_reader_f(parts):
    """Endlessly yield ``(x, y)`` batches shaped for a model with one output per target.

    The partitions are read in the given order and the sequence restarts once
    it is exhausted, so the generator never terminates on its own.

    Args:
        parts: iterable of partition indices; each index is formatted with
            ``part_format`` and appended to ``base_dir`` to locate the CSV.
            It is copied into a list once, on the first ``next()`` call.

    Yields:
        tuple: ``(x, y)`` where ``x`` is a 2-D array with 55 feature columns
        and ``y`` is a list with one inner list per entry of
        ``target_labels`` (same order), each holding that target's value for
        every row of the partition.

    Raises:
        ValueError: if ``parts`` is empty (the loop would otherwise spin
            forever without yielding anything).
    """
    parts = list(parts)
    if not parts:
        raise ValueError("part_reader_f needs at least one partition index")

    while True:
        for p in parts:
            data = pd.read_csv(base_dir + part_format.format(p))
            # The target block is (rows, targets). It must be TRANSPOSED to
            # get one sequence per target; reshape((5, -1)) only re-chunks
            # the row-major buffer and would hand each output a mix of all
            # targets.
            y = data[target_labels].values.T.tolist()
            # reshape(-1, 55) pins the feature width the model input expects.
            # NOTE(review): the sample frame displayed in this notebook also
            # shows an 'Unnamed: 0.1' column, which is NOT dropped here and
            # therefore counts as one of the 55 features - confirm intended.
            x = data.drop(target_labels + ['Unnamed: 0'], axis=1).values.reshape(-1, 55)
            yield (x, y)

In [5]:
# Load one held-out partition (the second entry of the shuffled test split)
# so its columns can be inspected.
data = pd.read_csv(base_dir + part_format.format(test_parts[1]))
# NOTE(review): the feature matrix for this frame would be
# data.drop(target_labels + ['Unnamed: 0'], axis=1).values, but target_labels
# is only defined in a later cell, so that expression is left disabled here.

In [6]:
# Preview only the first rows instead of echoing the whole partition.
data.head(10)

Unnamed: 0.1,Unnamed: 0,NU_IDADE,TP_SEXO,TP_ESTADO_CIVIL,TP_COR_RACA,CO_UF_NASCIMENTO,TP_ST_CONCLUSAO,TP_ESCOLA,TP_ENSINO,IN_TREINEIRO,...,Q028,Q042,Q043,Q044,Q045,Q046,Q047,Q048,Q049,Q050
0,2790000,-0.048087,1.0,-0.041688,-0.0215,-0.075853,-0.260100,-0.126433,0.000000,-0.1373,...,0.0,0.250,0.25,0.333333,0.25,0.25,0.2,0.25,0.333333,0.25
1,2790001,-0.100719,1.0,-0.041688,-0.2215,0.090813,0.406567,-0.126433,0.000000,0.8627,...,0.0,0.375,0.25,0.333333,0.25,0.75,0.8,0.25,0.333333,0.25
2,2790002,-0.065631,0.5,-0.041688,-0.2215,0.233670,-0.260100,-0.126433,0.000000,-0.1373,...,0.0,0.125,0.25,0.333333,0.25,0.25,0.2,0.25,0.333333,0.25
3,2790003,-0.083175,1.0,-0.041688,-0.2215,0.090813,0.073233,0.206900,-0.115621,-0.1373,...,0.2,0.125,0.25,0.333333,0.25,0.50,0.2,0.25,0.333333,0.25
4,2790004,0.688755,0.5,0.291645,-0.2215,-0.123473,-0.260100,-0.126433,0.000000,-0.1373,...,1.0,0.375,0.25,0.333333,0.25,0.25,0.2,0.25,1.000000,0.25
5,2790005,-0.100719,0.5,-0.041688,-0.2215,0.090813,0.406567,-0.126433,0.000000,0.8627,...,0.0,0.375,0.25,0.333333,0.25,0.75,0.8,0.25,0.666667,0.25
6,2790008,-0.065631,1.0,-0.041688,-0.2215,0.019385,-0.260100,-0.126433,0.000000,-0.1373,...,0.0,0.375,0.25,0.333333,0.25,0.25,0.8,0.25,0.333333,0.25
7,2790009,-0.065631,0.5,-0.041688,-0.2215,0.281289,0.073233,0.540233,-0.115621,-0.1373,...,0.0,0.375,0.25,0.333333,0.25,0.50,1.0,0.25,0.333333,0.50
8,2790010,-0.030543,1.0,-0.041688,0.1785,-0.194901,0.073233,0.206900,-0.115621,-0.1373,...,0.0,0.125,0.25,0.333333,0.25,0.50,0.2,0.25,0.333333,0.25
9,2790012,0.039632,1.0,-0.041688,-0.2215,-0.432996,-0.260100,-0.126433,0.000000,-0.1373,...,0.0,0.125,0.25,0.333333,0.25,0.25,0.2,0.25,0.333333,0.25


In [7]:
# The first 50 training partitions are held out for validation; the model is
# fitted on the remaining ones.
validation, partial_train = train_parts[:50], train_parts[50:]

# Columns the model predicts. The generators drop these from the features and
# return them as targets, in this order.
target_labels = [
    "NU_NOTA_MT",
    "NU_NOTA_CN",
    "NU_NOTA_CH",
    "NU_NOTA_LC",
    "NU_NOTA_REDACAO",
]

In [8]:
from keras import models
from keras import layers
from keras.models import Model


# Shared trunk: 55 input features feed four fully connected ReLU layers.
# Earlier experiments with wider layers (512/1024/2048 units and a
# 256-256-128-64 stack) are not part of this model.
inp = layers.Input((55,))
x = layers.Dense(32, activation='relu')(inp)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(32, activation='relu')(x)

# One single-unit linear head per target, built from ``target_labels`` so the
# head order always matches the order of the target list the data generator
# yields. Each head is named after its target column; those names are the keys
# used by the per-output metrics and by the training-history entries.
outputs = [layers.Dense(1, activation='linear', name=label)(x) for label in target_labels]

# Backward-compatible aliases for the five individual heads.
out1, out2, out3, out4, out5 = outputs

model = Model(inputs=inp, outputs=outputs)

from keras import optimizers
from keras import metrics

# RMSprop with a small, fixed learning rate.
rmsprop = optimizers.RMSprop(lr=0.0001)

# Every head is trained with mean squared error (one list entry per output, in
# output order) and monitored with mean absolute error, keyed by head name.
model.compile(
    optimizer=rmsprop,
    loss=['mse'] * 5,
    metrics=dict.fromkeys(
        ['NU_NOTA_MT', 'NU_NOTA_CN', 'NU_NOTA_CH', 'NU_NOTA_LC', 'NU_NOTA_REDACAO'],
        'mae',
    ),
)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [None]:
# Fit for 40 epochs. One generator step is one CSV partition, so an epoch
# covers every training partition once and validation covers every
# validation partition once.
history = model.fit_generator(
    part_reader_f(partial_train),
    epochs=40,
    steps_per_epoch=len(partial_train),
    validation_steps=len(validation),
    validation_data=part_reader_f(validation),
)

# Plot the mean absolute error curves of the NU_NOTA_MT head.
show_accuracy_chart(history, 'NU_NOTA_MT')

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40


Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40


Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40