## Test GPU

In [1]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

## Dataset

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [3]:
df = pd.read_csv("data/dataset.csv")
df.values.sort(axis=1)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O
0,2,3,5,6,9,10,11,13,14,16,18,20,23,24,25
1,1,4,5,6,7,9,11,12,13,15,16,19,20,23,24
2,1,4,6,7,8,9,10,11,12,14,16,17,20,23,24
3,1,2,4,5,8,10,12,13,16,17,18,19,23,24,25
4,1,2,4,8,9,11,12,13,15,16,19,20,23,24,25
5,1,2,4,5,6,7,10,12,15,16,17,19,21,23,25
6,1,4,7,8,10,12,14,15,16,18,19,21,22,23,25
7,1,5,6,8,9,10,13,15,16,17,18,19,20,22,25
8,3,4,5,9,10,11,13,15,16,17,19,20,21,24,25
9,2,3,4,5,6,8,9,10,11,12,14,19,20,23,24


## Training data

In [4]:
df_train = df.iloc[:-256, :]
df_train

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O
0,2,3,5,6,9,10,11,13,14,16,18,20,23,24,25
1,1,4,5,6,7,9,11,12,13,15,16,19,20,23,24
2,1,4,6,7,8,9,10,11,12,14,16,17,20,23,24
3,1,2,4,5,8,10,12,13,16,17,18,19,23,24,25
4,1,2,4,8,9,11,12,13,15,16,19,20,23,24,25
5,1,2,4,5,6,7,10,12,15,16,17,19,21,23,25
6,1,4,7,8,10,12,14,15,16,18,19,21,22,23,25
7,1,5,6,8,9,10,13,15,16,17,18,19,20,22,25
8,3,4,5,9,10,11,13,15,16,17,19,20,21,24,25
9,2,3,4,5,6,8,9,10,11,12,14,19,20,23,24


## Validation data

In [5]:
df_valid = df.iloc[-256:, :]
df_valid

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O
1513,3,4,5,7,8,11,12,13,15,16,17,19,20,21,24
1514,1,2,3,5,8,9,11,12,13,16,17,18,19,20,23
1515,1,2,6,7,10,11,12,14,16,18,19,20,22,23,24
1516,2,3,5,8,10,11,12,14,15,18,20,21,23,24,25
1517,2,3,4,5,7,8,11,12,14,16,17,20,22,24,25
1518,1,4,6,8,13,14,16,17,18,20,21,22,23,24,25
1519,1,2,4,8,9,10,12,13,14,16,17,18,19,24,25
1520,1,2,3,4,5,7,8,12,14,15,17,19,20,23,24
1521,1,8,9,10,11,13,14,15,16,18,19,20,21,23,24
1522,3,5,6,7,9,10,12,13,14,16,18,19,21,23,25


## Normalize

In [6]:
scaler = StandardScaler().fit(df_train.values)
transformed_dataset = scaler.transform(df_train.values)
transformed_df_train = pd.DataFrame(data=transformed_dataset, index=df_train.index)

scaler = StandardScaler().fit(df_valid.values)
transformed_dataset = scaler.transform(df_valid.values)
transformed_df_valid = pd.DataFrame(data=transformed_dataset, index=df_valid.index)



In [7]:
# amount of past games we need to take in consideration for prediction
window_length = 4

number_of_rows_train = df_train.values.shape[0]
number_of_features_train = df_train.values.shape[1]

number_of_rows_valid = df_valid.values.shape[0]
number_of_features_valid = df_valid.values.shape[1]

In [8]:
train = np.empty([number_of_rows_train-window_length, window_length, number_of_features_train], dtype=float)
label_train = np.empty([number_of_rows_train-window_length, number_of_features_train], dtype=float)

valid = np.empty([number_of_rows_valid-window_length, window_length, number_of_features_valid], dtype=float)
label_valid = np.empty([number_of_rows_valid-window_length, number_of_features_valid], dtype=float)

for i in range(0, number_of_rows_train-window_length):
    train[i]=transformed_df_train.iloc[i:i+window_length, 0: number_of_features_train]
    label_train[i]=transformed_df_train.iloc[i+window_length: i+window_length+1, 0: number_of_features_train]
    
for i in range(0, number_of_rows_valid-window_length):
    valid[i]=transformed_df_valid.iloc[i:i+window_length, 0: number_of_features_valid]
    label_valid[i]=transformed_df_valid.iloc[i+window_length: i+window_length+1, 0: number_of_features_valid]

## Shapes

In [9]:
train.shape

(1509, 4, 15)

In [10]:
label_train.shape

(1509, 15)

In [11]:
train[0]

array([[ 0.43420445, -0.18343936,  0.09987553, -0.28293799,  0.46502513,
         0.11885834, -0.20220994, -0.00418896, -0.31117316, -0.13434727,
         0.05076418,  0.27914094,  1.22643934,  0.94724224,  0.65433786],
       [-0.65707683,  0.57904348,  0.09987553, -0.28293799, -0.62580126,
        -0.40851019, -0.20220994, -0.53234643, -0.83025253, -0.66646091,
        -1.07873886, -0.32334158, -0.80450125,  0.14208634, -0.39886765],
       [-0.65707683,  0.57904348,  0.7484235 ,  0.30187783, -0.08038807,
        -0.40851019, -0.72969896, -1.06050391, -1.34933191, -1.19857455,
        -1.07873886, -1.52830662, -0.80450125,  0.14208634, -0.39886765],
       [-0.65707683, -0.94592221, -0.54867244, -0.8677538 , -0.08038807,
         0.11885834,  0.32527909, -0.00418896,  0.72698559,  0.39776638,
         0.05076418, -0.32334158,  1.22643934,  0.94724224,  0.65433786]])

In [12]:
label_train[0]

array([-0.65707683, -0.94592221, -0.54867244,  0.88669364,  0.46502513,
        0.64622687,  0.32527909, -0.00418896,  0.20790621, -0.13434727,
        0.6155157 ,  0.27914094,  1.22643934,  0.94724224,  0.65433786])

## LSTM Model

In [13]:
import numpy as np
import keras

from keras import *
from keras.models import Sequential
from keras.layers import LSTM, Dense,Dropout, Embedding

In [14]:
rmsprop = keras.optimizers.RMSprop(lr=0.03, rho=0.5, epsilon=0.01, decay=0.01)

model = Sequential()

model.add(LSTM(2**8,      
           input_shape=(window_length, number_of_features_train),
           return_sequences=True))

model.add(Dropout(0.75))

model.add(LSTM(2**8,           
           return_sequences=False))

model.add(Dropout(0.75))

model.add(Dense(number_of_features_train, activation='linear'))
model.compile(loss=losses.mean_absolute_error, optimizer=rmsprop)

## Custom Metric

In [15]:
import sys

class Metrics(keras.callbacks.Callback):
    
    def on_train_begin(self, logs={}):
        self._data = []

    def on_epoch_end(self, batch, logs={}):
        X_val, y_val = self.validation_data[0], self.validation_data[1]
        y_predict = np.asarray(model.predict(X_val))
        count=0
        max=0
        min=15
        for p, y in zip(y_predict, y_val):
            real = scaler.inverse_transform(y).astype(int)
            pred = scaler.inverse_transform(p).astype(int)
            
            points = len(list(set(pred).intersection(real)))
            count += points
            
            if points>max:
                max=points
                
            if points<min:
                min=points
                
        sys.stdout.write(' '+str(round(count/len(y_val),2))+'['+str(min)+','+str(max)+ ']')
        sys.stdout.flush()
        return

    def get_data(self):
        return self._data
    
metrics = Metrics()

## Training

In [16]:
model.fit(train, label_train, 
            batch_size=2**30, epochs=200, verbose=0,
            validation_data=(valid, label_valid), 
            callbacks=[metrics])

 8.91[6,13] 8.85[6,12] 8.83[5,12] 8.84[5,13] 8.82[5,12] 8.83[5,12] 8.87[5,12] 8.87[5,12] 8.89[6,13] 8.88[5,12] 8.92[6,13] 8.89[5,12] 8.87[5,12] 8.88[5,12] 8.85[5,12] 8.88[5,12] 8.89[5,12] 8.92[6,13] 8.9[5,12] 8.89[5,12] 8.92[5,12] 8.88[5,12] 8.94[5,13] 8.97[6,13] 8.91[6,13] 8.91[5,12] 8.94[6,13] 8.92[6,13] 8.91[5,12] 8.9[5,12] 8.89[5,12] 8.95[6,13] 8.92[5,12] 8.93[6,13] 8.94[6,13] 8.94[5,12] 8.92[5,12] 8.9[5,12] 8.92[5,13] 8.94[5,13] 8.91[5,13] 8.9[5,12] 8.91[5,12] 8.92[5,13] 8.89[6,12] 8.95[6,13] 8.96[6,12] 8.96[6,13] 9.0[6,13] 8.99[6,13] 8.96[6,13] 9.0[6,13] 8.96[6,13] 8.92[6,12] 8.94[6,12] 8.96[6,12] 8.97[6,12] 8.97[6,13] 9.0[6,13] 8.98[6,13] 9.0[6,13] 9.0[6,12] 9.0[6,12] 9.07[6,13] 9.02[6,13] 9.01[6,13] 9.0[6,12] 8.99[6,13] 8.98[6,12] 8.97[6,12] 9.0[5,13] 8.94[6,12] 9.0[6,12] 8.99[6,12] 8.96[6,12] 8.99[6,12] 8.95[6,12] 9.0[6,12] 8.98[6,12] 9.0[6,12] 8.96[6,12] 8.99[6,12] 8.99[6,12] 8.99[6,12] 8.97[6,13] 8.99[6,12] 9.03[6,12] 9.02[6,13] 8.98[6,12] 9.02[6,12] 8.97[6,13] 8.96[6,12] 9.

<keras.callbacks.History at 0x7fe5b5f969b0>

## Prediction

In [17]:
to_predict=np.array([[1,3,7,8,9,10,11,12,13,14,15,16,20,21,23],
[2,4,5,6,7,9,10,11,15,16,19,20,23,24,25],
[1,2,4,6,10,12,14,17,18,19,20,22,23,24,25],
[1,3,5,7,8,10,12,13,14,16,17,18,20,23,25],])
scaled_to_predict = scaler.transform(to_predict)



## Result

In [18]:
scaled_predicted_output_1 = model.predict(np.array([scaled_to_predict]))
# print(scaler.inverse_transform(scaled_predicted_output_1).astype(int)[0])

## Conclusion

The model is not able to predict the 15 numbers of the next game. Actually, in average, it will predict correctly only 9 numbers. 

On the other hand, the neural network was able to suggest 15 different numbers between 1 and 25, with a good even-odd distribution. Maybe it can be considered a good guess.