## Test GPU

In [1]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

Using TensorFlow backend.


['/job:localhost/replica:0/task:0/device:GPU:0']

## Dataset

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [3]:
df = pd.read_csv("data/dataset.csv")
df.values.sort(axis=1)
df

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O
0,2,3,5,6,9,10,11,13,14,16,18,20,23,24,25
1,1,4,5,6,7,9,11,12,13,15,16,19,20,23,24
2,1,4,6,7,8,9,10,11,12,14,16,17,20,23,24
3,1,2,4,5,8,10,12,13,16,17,18,19,23,24,25
4,1,2,4,8,9,11,12,13,15,16,19,20,23,24,25
5,1,2,4,5,6,7,10,12,15,16,17,19,21,23,25
6,1,4,7,8,10,12,14,15,16,18,19,21,22,23,25
7,1,5,6,8,9,10,13,15,16,17,18,19,20,22,25
8,3,4,5,9,10,11,13,15,16,17,19,20,21,24,25
9,2,3,4,5,6,8,9,10,11,12,14,19,20,23,24


## Training data

In [4]:
df_train = df.iloc[-100:-10, :]
df_train

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O
1723,5,6,7,9,10,11,12,13,15,16,18,20,21,22,23
1724,1,5,6,8,12,13,14,15,18,19,21,22,23,24,25
1725,1,3,5,6,8,10,11,12,14,15,17,19,20,22,25
1726,1,2,5,6,7,9,10,13,14,15,16,20,21,22,24
1727,2,3,5,6,7,8,9,10,11,12,14,15,17,21,22
1728,1,2,4,6,8,9,10,11,12,14,16,18,21,24,25
1729,1,7,8,10,12,13,14,15,16,18,19,22,23,24,25
1730,1,3,5,7,8,9,10,13,14,17,19,20,22,24,25
1731,1,3,5,9,10,12,13,14,17,18,19,20,21,22,24
1732,1,2,4,5,8,9,12,13,14,15,16,19,21,23,25


## Validation data

In [5]:
df_valid = df.iloc[-10:, :]
df_valid

Unnamed: 0,A,B,C,D,E,F,G,H,I,J,K,L,M,N,O
1813,1,2,3,5,7,12,14,15,16,18,19,21,23,24,25
1814,3,6,7,10,11,12,13,14,15,18,19,20,21,24,25
1815,1,2,4,6,7,8,9,11,13,14,15,16,18,20,21
1816,1,4,7,9,10,11,14,15,16,17,18,21,22,23,24
1817,1,4,5,6,8,9,10,11,14,15,16,19,21,23,24
1818,1,6,9,10,11,12,13,14,15,16,17,18,19,20,23
1819,1,4,5,7,10,13,14,16,17,18,19,20,21,22,25
1820,2,3,4,5,7,9,10,13,14,16,17,19,20,21,25
1821,1,4,6,9,12,13,14,16,17,18,19,20,21,24,25
1822,3,5,6,7,8,9,10,12,13,15,16,17,18,19,22


## Normalize

In [6]:
scaler = StandardScaler().fit(df_train.values)
transformed_dataset = scaler.transform(df_train.values)
transformed_df_train = pd.DataFrame(data=transformed_dataset, index=df_train.index)

scaler = StandardScaler().fit(df_valid.values)
transformed_dataset = scaler.transform(df_valid.values)
transformed_df_valid = pd.DataFrame(data=transformed_dataset, index=df_valid.index)



In [7]:
# amount of past games we need to take in consideration for prediction
window_length = 1

number_of_rows_train = df_train.values.shape[0]
number_of_features_train = df_train.values.shape[1]

number_of_rows_valid = df_valid.values.shape[0]
number_of_features_valid = df_valid.values.shape[1]

In [8]:
train = np.empty([number_of_rows_train-window_length, window_length, number_of_features_train], dtype=float)
label_train = np.empty([number_of_rows_train-window_length, number_of_features_train], dtype=float)

valid = np.empty([number_of_rows_valid-window_length, window_length, number_of_features_valid], dtype=float)
label_valid = np.empty([number_of_rows_valid-window_length, number_of_features_valid], dtype=float)

for i in range(0, number_of_rows_train-window_length):
    train[i]=transformed_df_train.iloc[i:i+window_length, 0: number_of_features_train]
    label_train[i]=transformed_df_train.iloc[i+window_length: i+window_length+1, 0: number_of_features_train]
    
for i in range(0, number_of_rows_valid-window_length):
    valid[i]=transformed_df_valid.iloc[i:i+window_length, 0: number_of_features_valid]
    label_valid[i]=transformed_df_valid.iloc[i+window_length: i+window_length+1, 0: number_of_features_valid]

## Shapes

In [9]:
train.shape

(99, 1, 15)

In [10]:
label_train.shape

(99, 15)

In [11]:
train[0]

array([[ 4.09644015,  2.06684732,  1.40827033,  1.38222051,  0.89765488,
         0.53612292,  0.18582616, -0.2172416 , -0.01566179, -0.35037328,
        -0.02435044,  0.2038319 , -0.10472065, -0.69448679, -1.60410507]])

In [12]:
label_train[0]

array([-0.58520574,  1.29850259,  0.71794174,  0.7733128 ,  1.97269067,
        1.61919952,  1.2476899 ,  0.84247353,  1.55051751,  1.31807092,
        1.80193262,  1.43917672,  1.20428752,  0.88389228,  0.59329913])

## LSTM Model

In [13]:
import numpy as np
import keras

from keras import *
from keras.models import Sequential
from keras.layers import LSTM, Dense,Dropout, Embedding

In [14]:
rmsprop = keras.optimizers.RMSprop(lr=0.5, rho=0.25, epsilon=0.01, decay=0.15)

model = Sequential()

model.add(LSTM(2**8,      
           input_shape=(window_length, number_of_features_train),
           return_sequences=True))

model.add(Dropout(0.2))

model.add(LSTM(2**8,           
           return_sequences=False))

model.add(Dropout(0.2))

model.add(Dense(number_of_features_train, activation='linear'))
model.compile(loss=losses.mean_absolute_error, optimizer=rmsprop)

## Custom Metric

In [15]:
import sys

class Metrics(keras.callbacks.Callback):
    
    def on_train_begin(self, logs={}):
        self._data = []

    def on_epoch_end(self, batch, logs={}):
        X_val, y_val = self.validation_data[0], self.validation_data[1]
        y_predict = np.asarray(model.predict(X_val))
        count=0
        max=0
        min=15
        lst=[]
        for p, y in zip(y_predict, y_val):
            real = scaler.inverse_transform(y).astype(int)
            pred = scaler.inverse_transform(p).astype(int)
            
            points = len(list(set(pred).intersection(real)))            
            count += points
            lst.append(points)
            
            if points>max:
                max=points
                
            if points<min:
                min=points
                
        sys.stdout.write(' '+str(round(count/len(y_val),2))+'['+str(min)+','+str(max)+ ']')
        
        for i in lst:
            sys.stdout.write(' ' + str(i))
        sys.stdout.write('\n')
        
        sys.stdout.flush()
        return

    def get_data(self):
        return self._data
    
metrics = Metrics()

## Training

In [16]:
model.fit(train, label_train, 
            batch_size=2**30, epochs=600, verbose=0,
            validation_data=(valid, label_valid), 
            callbacks=[metrics])

 9.11[8,11] 10 8 8 8 9 10 9 9 11
 8.44[5,10] 9 8 5 7 9 10 10 8 10
 9.0[7,10] 9 7 7 9 10 10 9 10 10
 9.22[7,11] 9 7 7 9 10 10 11 9 11
 8.44[6,11] 9 6 6 8 9 9 9 9 11
 9.11[7,11] 9 8 7 9 10 9 11 9 10
 9.11[7,11] 9 7 7 9 10 10 9 10 11
 8.22[6,11] 9 6 6 8 9 8 9 8 11
 9.11[7,11] 9 7 7 9 10 10 9 10 11
 8.89[6,11] 9 7 6 9 10 9 10 9 11
 9.11[7,11] 9 7 7 9 10 10 9 10 11
 8.33[6,11] 9 6 7 8 9 8 9 8 11
 9.22[7,11] 9 7 8 9 10 10 9 10 11
 8.67[7,10] 9 7 7 9 9 9 9 9 10
 9.22[7,10] 9 7 8 10 10 10 9 10 10
 8.22[6,10] 9 6 7 8 9 8 9 8 10
 9.0[7,10] 9 7 8 9 10 10 9 9 10
 9.11[7,10] 9 7 8 10 10 10 9 9 10
 8.89[7,10] 9 7 7 10 9 10 9 9 10
 9.11[7,10] 9 7 8 10 10 10 9 9 10
 9.11[7,10] 9 7 8 10 10 10 9 9 10
 8.67[7,10] 9 7 7 10 9 9 9 8 10
 9.11[7,10] 9 7 8 10 10 10 9 9 10
 9.11[7,10] 9 7 8 10 10 10 9 9 10
 9.0[7,10] 9 8 7 10 9 10 9 9 10
 9.11[7,10] 9 7 8 10 10 10 9 9 10
 9.11[7,10] 9 7 8 10 10 10 9 9 10
 8.67[7,10] 9 7 7 9 9 9 9 9 10
 9.22[8,10] 9 8 8 10 10 10 9 9 10
 9.22[8,10] 9 8 8 10 10 10 9 9 10
 9.22[8,1

 9.44[7,11] 10 8 11 10 10 10 9 7 10
 9.67[7,11] 10 9 10 11 10 10 10 7 10
 9.56[7,10] 10 9 10 10 10 10 10 7 10
 9.44[7,10] 10 9 10 9 10 10 10 7 10
 9.56[7,11] 10 9 11 10 10 10 9 7 10
 9.56[7,11] 10 9 11 10 10 10 9 7 10
 9.33[7,10] 10 8 10 10 10 10 9 7 10
 9.22[7,10] 10 8 10 9 10 10 9 7 10
 9.44[7,10] 10 9 10 9 10 10 10 7 10
 9.56[7,10] 10 9 10 10 10 10 10 7 10
 9.44[7,10] 10 8 10 10 10 10 10 7 10
 9.56[7,11] 10 9 11 10 10 10 9 7 10
 9.56[7,11] 10 9 11 10 10 10 9 7 10
 9.33[7,10] 10 8 10 9 10 10 10 7 10
 9.67[7,11] 10 9 11 10 10 10 10 7 10
 9.33[7,10] 10 8 10 9 10 10 10 7 10
 9.89[8,11] 10 10 11 10 10 10 9 8 11
 9.67[7,11] 10 9 11 10 10 10 10 7 10
 9.56[7,11] 10 9 11 9 10 10 10 7 10
 9.56[7,11] 10 9 11 9 10 10 10 7 10
 9.56[7,11] 10 9 11 10 10 10 9 7 10
 9.67[7,11] 10 10 11 10 10 10 9 7 10
 9.56[7,11] 10 9 11 9 10 10 10 7 10
 9.56[7,11] 10 9 11 9 10 10 10 7 10
 9.56[8,10] 10 9 10 9 10 10 10 8 10
 9.67[7,11] 10 10 11 10 10 10 9 7 10
 9.67[7,11] 10 10 11 10 10 10 9 7 10
 9.67[8,11] 10 9 11

 9.56[8,12] 9 11 12 9 10 9 8 9 9
 9.33[8,11] 9 9 11 10 10 9 8 9 9
 9.44[8,12] 9 10 12 9 10 9 8 9 9
 9.44[8,12] 9 9 12 9 10 9 8 9 10
 9.33[8,12] 9 9 12 10 10 9 8 8 9
 9.33[8,12] 9 9 12 9 10 9 8 9 9
 9.67[9,12] 9 10 12 9 10 9 9 9 10
 9.33[8,12] 9 9 12 9 10 9 9 8 9
 9.33[8,12] 10 9 12 9 10 9 8 8 9
 9.33[8,12] 10 9 12 9 10 9 8 8 9
 9.33[8,12] 10 9 12 9 10 9 8 8 9
 9.33[8,12] 10 9 12 10 10 9 8 8 8
 9.44[8,12] 10 9 12 10 10 10 8 8 8
 9.33[8,12] 10 9 12 10 10 9 8 8 8
 9.56[8,12] 9 10 12 9 9 10 9 8 10
 9.67[8,12] 10 11 12 9 10 10 8 8 9
 9.78[8,12] 9 10 12 10 10 10 9 8 10
 9.44[8,12] 9 10 12 9 10 9 8 9 9
 9.33[8,12] 10 9 12 10 10 9 8 8 8
 9.78[8,12] 9 10 12 10 10 10 9 8 10
 9.44[8,12] 9 10 12 9 10 10 8 8 9
 9.56[8,12] 9 11 12 9 10 10 8 8 9
 9.56[8,12] 9 11 12 9 10 10 8 8 9
 9.67[8,12] 9 11 12 10 10 10 8 8 9
 9.56[8,12] 9 11 12 10 10 9 8 8 9
 9.44[8,12] 10 10 12 10 10 9 8 8 8
 9.11[7,12] 9 9 12 10 10 9 8 8 7
 9.44[8,12] 9 10 12 9 10 10 8 8 9
 9.56[8,12] 9 10 12 10 10 10 8 8 9
 9.78[8,12] 9 11 12

<keras.callbacks.History at 0x7f16d08ff9b0>

## Prediction

In [17]:
#to_predict=np.array([[1,4,6,9,12,13,14,16,17,18,19,20,21,24,25],
#[3,5,6,7,8,9,10,12,13,15,16,17,18,19,22]])
to_predict=np.array([[3,5,6,7,8,9,10,12,13,15,16,17,18,19,22]])
scaled_to_predict = scaler.transform(to_predict)

# Result (real)
scaled_predicted_output_1 = model.predict(np.array([scaled_to_predict]))
print(scaler.inverse_transform(scaled_predicted_output_1).astype(int)[0])

# Result (test)
to_predict=np.array([[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]])
scaled_to_predict = scaler.transform(to_predict)
scaled_predicted_output_1 = model.predict(np.array([scaled_to_predict]))
print(scaler.inverse_transform(scaled_predicted_output_1).astype(int)[0])

[ 1  3  5  6  8  9 10 12 14 15 16 18 19 21 24]
[ 1  4  5  7  9 10 11 13 14 15 15 18 20 21 24]




## Result

In [18]:
scaled_predicted_output_1 = model.predict(np.array([scaled_to_predict]))
print(scaler.inverse_transform(scaled_predicted_output_1).astype(int)[0])

[ 1  4  5  7  9 10 11 13 14 15 15 18 20 21 24]


## Conclusion

The model is not able to predict the 15 numbers of the next game. Actually, in average, it will predict correctly only 9 numbers. 

On the other hand, the neural network was able to suggest 15 different numbers between 1 and 25, with a good even-odd distribution. Maybe it can be considered a good guess.