# Deep Learning Lotto implementation in Keras

<a href="http://www.crest.fr/ckfinder/userfiles/files/Pageperso/vcottet/paper%20-%20Deep%20Learning%20predicts%20Loto.pdf">Paper</a>
<br>
<a href="
https://stackoverflow.com/questions/44202627/keras-model-from-nn-schematic">StackOverflow</a>

## Setup

In [1]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import pandas as pd
import featuretools as ft

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "end_to_end_project"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", message="^internal gelsd")


## Logging

import logging
import io
import contextlib
import sys

@contextlib.contextmanager
def capture_logs():
  """Save logs to an io.StringIO."""
  ip = get_ipython()
  s = io.StringIO()
  h = logging.StreamHandler(s)
  ip.log.addHandler(h)
  try:
    yield s
  finally:
    ip.log.removeHandler(h)

@contextlib.contextmanager
def print_logs():
  """Print logs inline."""  
  ip = get_ipython()
  h = logging.StreamHandler(sys.stdout)
  ip.log.addHandler(h)
  try:
    yield
  finally:
    ip.log.removeHandler(h)
    

ip = get_ipython()

## Set logging level
ip.log.setLevel(logging.INFO)


with capture_logs() as s:
  ip.log.warning('capture logs')
print(s.getvalue())

with print_logs():
  ip.log.info('print logs')

ModuleNotFoundError: No module named 'featuretools'

## Get the data

In [None]:
import os
import zipfile
from six.moves import urllib

DOWNLOAD_ROOT = "http://www.sportstoto.com.my/"
LOTTO_PATH = os.path.join(os.getcwd(), "datasets", "lotto")
LOTTO_URL = DOWNLOAD_ROOT + "upload/Toto655.zip"
LOTTO_ZIP = os.path.join(LOTTO_PATH, "lotto.zip")

# print(LOTTO_URL)

# def fix_bad_zip_file(zip_file):  
#  f = open(zip_file, 'r+b')  
#  data = f.read()  
#  pos = data.find('\x50\x4b\x05\x06') # End of central directory signature  
#  if (pos > 0):  
#      self._log("Trancating file at location " + str(pos + 22)+ ".")  
#      f.seek(pos + 22)   # size of 'ZIP end of central directory record' 
#      f.truncate()  
#      f.close()  
#  else:  
#      # raise error, file is truncated  
#     raise RuntimeError('bad zip file')

def fetch_data(url=LOTTO_URL, path=LOTTO_PATH, zip_path=LOTTO_ZIP):
  if not os.path.isdir(path):
      os.makedirs(path)
  urllib.request.urlretrieve(url, zip_path)
  if (os.path.isfile(zip_path)):
    print(f'Download path: {zip_path}')
  # fix_bad_zip_file(zip_path)
  # with zipfile.ZipFile(zip_path, 'r') as lotto_zip:
  #  lotto_zip.extractall(path)

In [4]:
!rm /content/datasets/lotto/lotto.zip

fetch_data()

rm: cannot remove '/content/datasets/lotto/lotto.zip': No such file or directory
Download path: /content/datasets/lotto/lotto.zip


In [None]:
df = pd.read_csv(LOTTO_ZIP, compression='infer', header=0, sep=',', quotechar='"', dtype=str)

In [9]:
df.tail(10)

Unnamed: 0,DrawNo,DrawDate,DrawnNo1,DrawnNo2,DrawnNo3,DrawnNo4,DrawnNo5,DrawnNo6,Jackpot
1652,490419,20190209,8,18,22,28,40,55,1710074216
1653,490519,20190210,9,17,20,27,45,46,1729954755
1654,490619,20190213,4,20,34,40,45,48,1750281168
1655,490719,20190216,6,24,25,28,31,50,1770969014
1656,490819,20190217,23,26,35,39,51,53,1791228325
1657,490919,20190219,6,15,17,29,43,45,1807899337
1658,491019,20190220,7,11,26,36,39,55,1825936678
1659,491119,20190223,7,16,33,35,45,48,1848654981
1660,491219,20190224,5,7,11,28,31,48,1870665327
1661,491319,20190227,13,14,19,27,40,52,1892538857


In [11]:
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1662 entries, 0 to 1661
Data columns (total 9 columns):
DrawNo       1662 non-null object
DrawDate     1662 non-null object
DrawnNo1     1662 non-null object
DrawnNo2     1662 non-null object
 DrawnNo3    1662 non-null object
 DrawnNo4    1662 non-null object
 DrawnNo5    1662 non-null object
 DrawnNo6    1662 non-null object
Jackpot      1662 non-null object
dtypes: object(9)
memory usage: 116.9+ KB


Unnamed: 0,DrawNo,DrawDate,DrawnNo1,DrawnNo2,DrawnNo3,DrawnNo4,DrawnNo5,DrawnNo6,Jackpot
count,1662,1662,1662,1662,1662,1662,1662,1662,1662
unique,1662,1662,37,43,48,48,41,34,1662
top,370512,20110924,1,11,22,33,46,55,550751361
freq,1,1,196,90,77,75,83,205,1


## Feature Engineering

In [17]:
df.hist(bins=50, figsize=(20,15))
plt.show()

# data = ft.demo.load_mock_customer()
# print(data)

ValueError: ignored

<Figure size 1440x1080 with 0 Axes>