In [1]:
%reload_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os
import pandas as pd
import featuretools as ft
import matplotlib as mpl
import matplotlib.pyplot as plt
import re
import datetime
from datetime import date
from pathlib import Path

In [3]:
# Notebook-wide setup: warning filter, RNG seed, plot label sizes, pandas display options.
import warnings
from IPython.display import display

# Seed NumPy's global RNG so outputs are stable across runs.
np.random.seed(42)

# Label font sizes for the axes and for the x/y ticks.
mpl.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Ignore useless warnings (see SciPy issue #5998)
warnings.filterwarnings(action="ignore", message="^internal gelsd")

# Show up to 50 columns and enable the table-schema HTML representation.
pd.set_option('display.max_columns', 50)
pd.set_option('display.html.table_schema', True)

## Functions

In [4]:
def add_datepart(df, fldname, drop=True, time=False):
    """Expand the date column ``fldname`` of ``df`` into date-part columns, in place.

    New columns are named ``<prefix><Part>``, where ``prefix`` is ``fldname``
    with a trailing ``Date``/``date`` removed (``'DrawDate'`` -> ``'DrawYear'``,
    ``'DrawMonth'``, ...). An ``<prefix>Elapsed`` column holding whole seconds
    since 1970-01-01 (UTC for tz-aware data) is always added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    fldname : str
        Column holding the dates. If it is not already a datetime column it is
        parsed with ``pd.to_datetime`` and the parsed values are written back.
    drop : bool, default True
        Remove ``fldname`` from ``df`` once the parts have been added.
    time : bool, default False
        Also add ``Hour``, ``Minute`` and ``Second`` columns.

    Returns
    -------
    None
        ``df`` is mutated; nothing is returned.
    """
    fld = df[fldname]
    # Covers naive and tz-aware datetimes and, unlike np.issubdtype, does not
    # raise on pandas extension dtypes.
    if not pd.api.types.is_datetime64_any_dtype(fld):
        df[fldname] = fld = pd.to_datetime(fld)

    targ_pre = re.sub('[Dd]ate$', '', fldname)
    attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear',
            'Is_month_end', 'Is_month_start', 'Is_quarter_end', 'Is_quarter_start',
            'Is_year_end', 'Is_year_start']
    if time:
        attr = attr + ['Hour', 'Minute', 'Second']

    for n in attr:
        if n == 'Week':
            # ``Series.dt.week`` no longer exists in recent pandas.
            df[targ_pre + n] = _iso_week(fld)
        else:
            df[targ_pre + n] = getattr(fld.dt, n.lower())

    # Seconds since the Unix epoch, computed independently of the column's
    # storage resolution (ns/us/ms/s).
    if fld.dt.tz is not None:
        epoch = pd.Timestamp('1970-01-01', tz='UTC')
    else:
        epoch = pd.Timestamp('1970-01-01')
    df[targ_pre + 'Elapsed'] = (fld - epoch) // pd.Timedelta(seconds=1)

    if drop:
        df.drop(fldname, axis=1, inplace=True)


def _iso_week(fld):
    """Return the ISO week number of each value in the datetime Series ``fld``."""
    if hasattr(fld.dt, 'isocalendar'):
        week = fld.dt.isocalendar().week
        # Plain NumPy dtypes, as the legacy accessor produced: int64 when
        # complete, float64 (NaN) when some dates are missing.
        return week.astype('float64') if week.isna().any() else week.astype('int64')
    # Older pandas without ``isocalendar``.
    return fld.dt.week

## Constants

In [5]:
# Upper bound (exclusive) of the numbers the feature loops iterate over,
# i.e. range(0, TOTAL_NUMBERS) -> 0..9999.
TOTAL_NUMBERS = 10000

## Load Data

In [6]:
# Directory holding the input data (relative to the notebook's working directory).
PATH = Path("datasets/lotto")
# CSV file read by the load cell below.
DATASET = PATH/'data_all.csv'

In [7]:
# `dataset` stores the original data as loaded; DrawDate is parsed as a date
# and PrizeType is forced to string.
dataset = pd.read_csv(DATASET, parse_dates=['DrawDate'], dtype={'PrizeType': str})
dataset.columns

Index(['DrawNo', 'DrawDate', 'PrizeType', 'LuckyNo'], dtype='object')

In [8]:
# Column dtypes, non-null counts and memory usage of the raw data.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104673 entries, 0 to 104672
Data columns (total 4 columns):
DrawNo       104673 non-null int64
DrawDate     104673 non-null datetime64[ns]
PrizeType    104673 non-null object
LuckyNo      104673 non-null int64
dtypes: datetime64[ns](1), int64(2), object(1)
memory usage: 3.2+ MB


In [9]:
# Per-column dtypes of the raw data.
dataset.dtypes

DrawNo                int64
DrawDate     datetime64[ns]
PrizeType            object
LuckyNo               int64
dtype: object

In [10]:
# Number of distinct values per column.
dataset.nunique()

DrawNo        4551
DrawDate      4551
PrizeType       23
LuckyNo      10000
dtype: int64

## Train Test Split

In [11]:
from sklearn.model_selection import train_test_split

# Ordered 80/20 split: shuffle=False keeps the first rows for training and the
# last rows for testing.
# NOTE(review): the split is by row, not by draw. With 23 prize types per draw
# a train size that is not a multiple of 23 presumably cuts one draw in two —
# confirm and consider splitting on DrawNo instead.
train_df, test_df = train_test_split(dataset, test_size=0.20, shuffle=False)
# NOTE: despite the name this is a list of the two frames, not a DataFrame.
combined_df = [train_df, test_df]

# Row counts of the two partitions.
display(len(train_df))
display(len(test_df))

83738

20935

## Feature generation

#### Days since last exact match

In [12]:
# Work on a copy so that `dataset` keeps the data as loaded.
df = dataset.copy()

In [13]:
# Create the feature column with 0 as placeholder; it is filled in by the
# get_elapsed_days loop further down.
df['DaysSinceLastExactMatch'] = 0

# Spot check on one sample number (1234): how many times it was drawn and when.
result = df.loc[df.LuckyNo == 1234]
display(len(result), result)

10

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch
231,41792,1992-05-28,2ndPrizeNo,1234,0
18696,121997,1997-07-12,SpecialNo7,1234,0
35191,193801,2001-12-26,2ndPrizeNo,1234,0
47146,245705,2005-02-13,SpecialNo6,1234,0
47753,248405,2005-04-06,ConsolationNo2,1234,0
49421,255605,2005-09-11,SpecialNo4,1234,0
71468,351511,2011-04-24,ConsolationNo4,1234,0
71528,351711,2011-04-27,SpecialNo8,1234,0
98841,470517,2017-12-27,ConsolationNo7,1234,0
101068,480218,2018-07-15,ConsolationNo3,1234,0


In [14]:
# Manual check: days between the first two dates on which 1234 was drawn.
datetime.datetime.strptime('1997-07-12', '%Y-%m-%d').date() - datetime.datetime.strptime('1992-05-28', '%Y-%m-%d').date()

datetime.timedelta(days=1871)

In [15]:
# Earlier attempt, kept for reference only (df_temp is not defined in this notebook):
# df_temp['DaysSinceLastExactMatch'] = (df_temp.DrawDate - df_temp.loc[df_temp.LuckyNo == 9999].DrawDate.shift(1)).dt.days
# df_temp.loc[df_temp.LuckyNo == 9999].head(3)

# Prototype of the gap computation for a single number (1234).
matched_dates = df.loc[df.LuckyNo == 1234].DrawDate.values
# Differences between consecutive draw dates, truncated to whole days.
days = np.diff(matched_dates).astype('timedelta64[D]')
# The first occurrence has no predecessor, so it gets a gap of 0.
days = np.insert(days, 0,0)
display(matched_dates, days)
len(days)

array(['1992-05-28T00:00:00.000000000', '1997-07-12T00:00:00.000000000',
       '2001-12-26T00:00:00.000000000', '2005-02-13T00:00:00.000000000',
       '2005-04-06T00:00:00.000000000', '2005-09-11T00:00:00.000000000',
       '2011-04-24T00:00:00.000000000', '2011-04-27T00:00:00.000000000',
       '2017-12-27T00:00:00.000000000', '2018-07-15T00:00:00.000000000'],
      dtype='datetime64[ns]')

array([   0, 1871, 1628, 1145,   52,  158, 2051,    3, 2436,  200],
      dtype='timedelta64[D]')

10

In [16]:
def get_elapsed_days(data, no, fld, func):
    """Write the day gaps between consecutive matching draws into ``data[fld]``.

    Rows are selected with ``func(data.LuckyNo, no)`` and processed in the
    current row order of ``data``. Each selected row receives the absolute
    number of whole days between its ``DrawDate`` and that of the previously
    selected row; the first selected row receives 0. Whether the value means
    "days since the last match" or "days until the next match" therefore
    depends on how the caller sorted ``data`` beforehand.

    Note that 0 is used both for "no earlier match" and for a match on the same
    date, so the two cases cannot be told apart from the result.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with ``LuckyNo`` and ``DrawDate`` columns; modified in place.
    no : int
        Number handed to ``func`` as the value to match.
    fld : str
        Name of the column that receives the gaps.
    func : callable
        ``func(lucky_no_series, no)`` returning a boolean mask over the rows.

    Returns
    -------
    None
    """
    mask = func(data.LuckyNo, no)
    draw_dates = data.loc[mask, 'DrawDate'].values
    if len(draw_dates) == 0:
        # Nothing matched: leave ``data`` untouched.
        return
    gaps = np.absolute(np.diff(draw_dates).astype('timedelta64[D]')).astype('int64')
    # One vectorised write; selecting by mask (not by label) keeps the values
    # aligned with row order even if index labels repeat.
    data.loc[mask, fld] = np.insert(gaps, 0, 0)

In [17]:
def exact_match(lucky_no, no):
    """Matcher for get_elapsed_days: compare ``lucky_no`` with ``no`` using ``==``."""
    is_same = (lucky_no == no)
    return is_same

In [18]:
# Oldest draws first, so the gaps computed next are measured from the previous
# (earlier) occurrence.
df.sort_values(by=['DrawDate'], ascending=True, inplace=True)

In [19]:
# Fill DaysSinceLastExactMatch for every number. Relies on df currently being
# sorted by DrawDate in ascending order.
for no in range(TOTAL_NUMBERS):
    if no % 1000 == 0:
        # Progress message; the total comes from the constant instead of a literal.
        print('Processing %s of %s' % (no, TOTAL_NUMBERS))
    get_elapsed_days(df, no, 'DaysSinceLastExactMatch', exact_match)

Processing 0 of 10000
Processing 1000 of 10000
Processing 2000 of 10000
Processing 3000 of 10000
Processing 4000 of 10000
Processing 5000 of 10000
Processing 6000 of 10000
Processing 7000 of 10000
Processing 8000 of 10000
Processing 9000 of 10000


In [20]:
# Spot check: the gaps for 1234 should match the prototype computed earlier.
display(df[df.LuckyNo==1234])

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch
231,41792,1992-05-28,2ndPrizeNo,1234,0
18696,121997,1997-07-12,SpecialNo7,1234,1871
35191,193801,2001-12-26,2ndPrizeNo,1234,1628
47146,245705,2005-02-13,SpecialNo6,1234,1145
47753,248405,2005-04-06,ConsolationNo2,1234,52
49421,255605,2005-09-11,SpecialNo4,1234,158
71468,351511,2011-04-24,ConsolationNo4,1234,2051
71528,351711,2011-04-27,SpecialNo8,1234,3
98841,470517,2017-12-27,ConsolationNo7,1234,2436
101068,480218,2018-07-15,ConsolationNo3,1234,200


#### Days Until Next Exact Match

In [21]:
# Placeholder column; filled in by the loop below after re-sorting.
df['DaysUntilNextExactMatch'] = 0
df.tail(20)

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch
104670,495819,2019-06-09,SpecialNo7,5050,120,0
104667,495819,2019-06-09,SpecialNo4,3669,14,0
104668,495819,2019-06-09,SpecialNo5,9517,603,0
104669,495819,2019-06-09,SpecialNo6,1844,1187,0
104662,495819,2019-06-09,ConsolationNo9,3596,2954,0
104666,495819,2019-06-09,SpecialNo3,1218,1628,0
104661,495819,2019-06-09,ConsolationNo8,6788,407,0
104655,495819,2019-06-09,ConsolationNo2,4622,176,0
104659,495819,2019-06-09,ConsolationNo6,3153,449,0
104658,495819,2019-06-09,ConsolationNo5,1132,189,0


In [22]:
# Newest draws first: with this order the "previous" matching row seen by
# get_elapsed_days is the next draw in time, so the gap is the days until it.
df.sort_values(by=['DrawDate'], ascending=False, inplace=True)

In [23]:
# Fill DaysUntilNextExactMatch for every number. Relies on df currently being
# sorted by DrawDate in descending order.
for no in range(TOTAL_NUMBERS):
    if no % 1000 == 0:
        # Progress message; the total comes from the constant instead of a literal.
        print('Processing %s of %s' % (no, TOTAL_NUMBERS))
    get_elapsed_days(df, no, 'DaysUntilNextExactMatch', exact_match)

Processing 0 of 10000
Processing 1000 of 10000
Processing 2000 of 10000
Processing 3000 of 10000
Processing 4000 of 10000
Processing 5000 of 10000
Processing 6000 of 10000
Processing 7000 of 10000
Processing 8000 of 10000
Processing 9000 of 10000


In [24]:
# Spot check: each row's DaysUntilNextExactMatch should equal the
# DaysSinceLastExactMatch of the next (later) draw of 1234.
display(df[df.LuckyNo==1234])

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch
101068,480218,2018-07-15,ConsolationNo3,1234,200,0
98841,470517,2017-12-27,ConsolationNo7,1234,2436,200
71528,351711,2011-04-27,SpecialNo8,1234,3,2436
71468,351511,2011-04-24,ConsolationNo4,1234,2051,3
49421,255605,2005-09-11,SpecialNo4,1234,158,2051
47753,248405,2005-04-06,ConsolationNo2,1234,52,158
47146,245705,2005-02-13,SpecialNo6,1234,1145,52
35191,193801,2001-12-26,2ndPrizeNo,1234,1628,1145
18696,121997,1997-07-12,SpecialNo7,1234,1871,1628
231,41792,1992-05-28,2ndPrizeNo,1234,0,1871


#### Days Since Last Any Match

In [25]:
def pad(val):
    """Return ``str(val)`` left-filled with zeros to a width of at least four."""
    text = str(val)
    return text.zfill(4)

In [26]:
from itertools import permutations

def get_permutations(no):
    """Return every distinct number obtainable by rearranging the digits of ``no``.

    ``no`` is first zero-padded with ``pad`` (e.g. 123 -> '0123'), so leading
    zeros take part in the rearrangement; results are converted back to ``int``
    (so '0123' becomes 123).

    Parameters
    ----------
    no : int
        Non-negative number to rearrange.

    Returns
    -------
    list of int
        The distinct rearrangements in ascending order. Sorting makes the result
        reproducible; iterating a set of string tuples can differ between
        interpreter runs.
    """
    digits = pad(no)
    return sorted({int(''.join(p)) for p in permutations(digits)})
    
def any_match(lucky_no, no):
    """Matcher for get_elapsed_days: flag entries of ``lucky_no`` that are a digit rearrangement of ``no``."""
    candidates = get_permutations(no)
    return lucky_no.isin(candidates)

In [27]:
# Placeholder column, then oldest-first order so the gaps are measured from the
# previous (earlier) draw of any rearrangement.
df['DaysSinceLastAnyMatch'] = 0
df.sort_values(by=['DrawDate'], ascending=True, inplace=True)
display(df[df.LuckyNo==1234])

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch
231,41792,1992-05-28,2ndPrizeNo,1234,0,1871,0
18696,121997,1997-07-12,SpecialNo7,1234,1871,1628,0
35191,193801,2001-12-26,2ndPrizeNo,1234,1628,1145,0
47146,245705,2005-02-13,SpecialNo6,1234,1145,52,0
47753,248405,2005-04-06,ConsolationNo2,1234,52,158,0
49421,255605,2005-09-11,SpecialNo4,1234,158,2051,0
71468,351511,2011-04-24,ConsolationNo4,1234,2051,3,0
71528,351711,2011-04-27,SpecialNo8,1234,3,2436,0
98841,470517,2017-12-27,ConsolationNo7,1234,2436,200,0
101068,480218,2018-07-15,ConsolationNo3,1234,200,0,0


In [28]:
# Fill DaysSinceLastAnyMatch. Relies on df currently being sorted by DrawDate
# in ascending order.
for no in range(TOTAL_NUMBERS):
    if no % 1000 == 0:
        # Progress message; the total comes from the constant instead of a literal.
        print('Processing %s of %s' % (no, TOTAL_NUMBERS))
    # any_match selects the same rows for every rearrangement of the same
    # digits, so each digit combination only needs to be processed once: for
    # its smallest member, whose padded digits are in non-decreasing order.
    digits = pad(no)
    if list(digits) != sorted(digits):
        continue
    get_elapsed_days(df, no, 'DaysSinceLastAnyMatch', any_match)

# Spot check on the rearrangements of 0123.
display(df[df.LuckyNo.isin(get_permutations(123))].head(10))

Processing 0 of 10000
Processing 1000 of 10000
Processing 2000 of 10000
Processing 3000 of 10000
Processing 4000 of 10000
Processing 5000 of 10000
Processing 6000 of 10000
Processing 7000 of 10000
Processing 8000 of 10000
Processing 9000 of 10000


Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch
294,41992,1992-06-03,SpecialNo5,213,0,15,0
458,42692,1992-06-18,SpecialNo8,213,15,42,15
590,43292,1992-07-02,SpecialNo2,2310,0,462,14
857,44492,1992-07-30,ConsolationNo3,213,42,356,28
2042,49592,1992-11-26,SpecialNo5,3201,0,143,119
2344,50892,1992-12-27,SpecialNo8,1230,0,500,31
2633,52193,1993-01-27,ConsolationNo8,1302,0,2771,31
2869,53193,1993-02-18,SpecialNo4,231,0,307,22
3444,55693,1993-04-18,SpecialNo4,3201,143,812,59
3728,56993,1993-05-19,3rdPrizeNo,1023,0,28,31


#### Days Until Next Any Match

In [29]:
# Placeholder column, then newest-first order so the gaps computed next are the
# days until the following draw of any rearrangement.
df['DaysUntilNextAnyMatch'] = 0
df.sort_values(by=['DrawDate'], ascending=False, inplace=True)
display(df[df.LuckyNo.isin(get_permutations(123))].head(10))

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch
104291,494219,2019-05-04,ConsolationNo6,1032,186,0,13,0
104187,493719,2019-04-21,SpecialNo7,312,602,0,11,0
104054,493219,2019-04-10,3rdPrizeNo,123,973,0,115,0
102819,487818,2018-12-16,ConsolationNo6,3201,686,0,47,0
102270,485418,2018-10-30,ConsolationNo9,1032,254,186,97,0
101163,480618,2018-07-25,ConsolationNo6,213,1246,0,49,0
100638,478318,2018-06-06,SpecialNo1,3120,2863,0,81,0
99784,474618,2018-03-17,ConsolationNo7,2301,52,0,27,0
99461,473218,2018-02-18,ConsolationNo6,1032,623,254,25,0
99143,471818,2018-01-24,SpecialNo1,2301,672,52,98,0


In [30]:
# Fill DaysUntilNextAnyMatch. Relies on df currently being sorted by DrawDate
# in descending order.
for no in range(TOTAL_NUMBERS):
    if no % 1000 == 0:
        # Progress message; the total comes from the constant instead of a literal.
        print('Processing %s of %s' % (no, TOTAL_NUMBERS))
    # any_match selects the same rows for every rearrangement of the same
    # digits, so each digit combination only needs to be processed once: for
    # its smallest member, whose padded digits are in non-decreasing order.
    digits = pad(no)
    if list(digits) != sorted(digits):
        continue
    get_elapsed_days(df, no, 'DaysUntilNextAnyMatch', any_match)

# Spot check on the rearrangements of 0123.
display(df[df.LuckyNo.isin(get_permutations(123))].head(10))

Processing 0 of 10000
Processing 1000 of 10000
Processing 2000 of 10000
Processing 3000 of 10000
Processing 4000 of 10000
Processing 5000 of 10000
Processing 6000 of 10000
Processing 7000 of 10000
Processing 8000 of 10000
Processing 9000 of 10000


Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch
104291,494219,2019-05-04,ConsolationNo6,1032,186,0,13,0
104187,493719,2019-04-21,SpecialNo7,312,602,0,11,13
104054,493219,2019-04-10,3rdPrizeNo,123,973,0,115,11
102819,487818,2018-12-16,ConsolationNo6,3201,686,0,47,115
102270,485418,2018-10-30,ConsolationNo9,1032,254,186,97,47
101163,480618,2018-07-25,ConsolationNo6,213,1246,0,49,97
100638,478318,2018-06-06,SpecialNo1,3120,2863,0,81,49
99784,474618,2018-03-17,ConsolationNo7,2301,52,0,27,81
99461,473218,2018-02-18,ConsolationNo6,1032,623,254,25,27
99143,471818,2018-01-24,SpecialNo1,2301,672,52,98,25


#### Extract digits by position

In [31]:
# Zero-padded 4-character form of every number. pad() already applies str(),
# and the intermediate Series never has to become a column of df.
lucky_no_str = df['LuckyNo'].apply(pad)

# One column per digit position. The values are single-character strings, not
# integers. The column names ('2st', '3st', '4st') are kept unchanged because
# other cells refer to them.
for position, column in enumerate(['1st_digit', '2st_digit', '3st_digit', '4st_digit']):
    df[column] = lucky_no_str.str[position:position + 1]

display(df[df.LuckyNo.isin(get_permutations(123))].head(10))

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch,1st_digit,2st_digit,3st_digit,4st_digit
104291,494219,2019-05-04,ConsolationNo6,1032,186,0,13,0,1,0,3,2
104187,493719,2019-04-21,SpecialNo7,312,602,0,11,13,0,3,1,2
104054,493219,2019-04-10,3rdPrizeNo,123,973,0,115,11,0,1,2,3
102819,487818,2018-12-16,ConsolationNo6,3201,686,0,47,115,3,2,0,1
102270,485418,2018-10-30,ConsolationNo9,1032,254,186,97,47,1,0,3,2
101163,480618,2018-07-25,ConsolationNo6,213,1246,0,49,97,0,2,1,3
100638,478318,2018-06-06,SpecialNo1,3120,2863,0,81,49,3,1,2,0
99784,474618,2018-03-17,ConsolationNo7,2301,52,0,27,81,2,3,0,1
99461,473218,2018-02-18,ConsolationNo6,1032,623,254,25,27,1,0,3,2
99143,471818,2018-01-24,SpecialNo1,2301,672,52,98,25,2,3,0,1


#### Add Date Parts

In [35]:
# Expand DrawDate into its calendar parts while keeping the original column.
add_datepart(df, 'DrawDate', drop=False)

# Of the generated columns, discard the period-boundary flags and the elapsed
# seconds.
columns_to_drop = [
    'DrawIs_month_end',
    'DrawIs_month_start',
    'DrawIs_quarter_end',
    'DrawIs_quarter_start',
    'DrawIs_year_end',
    'DrawIs_year_start',
    'DrawElapsed',
]
df.drop(columns=columns_to_drop, inplace=True)

display(df[df.LuckyNo.isin(get_permutations(123))].head(10))

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch,1st_digit,2st_digit,3st_digit,4st_digit,DrawYear,DrawMonth,DrawWeek,DrawDay,DrawDayofweek,DrawDayofyear
104291,494219,2019-05-04,ConsolationNo6,1032,186,0,13,0,1,0,3,2,2019,5,18,4,5,124
104187,493719,2019-04-21,SpecialNo7,312,602,0,11,13,0,3,1,2,2019,4,16,21,6,111
104054,493219,2019-04-10,3rdPrizeNo,123,973,0,115,11,0,1,2,3,2019,4,15,10,2,100
102819,487818,2018-12-16,ConsolationNo6,3201,686,0,47,115,3,2,0,1,2018,12,50,16,6,350
102270,485418,2018-10-30,ConsolationNo9,1032,254,186,97,47,1,0,3,2,2018,10,44,30,1,303
101163,480618,2018-07-25,ConsolationNo6,213,1246,0,49,97,0,2,1,3,2018,7,30,25,2,206
100638,478318,2018-06-06,SpecialNo1,3120,2863,0,81,49,3,1,2,0,2018,6,23,6,2,157
99784,474618,2018-03-17,ConsolationNo7,2301,52,0,27,81,2,3,0,1,2018,3,11,17,5,76
99461,473218,2018-02-18,ConsolationNo6,1032,623,254,25,27,1,0,3,2,2018,2,7,18,6,49
99143,471818,2018-01-24,SpecialNo1,2301,672,52,98,25,2,3,0,1,2018,1,4,24,2,24


#### Calculate Combinations Per X Draws

In [120]:
# Newest draws first (ties ordered by PrizeType, also descending); the
# combination count below takes its draws from the top of this order.
df.sort_values(by=['DrawDate', 'PrizeType'], ascending=False, inplace=True)
# 23 rows — presumably one complete draw (23 distinct PrizeType values); confirm.
df.head(23)

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch,1st_digit,2st_digit,3st_digit,4st_digit,DrawYear,DrawMonth,DrawWeek,DrawDay,DrawDayofweek,DrawDayofyear
104672,495819,2019-06-09,SpecialNo9,2976,2066,0,39,0,2,9,7,6,2019,6,23,9,6,160
104671,495819,2019-06-09,SpecialNo8,9073,592,0,0,0,9,0,7,3,2019,6,23,9,6,160
104670,495819,2019-06-09,SpecialNo7,5050,120,0,120,0,5,0,5,0,2019,6,23,9,6,160
104669,495819,2019-06-09,SpecialNo6,1844,1187,0,15,0,1,8,4,4,2019,6,23,9,6,160
104668,495819,2019-06-09,SpecialNo5,9517,603,0,7,0,9,5,1,7,2019,6,23,9,6,160
104667,495819,2019-06-09,SpecialNo4,3669,14,0,14,0,3,6,6,9,2019,6,23,9,6,160
104666,495819,2019-06-09,SpecialNo3,1218,1628,0,147,0,1,2,1,8,2019,6,23,9,6,160
104665,495819,2019-06-09,SpecialNo2,791,613,0,28,0,0,7,9,1,2019,6,23,9,6,160
104664,495819,2019-06-09,SpecialNo10,6223,735,0,25,0,6,2,2,3,2019,6,23,9,6,160
104663,495819,2019-06-09,SpecialNo1,9569,705,0,95,0,9,5,6,9,2019,6,23,9,6,160


In [117]:
# Derive number of combinations for the past X draw
X = 3
# Rows covered by X draws. 23 is presumably the number of prizes per draw (the
# data has 23 distinct PrizeType values) — confirm.
# Not to be confused with the TOTAL_NUMBERS constant.
total_numbers = 23 * X
print(total_numbers)

69


In [118]:
def get_combinations(df, dates):
    """Count repeated digit combinations among the numbers drawn on ``dates``.

    Two numbers belong to the same combination when their zero-padded 4-digit
    forms contain the same digits in any order (e.g. 123 -> '0123' and 3201).
    Intended for numbers in the range 0..9999.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``DrawDate`` and ``LuckyNo`` columns. Rows are scanned in
        their current order.
    dates : list-like
        Draw dates to include (passed to ``Series.isin``).

    Returns
    -------
    dict
        Maps the first number seen for each combination to the count of
        *additional* numbers from the same combination; 0 means the
        combination appeared exactly once.
    """
    first_seen = {}        # digit signature -> first number seen with those digits
    combination_list = {}  # first number seen -> additional occurrences

    for no in df[df.DrawDate.isin(dates)].LuckyNo:
        signature = _digit_signature(no)
        if signature in first_seen:
            combination_list[first_seen[signature]] += 1
        else:
            first_seen[signature] = no
            combination_list[no] = 0
    return combination_list


def _digit_signature(no):
    """Return the digits of ``no``, zero-padded to four characters, in sorted order."""
    # Same zero-fill width as pad(); equal signatures <=> digit rearrangements.
    return ''.join(sorted(str(no).zfill(4)))


# Example with explicit dates:
# get_combinations(df,['2019-06-09', '2019-06-08']) 
# Dates found in the first `total_numbers` rows (23 * X rows, X = 3); assumes df
# is still sorted newest-first.
result = get_combinations(df,df.DrawDate[0:total_numbers].unique()) 
# Rows considered, number of distinct digit combinations, and the counts.
print(total_numbers, len(result), result)

69 64 {2976: 0, 397: 2, 3669: 0, 9517: 0, 5050: 0, 6223: 0, 4622: 0, 6788: 0, 1218: 0, 791: 0, 3153: 0, 8611: 0, 1132: 0, 3596: 0, 5031: 0, 1232: 0, 483: 0, 1808: 0, 1844: 0, 1219: 0, 9569: 0, 8902: 0, 9531: 1, 7287: 0, 2927: 0, 4206: 0, 3397: 0, 7351: 0, 4031: 0, 3925: 0, 1144: 0, 6775: 0, 7829: 0, 4703: 0, 7758: 0, 4591: 1, 5228: 0, 307: 0, 2971: 0, 6: 0, 6780: 1, 6964: 0, 6801: 0, 6894: 0, 5901: 0, 1678: 0, 6090: 0, 4819: 0, 9927: 0, 1314: 0, 1715: 0, 3252: 0, 2177: 0, 977: 0, 1826: 0, 4486: 0, 7001: 0, 9713: 0, 5099: 0, 6576: 0, 6276: 0, 7389: 0, 1904: 0, 3610: 0}


#### Cumulative Probability Exact Match

In [133]:
# Placeholder column; nothing in the cells shown here fills it in yet.
df['CumProbaExactMatch'] = 0
# Back to chronological order (ties ordered by PrizeType).
df.sort_values(by=['DrawDate', 'PrizeType'], ascending=True, inplace=True)
# Most recent draws of any rearrangement of 2646.
df[df.LuckyNo.isin(get_permutations(2646))].tail(10)

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch,1st_digit,2st_digit,3st_digit,4st_digit,DrawYear,DrawMonth,DrawWeek,DrawDay,DrawDayofweek,DrawDayofyear,CumProbaExactMatch
100843,479218,2018-06-26,ConsolationNo8,6264,314,0,108,8,6,2,6,4,2018,6,26,26,1,177,0
100968,479718,2018-07-04,SpecialNo8,6642,123,172,8,77,6,6,4,2,2018,7,27,4,2,185,0
101812,483418,2018-09-19,SpecialNo10,6246,1869,158,77,95,6,2,4,6,2018,9,38,19,2,262,0
102879,488118,2018-12-23,1stPrizeNo,6642,172,0,95,27,6,6,4,2,2018,12,51,23,6,357,0
103165,489319,2019-01-19,ConsolationNo7,4662,1652,0,27,36,4,6,6,2,2019,1,3,19,5,19,0
103599,491219,2019-02-24,ConsolationNo4,6246,158,66,36,41,6,2,4,6,2019,2,8,24,6,55,0
104010,493019,2019-04-06,ConsolationNo10,6426,832,0,41,25,6,4,2,6,2019,4,14,6,5,96,0
104268,494119,2019-05-01,ConsolationNo6,6246,66,0,25,31,6,2,4,6,2019,5,18,1,2,121,0
104570,495419,2019-06-01,ConsolationNo9,4626,1424,0,31,1,4,6,2,6,2019,6,22,1,5,152,0
104581,495519,2019-06-02,1stPrizeNo,2646,449,0,1,0,2,6,4,6,2019,6,22,2,6,153,0


In [135]:
# Most recent draws of exactly 2646.
df[df.LuckyNo.isin([2646])].tail(10)

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch,1st_digit,2st_digit,3st_digit,4st_digit,DrawYear,DrawMonth,DrawWeek,DrawDay,DrawDayofweek,DrawDayofyear,CumProbaExactMatch
25544,151799,1999-05-26,SpecialNo10,2646,781,20,29,20,2,6,4,6,1999,5,21,26,2,146,0
25756,152699,1999-06-15,SpecialNo6,2646,20,127,20,4,2,6,4,6,1999,6,24,15,1,166,0
27086,158499,1999-10-20,SpecialNo2,2646,127,213,123,67,2,6,4,6,1999,10,42,20,2,293,0
29275,167900,2000-05-20,SpecialNo6,2646,213,346,13,7,2,6,4,6,2000,5,20,20,5,141,0
32817,183301,2001-05-01,SpecialNo6,2646,346,2324,0,0,2,6,4,6,2001,5,18,1,1,121,0
57213,289507,2007-09-11,ConsolationNo9,2646,2324,2303,38,99,2,6,4,6,2007,9,37,11,1,254,0
82438,399213,2013-12-31,ConsolationNo3,2646,2303,446,97,40,2,6,4,6,2013,12,1,31,1,365,0
87507,421215,2015-03-22,SpecialNo2,2646,446,1084,95,108,2,6,4,6,2015,3,12,22,6,81,0
99714,474318,2018-03-10,ConsolationNo6,2646,1084,449,7,108,2,6,4,6,2018,3,10,10,5,69,0
104581,495519,2019-06-02,1stPrizeNo,2646,449,0,1,0,2,6,4,6,2019,6,22,2,6,153,0


In [156]:
# First rows in the current (chronological) order.
df.head(10)

Unnamed: 0,DrawNo,DrawDate,PrizeType,LuckyNo,DaysSinceLastExactMatch,DaysUntilNextExactMatch,DaysSinceLastAnyMatch,DaysUntilNextAnyMatch,1st_digit,2st_digit,3st_digit,4st_digit,DrawYear,DrawMonth,DrawWeek,DrawDay,DrawDayofweek,DrawDayofyear,CumProbaExactMatch
0,40792,1992-05-06,1stPrizeNo,19,0,28,0,28,0,0,1,9,1992,5,19,6,2,127,0
1,40792,1992-05-06,2ndPrizeNo,1124,0,2436,0,162,1,1,2,4,1992,5,19,6,2,127,0
2,40792,1992-05-06,3rdPrizeNo,592,0,25,0,11,0,5,9,2,1992,5,19,6,2,127,0
3,40792,1992-05-06,ConsolationNo1,5311,0,515,0,204,5,3,1,1,1992,5,19,6,2,127,0
4,40792,1992-05-06,ConsolationNo10,407,0,2114,0,22,0,4,0,7,1992,5,19,6,2,127,0
5,40792,1992-05-06,ConsolationNo2,1949,0,95,0,95,1,9,4,9,1992,5,19,6,2,127,0
6,40792,1992-05-06,ConsolationNo3,1606,0,351,0,35,1,6,0,6,1992,5,19,6,2,127,0
7,40792,1992-05-06,ConsolationNo4,3775,0,494,0,126,3,7,7,5,1992,5,19,6,2,127,0
8,40792,1992-05-06,ConsolationNo5,6226,0,494,0,232,6,2,2,6,1992,5,19,6,2,127,0
9,40792,1992-05-06,ConsolationNo6,1271,0,410,0,119,1,2,7,1,1992,5,19,6,2,127,0


In [177]:
# For every draw of 2646, print its date and the number of rows that precede it
# in the current row order of df. Positions are taken from the row order itself
# rather than from index labels, which only coincide with positions while the
# frame happens to be in its original order.
positions_2646 = np.flatnonzero((df.LuckyNo == 2646).values)
for position in positions_2646:
    print(df.DrawDate.iloc[position], position)

1993-07-01 00:00:00 4164
1994-10-08 00:00:00 8751
1997-04-05 00:00:00 17714
1999-05-26 00:00:00 25544
1999-06-15 00:00:00 25756
1999-10-20 00:00:00 27086
2000-05-20 00:00:00 29275
2001-05-01 00:00:00 32817
2007-09-11 00:00:00 57213
2013-12-31 00:00:00 82438
2015-03-22 00:00:00 87507
2018-03-10 00:00:00 99714
2019-06-02 00:00:00 104581


#### Digits Average Frequencies by Position

#### Digits Pairings

In [59]:
#### Visualisation

#### Digits Rolling Mean

In [33]:
# Disabled: the *_digit columns hold one-character strings (they are built with
# .str slicing), so they need converting to numbers before a rolling mean can
# be computed.
# df['1st_digit_rolling_mean'] = df['1st_digit'].rolling(window=23).mean()
# df['2st_digit_rolling_mean'] = df['2st_digit'].rolling(window=23).mean()
# df['3st_digit_rolling_mean'] = df['3st_digit'].rolling(window=23).mean()
# df['4st_digit_rolling_mean'] = df['4st_digit'].rolling(window=23).mean()
# df['total_digits_rolling_mean'] = (df['1st_digit_rolling_mean'] + df['2st_digit_rolling_mean'] + df['3st_digit_rolling_mean'] + df['4st_digit_rolling_mean']) / 4

In [34]:
# df.head(46)