In [10]:
# Import Libraries
from math import factorial
import random
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [3]:
# Load PCSO Lotto Draw Results
# Every results file is read with the same layout: two columns, first row skipped.
names = ['combination', 'date']

url_642 = 'results_6-42_20092017.csv'
url_645 = 'results_6-45_20092017.csv'
url_649 = 'results_6-49_20092017.csv'
url_655 = 'results_6-55_20102017.csv'
url_658 = 'results_6-58_20152017.csv'

# One read_csv call per game, in the same order as the original cell
data_642, data_645, data_649, data_655, data_658 = (
    pd.read_csv(path, names=names, delimiter=',', skiprows=1)
    for path in (url_642, url_645, url_649, url_655, url_658)
)

In [4]:
# Preview the first ten 6/42 rows (columns: combination, date)
print(data_642.head(10))

         combination       date
0  06-41-07-22-39-33   1/3/2009
1  18-42-39-20-22-29   1/6/2009
2  31-04-01-24-12-41  1/10/2009
3  08-38-01-40-34-32  1/13/2009
4  31-02-06-15-14-04  1/17/2009
5  02-31-27-10-37-05  1/20/2009
6  20-21-22-03-15-02  1/24/2009
7  29-32-41-38-05-08  1/27/2009
8  30-41-39-17-07-38  1/31/2009
9  10-11-06-19-21-15   2/3/2009


In [5]:
# Prepare Combination Probabilities for Theoretical Analysis
def comb(n,k):
    """Return the binomial coefficient C(n, k) as an exact integer.

    n -- size of the set to choose from
    k -- number of items chosen

    Returns 0 when k is negative or larger than n (there is no such
    selection), instead of failing inside factorial() on a negative value.
    """
    if k < 0 or k > n:
        return 0
    # Floor division is exact here (the denominator always divides n!) and,
    # unlike '/', never falls back to an imprecise float under true division.
    return factorial(n)//(factorial(k)*factorial(n-k))

def comb_oddeven(n,i,k=6):
    """Count the k-number combinations from 1..n with exactly i even numbers.

    n -- highest ball number (balls are numbered 1..n)
    i -- required number of even balls in the combination
    k -- total balls in a combination (defaults to 6, the lotto draw size)

    The result is (ways to pick i evens) * (ways to pick k-i odds).
    """
    evens = n//2      # even numbers in 1..n
    odds = n-evens    # odd numbers in 1..n
    return comb(evens,i)*comb(odds,k-i) # Even * Odd

# One list per game; entry i is the theoretical probability that a draw
# contains exactly i even numbers (i = 0..6).
prob_642, prob_645, prob_649, prob_655, prob_658 = [], [], [], [], []

# i is no. of even numbers
for i in range(7):
    for balls, probs in ((42, prob_642), (45, prob_645), (49, prob_649),
                         (55, prob_655), (58, prob_658)):
        # favourable combinations / all 6-number combinations for this game
        probs.append(comb_oddeven(balls,i)/float(comb(balls,6)))

# Compile all probabilities in a dict keyed by game position (0 = 6/42 ... 4 = 6/58)
prob_dict = dict(enumerate([prob_642, prob_645, prob_649, prob_655, prob_658]))

In [6]:
# Two ways of reaching the first combination: as a one-item Series (label-based) ...
print(data_642[['combination']].iloc[0])
# ... and as the raw string taken from the underlying value array (row 0, column 0)
print(data_642.values[0,0])

combination    06-41-07-22-39-33
Name: 0, dtype: object
06-41-07-22-39-33


In [7]:
# Count Odd-Even Combinations
def data_count_oddeven(data):
    """Tally even and odd numbers in every draw.

    data -- table whose first column holds '-'-separated number strings

    Returns a float array with one row per draw: column 0 is the count of
    even numbers, column 1 the count of odd numbers.
    """
    values = data.values
    n_draws = data.shape[0]
    tally = np.zeros((n_draws,2)) # Even, Odd
    for row in range(n_draws):
        balls = [int(token) for token in values[row,0].split('-')]
        odd = sum(1 for ball in balls if ball%2==1)
        tally[row,1] += odd
        tally[row,0] += len(balls)-odd
    return tally

def table_count_oddeven(r):
    """Build a histogram of draws by their number of even balls.

    r -- 2-D array whose column 0 holds the even-number count of each draw

    Returns a 7-item list; item j is the number of draws with j even numbers.
    """
    count = [0]*7
    for even_total in (int(r[idx,0]) for idx in range(len(r))):
        count[even_total] += 1
    return count

# Even/odd tallies for every game, computed in the order 6/42 ... 6/58
oddeven_642, oddeven_645, oddeven_649, oddeven_655, oddeven_658 = (
    data_count_oddeven(frame)
    for frame in (data_642, data_645, data_649, data_655, data_658)
)

# Compile all odd-even counts in a dict keyed by game position (0 = 6/42 ... 4 = 6/58)
oddeven_dict = dict(enumerate(
    [oddeven_642, oddeven_645, oddeven_649, oddeven_655, oddeven_658]
))

# TABLE STRUCTURE:
# Rows: 0-6, 1-5, 2-4, 3-3, 4-2, 5-1, 6-0 (E-O)
# Columns: [0,2,4,6,8] = Theoretical Distribution; [1,3,5,7,9] = Empirical Distribution
oddeven_dist = np.zeros((7,10))
for col in range(5):
    draws = oddeven_dict[col]
    count = table_count_oddeven(draws)
    n_draws = len(draws)
    for row, (prob, observed) in enumerate(zip(prob_dict[col], count)):
        # Expected number of draws = theoretical probability * draws on record
        oddeven_dist[row,2*col] = prob*n_draws
        oddeven_dist[row,2*col+1] = observed

In [8]:
# Dump Results to CSV File
# Writes the 7x10 table (theoretical and empirical columns interleaved) as comma-separated values.
np.savetxt('oddeven_dist.csv', oddeven_dist, delimiter=',')

In [9]:
# Approximation Error Analysis for Odd-Even Distribution (Absolute, Relative)
# Even columns of oddeven_dist are theoretical counts, odd columns empirical counts.
theoretical = oddeven_dist[:,0::2]
empirical = oddeven_dist[:,1::2]

err = np.zeros((7,10))
err[:,0::2] = np.abs(empirical-theoretical) # Absolute Error
# Relative Error in percent; note the denominator is the empirical count,
# so a row with zero observed draws produces inf here.
err[:,1::2] = 100*np.abs(1-theoretical/empirical)

# Dump Results to CSV File
np.savetxt('oddeven_err.csv', err, delimiter=',')

In [26]:
# Simulate a Lotto Run of 1000 draws (10 trials) and of 10000 draws; Repeat 100 times
def draw(n, k): # n is number of balls, k is number of balls to draw
    """Draw k distinct balls, without replacement, from balls numbered 1..n.

    n -- number of balls in the machine
    k -- number of balls to draw

    Returns a list of k distinct integers in random order.
    Raises ValueError when k is negative or exceeds n, rather than silently
    returning a different number of balls than requested.
    """
    if k < 0 or k > n:
        raise ValueError('cannot draw %d balls from %d' % (k, n))
    # Sample only the k balls needed instead of shuffling all n and slicing
    return random.sample(range(1, n+1), k)

# Use 6/45 for illustrative purposes
# Row b-1 of each table counts how often ball b was drawn.
setup_control = np.zeros((45,1)) # 10000 draws (control)
setup_exp = np.zeros((45,10)) # 1000 draws, 10 trials (experiment)

# Begin Control Run
for d in range(10000):
    results = draw(45, 6)
    # Balls within one draw are distinct, so one indexed increment hits each row once
    setup_control[[ball-1 for ball in results],0] += 1

# Begin Experiment Run
for t in range(10):
    for d in range(1000):
        results = draw(45, 6)
        setup_exp[[ball-1 for ball in results],t] += 1

# Dump Results to CSV File
for path, table in (('draw_control.csv', setup_control),
                    ('draw_experiment.csv', setup_exp)):
    np.savetxt(path, table, delimiter=',')

In [121]:
# Simulate 100 draws per game to check number of winning numbers per draw
# Fixed ticket played in each game
numbers_642 = [6, 11, 15, 28, 37, 40]
numbers_645 = [3, 16, 18, 24, 33, 39]
numbers_649 = [9, 13, 20, 30, 38, 41]
numbers_655 = [11, 16, 21, 28, 34, 37]
numbers_658 = [4, 15, 23, 47, 50, 54]

numbers_dict = dict(enumerate(
    [numbers_642, numbers_645, numbers_649, numbers_655, numbers_658]
))
n_list = [42, 45, 49, 55, 58]

# Rows: game; columns: how many of the ticket's numbers came up in a draw
hits_df = pd.DataFrame(0, index=['6/42', '6/45', '6/49', '6/55', '6/58'], columns=['0', '1', '2', '3', '4', '5', '6'])

for n, n_balls in enumerate(n_list):
    label = '6/'+str(n_balls)
    ticket = numbers_dict[n]
    for i in range(100):
        results = draw(n_balls, 6)
        hits = sum(1 for ball in results if ball in ticket)
        hits_df.at[label, str(hits)] += 1

In [122]:
# Show the hit-count table (one row per game, one column per number of hits)
hits_df.head()

Unnamed: 0,0,1,2,3,4,5,6
6/42,38,38,23,1,0,0,0
6/45,36,51,12,1,0,0,0
6/49,36,41,23,0,0,0,0
6/55,44,35,21,0,0,0,0
6/58,46,46,6,1,1,0,0


In [74]:
# Check how many hits for each unique 6/42 combination
# Seven non-overlapping tickets covering 1..42 in consecutive blocks of six
numbers = [list(range(first, first+6)) for first in range(1, 43, 6)]

index_642 = ['1-6', '7-12', '13-18', '19-24', '25-30', '31-36', '37-42']
hits_columns = ['0', '1', '2', '3', '4', '5', '6']
hits_642_df = pd.DataFrame(0, index=index_642, columns=hits_columns)

for n, ticket in enumerate(numbers):
    label = index_642[n]
    # 10000 simulated 6/42 draws per ticket
    for i in range(10000):
        results = draw(42, 6)
        hits = sum(1 for ball in results if ball in ticket)
        hits_642_df.at[label, str(hits)] += 1

In [75]:
# Show all seven rows of the 6/42 hit-count table
hits_642_df.head(7)

Unnamed: 0,0,1,2,3,4,5,6
1-6,3750,4245,1723,259,23,0,0
7-12,3687,4320,1690,292,11,0,0
13-18,3807,4284,1636,254,19,0,0
19-24,3717,4317,1670,282,14,0,0
25-30,3742,4248,1689,295,25,1,0
31-36,3686,4300,1698,306,9,1,0
37-42,3740,4266,1705,272,17,0,0


In [95]:
# Sum of the six drawn balls for each of 10000 simulated 6/45 draws,
# stored as a (10000, 1) float column.
result_sum = np.array([[sum(draw(45, 6))] for i in range(10000)], dtype=float)

In [96]:
# Average ball sum over the 10000 simulated draws.
# print is called as a function (as in the other cells), which is valid on
# Python 3 as well as Python 2.
print(sum(result_sum)/10000)

[138.0625]
