In [61]:
# Library Imports
import csv
import random
import pandas as pd
import time
from itertools import islice
from enum import Enum
import json

In [81]:
# Previous Lotto Records into Lists
wedLotto = pd.read_csv('Crawled_MonWed.csv')
satLotto = pd.read_csv('Crawled_Sat.csv')

# Remove ProductID
wedLotto = pd.DataFrame(wedLotto)
satLotto = pd.DataFrame(satLotto)

In [91]:
# Go through the previous draws and mark the relations between numbers
wedRelations = {}
satRelations = {}

# Conver string representation of list into list
def strList_to_list(strList: str) -> list:
    ret_list = []
    try:
        ret_list = json.loads(strList)
        return ret_list
    except:
        print("strList to List Failed: ", strList)
        return []

# list, set, dict are mutable -> pass by reference (if not reassign)
# ig. relationSetter(wedRelations, wedLotto)
def relationSetter(relations, prev_draws) -> dict:
    # String List to Int List
    for _, draw in prev_draws.iterrows():
        try:
            drawnTicket = strList_to_list(draw['PrimaryNumbers']) + strList_to_list(draw['SecondaryNumbers'])
        except:
            print('relationSetter Concat Failed')
            continue
            
            
        for num in drawnTicket:
            if num not in relations:
                relations[num] = {}
                relations[num]['reps'] = 0
                
            for numNum in drawnTicket:
                if num != numNum: # Not repeating myself
                    if numNum not in relations[num]:
                        relations[num][numNum] = 0
                    else:
                        relations[num][numNum] += 1
            relations[num]['reps'] += 1

In [92]:
# Get relation between the numbers
relationSetter(wedRelations, wedLotto)
relationSetter(satRelations, satLotto)

In [93]:
df_wed = pd.DataFrame.from_dict(wedRelations)
df_sat = pd.DataFrame.from_dict(satRelations)

In [95]:
# Retriev relations with other nums
def get_corr_nums(data):
    # Default Setup for Return Dictionary
    ret_data = {}
    ret_data['nums'] = {}
    ret_data['reps'] = data['reps']
    
    # Sort by repetitions of each num
    data.pop('reps')
    sorted_data = data.sort_values(ascending = False)
    # For threshold 
    overall_data_mean = sorted_data.mean()
    
    # Get corr_nums from sorted_data
    for key, value in sorted_data.items():
        ret_data['nums'][key] = value
    
    # Set basic info for the chosen corr_nums
    ret_data['overall_mean'] = round(overall_data_mean, 2)
     
    return ret_data

In [96]:
# get Relations per each num into DataFrame
df_wed_corrNums = {}
df_sat_corrNums = {}

for num in range(1, 46):
    df_wed_corrNums[num] = get_corr_nums(df_wed.get(num))
    
for num in range(1, 46):
    df_sat_corrNums[num] = get_corr_nums(df_sat.get(num))
    

In [394]:
# 1 game = 1 ticket with 8 numbers
# 1 play = n games

def random_picker(value):
    return random.choice([x for x in range(1,46) if x != value])

# pref_start_num = given Starting number
def ticket_generator(corr_data, 
                     threshold, 
                     n_top_nums = 5,
                     pref_start_num = None) -> dict:
    
    ticket = {}
    one_ticket_length = 8
    perc_sum = 0
    
    # 1. Choose one number randomly
    chosen_num_perc = 0
    chosen_num = pref_start_num if pref_start_num != None else random_picker(0)
    ticket['ticket'] = [chosen_num] 
    
    # 1.1 Slicing for the first num
    # Prevent setting n_top_nums to be 0
    n_nums = max(1, n_top_nums)
    isValidNum = True if corr_data.get(chosen_num, None) != None else False

    # Guard Count to Avoid infinte loop
    guardCount = 0
    
    # 2. Pick Follow Nums in the ticket
    while(len(ticket['ticket']) < one_ticket_length):
        guardCount += 1
        # Get valid Num      
        while not isValidNum or guardCount > 40:
            chosen_num = random_picker(chosen_num)
            chosen_num_perc = 0
            if corr_data.get(chosen_num, None) != None:
                isValidNum = True
                guardCount = 0
                       
        # Has valid chosen_num -> slice
        chosen_num_data = corr_data[chosen_num]
        num_slices = n_nums if guardCount < 10 else n_nums + guardCount // 10
        slice_len = min(n_nums, len(chosen_num_data['nums']))
        available_nums = islice(chosen_num_data['nums'].items(), slice_len)
        
        # Check if new before append the chosen Num
        if chosen_num not in ticket['ticket']:
            ticket['ticket'].append(chosen_num)
            perc_sum += chosen_num_perc
            guardCount = 0
            # print(chosen_num_item, chosen_num_perc, perc_sum)
            
        # Get new chosen_num
        chosen_num_item = random.choice(list(available_nums))
        chosen_num = chosen_num_item[0]
        chosen_num_perc = round ((chosen_num_item[1] / chosen_num_data['overall_mean'])*100 - 100, 2)

    # 4. Add average correlation value into the ticket
    ticket['perc_mean'] = round(perc_sum / one_ticket_length, 2)
    return ticket

In [395]:
# Classes - Different Lottery has diff rules

# Lottery
class Lotto(Enum):
    WED = 'MonWedLotto'
    SAT = 'TattsLotto'

class Division(Enum):
    first = "First"
    second = "Second"
    third = "Third"
    fourth = "Fourth"
    fifth = "Fifth"
    sixth = "Sixth"
    error = "error"

In [396]:
# Division Calculator - per each ticket
# lotto - Lotto Class
# Draw - Winning Numbers
def division_calculator(lotto, draw, ticket):
    if lotto == Lotto.WED:
        return wed_division_calculator(draw, ticket)
    else:
        return sat_division_calculator(draw, ticket)
    
def wed_division_calculator(draw, ticket) -> str:
    winning_match = 0
    supp_match = 0
    
    # theta(n^2) = 8^2 = 64 per game => Can be seen as O(1)
    # Count Matches
    for index, draw_num in enumerate(draw):
        for ticket_num in ticket:
            if ticket_num == draw_num:
                if index > 5:
                    supp_match += 1
                else:
                    winning_match += 1
    
    # Division Calc
    # 1. 6 winnings 2. 5 winnings + 1 supp 3. 5 winnings
    # 4. 4 winnings 5. 3 winnings + 2 supp 
    # 6. total 3 (winning 1 + 2, 2 + 1)
    if winning_match == 6:
        return Division.first.value
    
    if winning_match == 5:
        if supp_match == 1:
            return Division.second.value
        else:
            return Division.third.value
    
    if winning_match == 4:
        return  Division.fourth.value

    if winning_match == 3 and supp_match == 2:
        return Division.fifth.value
    
    if winning_match + supp_match == 3:
        return Division.sixth.value

    return Division.error.value

def sat_division_calculator(draw, ticket) -> str:
    winning_match = 0
    supp_match = 0
    
    # theta(n^2) = 8^2 = 64 per game => Can be seen as O(1)
    # Count Matches
    for index, draw_num in enumerate(draw):
        for ticket_num in ticket:
            if ticket_num == draw_num:
                if index > 5:
                    supp_match += 1
                else:
                    winning_match += 1
    
    # Division Calc
    # 1. 6 winnings 2. 5 winnings + 1 supp 3. 5 winnings
    # 4. 4 winnings 5. 3 winnings + 1 supp 6. 3 winnings
    if winning_match == 6:
        return Division.first.value
    
    if winning_match == 5:
        if supp_match == 1:
            return Division.second.value
        else:
            return Division.third.value
    
    if winning_match == 4:
        return  Division.fourth.value

    if winning_match == 3 and supp_match == 2:
            return Division.fifth.value
        
    if winning_match == 3:
        return Division.sixth.value
    
    return Division.error.value

In [397]:
# Game - n tickets 
def game_marker(lotto, draw, game) -> dict:
    game_result = {}
    for ticket in game:
        result = division_calculator(lotto, draw, ticket)
        current_value = game_result.get(result, 0)
        game_result[result] = current_value + 1
    
    return game_result


In [398]:

def gameGenerator(game_data, num_tickets, best_n = 5, start_no = None) -> list:
    games = []
    for i in range (0,num_tickets):
        games.append(ticket_generator(game_data, 0.5, best_n, start_no))
        
    return games

In [399]:
ticket_generator(df_wed_corrNums, 0.5, 5)

{'ticket': [11, 35, 7, 45, 41, 27, 34, 20], 'perc_mean': 21.56}

In [401]:
gameGenerator(df_wed_corrNums, 10, 5, None)

[{'ticket': [41, 37, 5, 14, 42, 32, 45, 27], 'perc_mean': 27.88},
 {'ticket': [4, 13, 23, 22, 28, 30, 40, 44], 'perc_mean': 25.26},
 {'ticket': [1, 41, 38, 37, 14, 42, 13, 40], 'perc_mean': 31.72},
 {'ticket': [31, 13, 42, 32, 40, 16, 44, 28], 'perc_mean': 29.47},
 {'ticket': [1, 43, 41, 37, 40, 11, 26, 44], 'perc_mean': 25.66},
 {'ticket': [38, 4, 45, 32, 30, 16, 43, 7], 'perc_mean': 27.09},
 {'ticket': [45, 27, 44, 13, 14, 37, 41, 42], 'perc_mean': 24.69},
 {'ticket': [4, 13, 23, 39, 14, 22, 42, 40], 'perc_mean': 29.04},
 {'ticket': [3, 23, 39, 28, 22, 26, 14, 42], 'perc_mean': 29.74},
 {'ticket': [5, 44, 16, 30, 24, 43, 7, 35], 'perc_mean': 30.3}]

In [362]:
############### Plot Generating

In [363]:
# Want to provide following info: Top 10 Closely-related numbers, and mean without those 10.
# in histogram plot with proper label.
# Plot can be drawn by APIs (whether CSS or python) manipulating those infos.

def each_num_info(num_data, n):
    listed_data = list(num_data.items())
    length_listed_data = len(listed_data)
    
    top_n_data = []
    index_data_seg = min(n, length_listed_data)
    for index in range(0, index_data_seg):
        top_n_data.append(listed_data[index])
    
    rest_data_sum = 0
    no_rest_data = length_listed_data - index_data_seg - 1
    for index in range(index_data_seg, len(listed_data) - 1):
        rest_data_sum += listed_data[index][1]
    
    return top_n_data, (rest_data_sum // no_rest_data, no_rest_data)


def retrieve_plot_info(corrNums_data, n):
    plot_dict = {}
    
    for num, num_data in corrNums_data.items():
        plot_dict[num] = {}
        n_data_list, rest_data_tuple = each_num_info(num_data['nums'], n)
        plot_dict[num]['top_n'] = n_data_list
        plot_dict[num]['exc_n'] = rest_data_tuple
        plot_dict[num]['reps'] = num_data['reps'] 
        
    return plot_dict

In [364]:
df_wed_plot = retrieve_plot_info(df_wed_corrNums, 4)
df_sat_plot = retrieve_plot_info(df_sat_corrNums, 4)

In [None]:
df_wed_plot.to_csv('Crawled_MonWed.csv', encoding='utf-8', index=False)
df_sat_plot.to_csv('Crawled_Sat.csv', encoding='utf-8', index=False)

In [650]:
df = pd.DataFrame(df_wed_plot)

In [656]:
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,36,37,38,39,40,41,42,43,44,45
top_n,"[(24, 38.0), (39, 35.0), (43, 33.0), (41, 32.0)]","[(42, 32.0), (24, 32.0), (30, 30.0), (29, 29.0)]","[(23, 41.0), (16, 41.0), (4, 40.0), (24, 36.0)]","[(3, 40.0), (38, 36.0), (20, 36.0), (45, 35.0)]","[(14, 37.0), (37, 34.0), (38, 33.0), (44, 32.0)]","[(25, 34.0), (40, 33.0), (37, 32.0), (11, 31.0)]","[(35, 35.0), (40, 34.0), (31, 34.0), (43, 32.0)]","[(40, 36.0), (14, 35.0), (39, 35.0), (44, 34.0)]","[(23, 31.0), (8, 31.0), (13, 28.0), (14, 28.0)]","[(13, 36.0), (40, 35.0), (44, 34.0), (42, 32.0)]",...,"[(24, 38.0), (45, 33.0), (16, 32.0), (14, 31.0)]","[(38, 37.0), (14, 35.0), (41, 35.0), (5, 34.0)]","[(37, 37.0), (41, 36.0), (4, 36.0), (30, 34.0)]","[(28, 39.0), (44, 39.0), (13, 37.0), (16, 37.0)]","[(42, 41.0), (13, 39.0), (11, 37.0), (8, 36.0)]","[(45, 37.0), (38, 36.0), (37, 35.0), (20, 33.0)]","[(40, 41.0), (16, 40.0), (32, 39.0), (14, 38.0)]","[(24, 38.0), (30, 35.0), (1, 33.0), (7, 32.0)]","[(33, 40.0), (39, 39.0), (40, 36.0), (13, 36.0)]","[(27, 38.0), (41, 37.0), (39, 36.0), (24, 36.0)]"
exc_n,"(22.0, 40)","(23.0, 40)","(24.0, 40)","(26.0, 40)","(24.0, 40)","(23.0, 40)","(23.0, 40)","(24.0, 40)","(21.0, 40)","(24.0, 40)",...,"(24.0, 40)","(27.0, 40)","(25.0, 40)","(27.0, 40)","(27.0, 40)","(26.0, 40)","(28.0, 40)","(24.0, 40)","(27.0, 40)","(29.0, 40)"
reps,157.0,158.0,170.0,179.0,168.0,160.0,162.0,168.0,144.0,167.0,...,166.0,181.0,173.0,183.0,184.0,177.0,192.0,165.0,187.0,194.0
