In [2]:
# Library Imports
import csv
import random
import pandas as pd
import time
from itertools import islice
from enum import Enum
import json


In [17]:
# Load the previous lotto draw records into DataFrames
wedLotto = pd.read_csv('csv/Crawled_MonWed.csv')
satLotto = pd.read_csv('csv/Crawled_Sat.csv')

# NOTE(review): this step used to be labelled "Remove ProductID", but it only
# re-wrapped each frame in pd.DataFrame(...) and never dropped any column.
# read_csv already returns a DataFrame, so the no-op wrapping is removed and
# both frames are used exactly as loaded.

In [26]:
# Go through the previous draws and mark the relations between numbers
# One co-occurrence table per lottery; both are filled in place by relationSetter.
wedRelations, satRelations = dict(), dict()

# Convert the string representation of a list into a list
def strList_to_list(strList: str) -> list:
    """Parse the string form of a JSON list (e.g. "[1, 2, 3]") into a list.

    Args:
        strList: text expected to hold a JSON array.

    Returns:
        The decoded list. An empty list is returned (after printing a
        diagnostic) when the value cannot be parsed or does not decode to a
        list, so callers can always concatenate the result with another list.
    """
    try:
        parsed = json.loads(strList)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-string
        # input such as None or a float NaN.
        print("strList to List Failed: ", strList)
        return []

    # Valid JSON that is not an array (e.g. "5" or "{}") breaks the -> list
    # contract, so it is reported the same way as unparsable text.
    if not isinstance(parsed, list):
        print("strList to List Failed: ", strList)
        return []

    return parsed

# list, set and dict are mutable, so changes made inside the function are
# visible to the caller (as long as the parameter is not reassigned).
# e.g. relationSetter(wedRelations, wedLotto)
def relationSetter(relations, prev_draws) -> None:
    """Accumulate per-number draw counts and pairwise co-occurrence counts.

    For every draw, each drawn number gets its 'reps' counter increased by
    one and, for every other number of the same draw, its counter for that
    partner increased by one.

    Args:
        relations: dict updated in place. Layout after the call:
            relations[num]['reps']   -> number of draws containing num
            relations[num][partner]  -> number of draws containing both
        prev_draws: object exposing iterrows(); each row must provide the
            'PrimaryNumbers' and 'SecondaryNumbers' entries as JSON-list text.

    Returns:
        None. Results are written into `relations`.
    """
    for _, draw in prev_draws.iterrows():
        try:
            drawnTicket = (strList_to_list(draw['PrimaryNumbers'])
                           + strList_to_list(draw['SecondaryNumbers']))
        except (KeyError, TypeError):
            # Missing column or a non-list parse result: skip this draw only.
            print('relationSetter Concat Failed')
            continue

        for num in drawnTicket:
            num_relations = relations.setdefault(num, {'reps': 0})

            for numNum in drawnTicket:
                if num != numNum:  # a number is not related to itself
                    # Start from 0 and always add one, so the first shared
                    # draw is counted as 1 (it used to be stored as 0).
                    num_relations[numNum] = num_relations.get(numNum, 0) + 1

            num_relations['reps'] += 1

In [27]:
# Get relation between the numbers
# Fill each lottery's relation table in place from its historical draws.
for relations, draws in ((wedRelations, wedLotto), (satRelations, satLotto)):
    relationSetter(relations, draws)

In [28]:
# Outer dict keys become columns, inner keys ('reps' and partner numbers) the index.
df_wed, df_sat = (
    pd.DataFrame.from_dict(relations, orient='columns')
    for relations in (wedRelations, satRelations)
)

In [29]:
# Retrieve relations with other nums
def get_corr_nums(data):
    """Summarise one number's relation column.

    Args:
        data: pandas Series holding a 'reps' entry plus one entry per
            partner number (co-occurrence count).

    Returns:
        dict with, in this order:
            'nums'         -> {partner: count}, sorted by count descending
                              (sort_values keeps NaN entries at the end)
            'reps'         -> the value stored under 'reps'
            'overall_mean' -> mean of the partner counts, rounded to 2 places

    The input Series is left untouched. The previous implementation popped
    'reps' from it, so calling the function twice on the same column failed.
    """
    reps = data['reps']

    # drop() returns a new Series instead of mutating the caller's column.
    partner_counts = data.drop('reps')
    sorted_data = partner_counts.sort_values(ascending = False)

    return {
        'nums': dict(sorted_data.items()),
        'reps': reps,
        'overall_mean': round(sorted_data.mean(), 2),
    }

In [30]:
# get Relations per each num into DataFrame
# Build the per-number lookup for each lottery from the same recipe.
# A number that never appeared has no column (DataFrame.get returns None);
# it is skipped here instead of crashing inside get_corr_nums, which is the
# situation ticket_generator already handles via corr_data.get(...).
df_wed_corrNums = {
    num: get_corr_nums(df_wed.get(num))
    for num in range(1, 46)
    if num in df_wed.columns
}

df_sat_corrNums = {
    num: get_corr_nums(df_sat.get(num))
    for num in range(1, 46)
    if num in df_sat.columns
}
    

In [31]:
# 1 game = 1 ticket with 8 numbers
# 1 play = n games

def random_picker(value):
    """Return a random lotto number from 1..45 that differs from `value`."""
    candidates = list(filter(lambda number: number != value, range(1, 46)))
    return random.choice(candidates)

# pref_start_num = given Starting number
def ticket_generator(corr_data,
                     threshold,
                     n_top_nums = 5,
                     pref_start_num = None) -> dict:
    """Build one 8-number ticket by hopping between closely related numbers.

    Starting from `pref_start_num` (or a random number), the next number is
    repeatedly drawn at random from the first `n_top_nums` entries of the
    current number's 'nums' mapping, until 8 distinct numbers are collected.

    Args:
        corr_data: {number: {'nums': {partner: count, ...} ordered best
            first, 'overall_mean': float, ...}}.
        threshold: currently unused; kept for interface compatibility.
        n_top_nums: how many leading partners to choose from (minimum 1).
        pref_start_num: first number of the ticket; None picks one at random.

    Returns:
        {'ticket': [8 numbers], 'perc_mean': float}. perc_mean is the sum of
        each accepted hop's "percent above the source number's overall_mean",
        divided by 8 (the starting number contributes 0).

    Raises:
        ValueError: a random restart is needed but corr_data has no entry
            for any number in 1..45 (this used to loop forever).
    """
    one_ticket_length = 8
    perc_sum = 0

    # 1. Starting number: the caller's preference, otherwise a random one.
    chosen_num_perc = 0
    chosen_num = pref_start_num if pref_start_num is not None else random_picker(0)
    ticket = {'ticket': [chosen_num]}

    # Never slice fewer than one candidate.
    n_nums = max(1, n_top_nums)

    # Rounds spent since the ticket last grew.
    guardCount = 0

    # 2. Pick the follow-up numbers.
    while len(ticket['ticket']) < one_ticket_length:
        guardCount += 1

        # Restart from a random valid number when the current one has no
        # data (checked on every round, not only for the first number) or
        # when the walk has been stuck for more than 40 rounds.
        if corr_data.get(chosen_num) is None or guardCount > 40:
            if all(corr_data.get(num) is None for num in range(1, 46)):
                raise ValueError("corr_data has no entry for any number in 1..45")
            chosen_num = random_picker(chosen_num)
            while corr_data.get(chosen_num) is None:
                chosen_num = random_picker(chosen_num)
            chosen_num_perc = 0
            guardCount = 0

        chosen_num_data = corr_data[chosen_num]

        # Widen the candidate pool by one for every 10 stuck rounds. This
        # value was computed before but never applied to the slice.
        num_slices = n_nums if guardCount < 10 else n_nums + guardCount // 10
        slice_len = min(num_slices, len(chosen_num_data['nums']))
        available_nums = list(islice(chosen_num_data['nums'].items(), slice_len))

        # Accept the current number if the ticket does not hold it yet.
        if chosen_num not in ticket['ticket']:
            ticket['ticket'].append(chosen_num)
            perc_sum += chosen_num_perc
            guardCount = 0

        # Hop to one of the current number's leading partners.
        chosen_num_item = random.choice(available_nums)
        chosen_num = chosen_num_item[0]
        chosen_num_perc = round((chosen_num_item[1] / chosen_num_data['overall_mean'])*100 - 100, 2)

    # 3. Attach the average correlation value to the ticket.
    ticket['perc_mean'] = round(perc_sum / one_ticket_length, 2)
    return ticket

In [32]:
# Classes - Different Lottery has diff rules

# Lottery
class Lotto(Enum):
    """Lottery games handled by the division calculators."""
    # division_calculator sends this member to wed_division_calculator.
    WED = 'MonWedLotto'
    # Any value other than WED is sent to sat_division_calculator.
    SAT = 'TattsLotto'

class Division(Enum):
    """Prize-division labels; the division calculators return the `.value` strings."""
    first = "First"
    second = "Second"
    third = "Third"
    fourth = "Fourth"
    fifth = "Fifth"
    sixth = "Sixth"
    # Returned when a ticket satisfies none of the division rules.
    error = "error"

In [33]:
# Division Calculator - per each ticket
# lotto - Lotto Class
# Draw - Winning Numbers
def division_calculator(lotto, draw, ticket):
    """Score `ticket` against `draw` with the rules of the given lottery.

    Lotto.WED uses wed_division_calculator; every other value falls back to
    sat_division_calculator. Returns that calculator's division string.
    """
    calculator = wed_division_calculator if lotto == Lotto.WED else sat_division_calculator
    return calculator(draw, ticket)
    
def wed_division_calculator(draw, ticket) -> str:
    """Return the division label a ticket wins in the Mon/Wed lottery.

    Args:
        draw: drawn numbers; positions 0-5 are winning numbers, every later
            position is a supplementary number.
        ticket: iterable of the ticket's numbers (8 in this notebook).

    Returns:
        One of the Division values, checked in this order:
            First  - 6 winning
            Second - 5 winning + at least 1 supplementary
            Third  - 5 winning
            Fourth - 4 winning
            Fifth  - 3 winning + 2 supplementary
            Sixth  - winning + supplementary matches totalling at least 3
            error  - none of the above

    Because a ticket holds 8 numbers it can match both supplementaries, so
    "at least" is used where an exact count used to be required. Previously
    5 winning + 2 supp was demoted to Third, and 3 + 1 or 2 + 2 won nothing
    while 3 + 0 won Sixth.
    """
    winning_match = 0
    supp_match = 0

    # At most 8 x 8 comparisons per ticket.
    for index, draw_num in enumerate(draw):
        hits = sum(1 for ticket_num in ticket if ticket_num == draw_num)
        if index > 5:
            supp_match += hits
        else:
            winning_match += hits

    if winning_match == 6:
        return Division.first.value

    if winning_match == 5:
        return Division.second.value if supp_match >= 1 else Division.third.value

    if winning_match == 4:
        return Division.fourth.value

    if winning_match == 3 and supp_match == 2:
        return Division.fifth.value

    if winning_match + supp_match >= 3:
        return Division.sixth.value

    return Division.error.value

def sat_division_calculator(draw, ticket) -> str:
    """Return the division label a ticket wins in the Saturday lottery.

    Args:
        draw: drawn numbers; positions 0-5 are winning numbers, every later
            position is a supplementary number.
        ticket: iterable of the ticket's numbers (8 in this notebook).

    Returns:
        One of the Division values, checked in this order:
            First  - 6 winning
            Second - 5 winning + at least 1 supplementary
            Third  - 5 winning
            Fourth - 4 winning
            Fifth  - 3 winning + at least 1 supplementary
            Sixth  - 3 winning
            error  - none of the above

    The Fifth rule used to require exactly 2 supplementary matches (copied
    from the Mon/Wed calculator), contradicting the "3 winnings + 1 supp"
    rule stated for this lottery; 3 + 1 therefore fell through to Sixth.
    "At least 1" is used because an 8-number ticket can match both
    supplementaries.
    """
    winning_match = 0
    supp_match = 0

    # At most 8 x 8 comparisons per ticket.
    for index, draw_num in enumerate(draw):
        hits = sum(1 for ticket_num in ticket if ticket_num == draw_num)
        if index > 5:
            supp_match += hits
        else:
            winning_match += hits

    if winning_match == 6:
        return Division.first.value

    if winning_match == 5:
        return Division.second.value if supp_match >= 1 else Division.third.value

    if winning_match == 4:
        return Division.fourth.value

    if winning_match == 3:
        return Division.fifth.value if supp_match >= 1 else Division.sixth.value

    return Division.error.value

In [34]:
# Marking your generated tickets against previous draws
def game_marker(lotto, draw, game) -> dict:
    """Count how many tickets of a game land in each division.

    Args:
        lotto: lottery selector passed through to division_calculator.
        draw: the drawn numbers to mark against.
        game: iterable of tickets. Each ticket is either a plain sequence of
            numbers or a dict with a 'ticket' entry, which is the shape
            ticket_generator / gameGenerator produce.

    Returns:
        {division label: number of tickets with that result}.
    """
    game_result = {}
    for ticket in game:
        # Generated tickets are dicts; iterating one directly would compare
        # its keys ('ticket', 'perc_mean') against the draw and never match.
        numbers = ticket['ticket'] if isinstance(ticket, dict) else ticket
        result = division_calculator(lotto, draw, numbers)
        game_result[result] = game_result.get(result, 0) + 1

    return game_result

# Game - n tickets 
def gameGenerator(game_data, num_tickets, best_n = 5, start_no = None) -> list:
    """Generate `num_tickets` tickets from `game_data`.

    Every ticket comes from ticket_generator, called with the fixed
    threshold 0.5, `best_n` as the top-numbers count and `start_no` as the
    preferred starting number. Returns the tickets as a list.
    """
    return [
        ticket_generator(game_data, 0.5, best_n, start_no)
        for _ in range(num_tickets)
    ]

In [36]:
# 0.5 is the `threshold` argument: a placeholder for a future implementation
# that ticket_generator does not read yet.
# 5 is n_top_nums: each next number is picked from the current number's
# 5 leading related numbers.
# The result is random (no seed is set), so the output differs between runs.

ticket_generator(df_wed_corrNums, 0.5, 5)

{'ticket': [26, 22, 13, 28, 42, 16, 39, 14], 'perc_mean': 31.71}

In [37]:
# Arguments: data, number of tickets (10), top-n related numbers (5), start number.
# Passing None as the start number makes every ticket begin with a random
# number; pass a number instead to force that number to come first.
gameGenerator(df_wed_corrNums, 10, 5, None)

[{'ticket': [34, 4, 13, 23, 42, 40, 11, 35], 'perc_mean': 27.28},
 {'ticket': [45, 27, 32, 42, 13, 23, 22, 31], 'perc_mean': 27.12},
 {'ticket': [21, 41, 45, 27, 32, 39, 28, 22], 'perc_mean': 23.74},
 {'ticket': [37, 41, 42, 40, 11, 16, 3, 4], 'perc_mean': 30.97},
 {'ticket': [12, 33, 44, 39, 45, 24, 16, 30], 'perc_mean': 34.17},
 {'ticket': [42, 32, 30, 40, 11, 35, 19, 20], 'perc_mean': 26.75},
 {'ticket': [45, 41, 37, 38, 17, 14, 5, 8], 'perc_mean': 23.63},
 {'ticket': [26, 22, 23, 39, 13, 42, 32, 30], 'perc_mean': 28.55},
 {'ticket': [43, 30, 32, 42, 16, 3, 29, 39], 'perc_mean': 28.17},
 {'ticket': [5, 45, 41, 37, 20, 42, 14, 38], 'perc_mean': 22.75}]

In [38]:
############### Plot Generating

In [39]:
# Want to provide following info: Top 10 Closely-related numbers, and mean without those 10.
# in histogram plot with proper label.
# Plot can be drawn by APIs (whether CSS or python) manipulating those infos.

def each_num_info(num_data, n):
    """Split one number's relations into its top `n` entries and a summary of the rest.

    Args:
        num_data: mapping {partner: count}, ordered best first. The final
            entry is always left out of the "rest" summary.
            NOTE(review): presumably because the last entry is the NaN
            self-relation that sorting puts at the end - confirm.
        n: how many leading entries to return; values below 0 count as 0.

    Returns:
        (top_n_data, (rest_mean, rest_count)) where top_n_data is a list of
        (partner, count) tuples, rest_mean is the floor-divided mean of the
        remaining counts and rest_count is how many entries it covers.
        When nothing remains, (0, 0) is returned instead of raising
        ZeroDivisionError or reporting a count of -1.
    """
    listed_data = list(num_data.items())
    length_listed_data = len(listed_data)

    index_data_seg = max(0, min(n, length_listed_data))
    top_n_data = listed_data[:index_data_seg]

    # Everything after the top n, except the final entry.
    rest_data = listed_data[index_data_seg:max(index_data_seg, length_listed_data - 1)]
    no_rest_data = len(rest_data)
    if no_rest_data == 0:
        return top_n_data, (0, 0)

    rest_data_sum = sum(entry[1] for entry in rest_data)
    return top_n_data, (rest_data_sum // no_rest_data, no_rest_data)


def retrieve_plot_info(corrNums_data, n):
    """Collect the plotting inputs for every number.

    For each number in `corrNums_data` the result holds, in this order:
        'top_n' -> first element returned by each_num_info(nums, n)
        'exc_n' -> second element returned by each_num_info(nums, n)
        'reps'  -> the number's own 'reps' value
    """
    def summarise(num_data):
        top_entries, rest_summary = each_num_info(num_data['nums'], n)
        return {
            'top_n': top_entries,
            'exc_n': rest_summary,
            'reps': num_data['reps'],
        }

    return {num: summarise(num_data) for num, num_data in corrNums_data.items()}

In [193]:
# Data Generations

# Mon/Wed lottery: summarise the top 4 relations, tabulate, export.
wed_plot_data = retrieve_plot_info(df_wed_corrNums, 4)
df_wed_plot = pd.DataFrame(wed_plot_data)
df_wed_plot.to_csv('csv/plot_MonWed.csv', encoding='utf-8', index=False)

# Saturday lottery: the same three steps.
sat_plot_data = retrieve_plot_info(df_sat_corrNums, 4)
df_sat_plot = pd.DataFrame(sat_plot_data)
df_sat_plot.to_csv('csv/plot_Sat.csv', encoding='utf-8', index=False)

In [167]:
# Plot top_n vs Avg.rests in perc_mean

import plotly.graph_objects as go
import plotly.io as pio
import os.path

def plot_graph_all(data, game_name: str):
    """Write one bar-chart HTML file per number: its top-n relations vs. the rest.

    Args:
        data: {number: {'top_n': [(partner, count), ...],
                        'exc_n': (rest_mean, rest_count), ...}}, the shape
            produced by retrieve_plot_info.
        game_name: prefix used for the output directory and file names.

    Files are written to "<game_name>_top_<n>_plots/", created if missing.
    Nothing is written when `data` is empty.
    """
    if not data:
        return

    # n is taken from entry 1 when present (as before), otherwise from the
    # first entry, so data that lacks the number 1 no longer raises KeyError.
    reference = data[1] if 1 in data else next(iter(data.values()))
    top_n = len(reference['top_n'])
    dir_name = game_name + "_top_" + str(top_n) + "_plots"
    file_name_template = game_name + "_top_" + str(top_n) + "_for_"

    # Create the output directory once instead of checking on every iteration.
    os.makedirs(dir_name, exist_ok=True)

    # A distinct loop name avoids shadowing the `data` parameter.
    for num, num_info in data.items():
        title_name = "Top " + str(top_n) + " Closely-Related numbers for " + str(num)
        file_name = file_name_template + str(num)

        x_no = [str(entry[0]) for entry in num_info['top_n']]
        y_reps = [entry[1] for entry in num_info['top_n']]

        # One grey bar per related number, sized from this number's own
        # entries so the colours always line up with the bars.
        colors = ['lightslategray'] * len(x_no) + ['crimson']

        # Final (crimson) bar: average of the remaining relations.
        x_no.append("Avg.rests")
        y_reps.append(num_info['exc_n'][0])

        fig = go.Figure(data=[go.Bar(
            x = x_no,
            y = y_reps,
            marker_color=colors
        )])

        fig.update_layout(title_text=title_name)

        pio.write_html(fig, file=os.path.join(dir_name, file_name + '.html'), auto_open=False)
            

In [192]:
# Plot Generated Tickets

import plotly.graph_objects as go
from plotly.subplots import make_subplots

def table_plot_game(game, game_name):
    """Show the generated tickets as a table and save it to 'generated_tickets.html'.

    Args:
        game: non-empty list of ticket dicts, each with a 'ticket' list and a
            'perc_mean' value (the shape gameGenerator returns).
        game_name: lottery name shown in the table title.

    Raises:
        ValueError: if `game` is empty.
    """
    if not game:
        raise ValueError("table_plot_game needs at least one generated ticket")

    # Build the frame from the argument. The function used to read a global
    # `df` that is not defined anywhere in the notebook and ignored `game`.
    df = pd.DataFrame(game)

    fig = make_subplots(
        rows=1, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.04,
        specs=[[{"type": "table"}]]
    )

    # Header: ticket index, one column per ticket position, then the mean.
    table_cols = ["Ticket No"]
    for i in range(len(df['ticket'].iloc[0])):
        table_cols.append('#' + str(i+1))

    table_cols.append('perc_mean')

    fig.add_trace(
        go.Table(
            header=dict(
                values= table_cols,
                font=dict(size=10),
                align="left"
            ),
            cells=dict(
                # zip(*...) transposes the tickets into one column per position.
                values=[df.index.values + 1] +
                [num for num in zip(*df['ticket'])] +
                [[str(mean) + ' %' for mean in df['perc_mean'].tolist()]] ,
                align = "left")
        ),
        row=1, col=1
    )
    fig.update_layout(
        height=600,
        width=900,
        showlegend=False,
        # Report the real ticket count instead of a hard-coded "10".
        title_text="Generated " + str(len(df)) + " Tickets of " + game_name,
    )

    fig.show()
    pio.write_html(fig, file= 'generated_tickets.html', auto_open=False)
    
# Generate ten Mon/Wed tickets (random start numbers) and render them as a table.
game = gameGenerator(df_wed_corrNums, num_tickets=10, best_n=5, start_no=None)
table_plot_game(game=game, game_name="MonWedLotto")