In [1]:
import datetime as dt
import random

import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
def getData(source_data):
    if(source_data=='from_file'):
        data = pd.read_csv('result.csv', sep=';')
    else:
        data = pd.read_csv('https://www.multipasko.pl/wyniki-csv.php?f=euro-jackpot', sep=';')
        
    data = data.drop('Unnamed: 9', axis=1)
    columns_name = ['id','day','month', 'year','N1','N2','N3','N4','N5','E1','E2']
    data = data.set_axis(columns_name,axis=1, inplace=False)
    data = data.astype({"day": str, "month": str, "year": str})
    data['day'] = data['day'].str.zfill(2)
    data['month'] = data['month'].str.zfill(2)
    return data

In [3]:
def frequencyNumber(data):
    numbers = data.stack().reset_index(drop=True)
    numbers_count = pd.DataFrame(numbers.value_counts(), columns=['Drawn Frequency'])
    numbers_count.reset_index(inplace=True)
    numbers_count.rename(columns = {'index':'Number'}, inplace = True)
    return numbers_count

In [35]:
def plotBar(data,x,y,color,legend):
    fig = px.bar(data, x=x, y=y, color=color)
    # fig.update_xaxes(type='category')

    fig.update_layout(
    title=legend["title"],
    xaxis_title=legend["x_label"],
    yaxis_title=legend["y_label"],
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="RebeccaPurple"
    )
    )

    fig.show()

In [5]:
def topNumbers(data,start_range,end_range,numbers_freq,top_num):
    top_data = pd.DataFrame(numbers_freq.head(top_num))
    top_list = list()
    for i in range(len(top_data)):
        filtr = (data.loc[:,start_range:end_range] == top_data['Number'][i])
        filtr = filtr.sum(axis=1)
        temp = data.drop(data[filtr == 0].index)
        day = str(temp.day.iloc[-1])
        month = str(temp.month.iloc[-1])
        year = str(temp.year.iloc[-1])
        top_list.append(day+"/"+month+"/"+year)

    top_data['Last drawn date (D/M/Y)'] = pd.DataFrame(top_list)
    time_ago = pd.to_datetime(top_data['Last drawn date (D/M/Y)'], format="%d/%m/%Y")
    time_ago = abs((time_ago - pd.Timestamp.today()).dt.days)
    top_data['Days Ago'] = time_ago

    top_data.index += 1
    top_data.style.hide_index()
    return top_data

In [6]:
def mean_std(data,start_range,end_range):
    sum_draw = data.loc[:,start_range:end_range].sum(axis=1)
    sum_draw = pd.DataFrame(sum_draw.value_counts(), columns={'Frequency of sum'})
    sum_draw.reset_index(inplace=True)
    sum_draw.rename(columns = {'index':'Sum of numbers'}, inplace = True)
    # sum_draw.plot.bar(figsize=(25,5), ylabel = 'Number of draws', xlabel = 'Number')
    percent = list()
    std = sum_draw['Sum of numbers'].std()
    mean = sum_draw['Sum of numbers'].mean()
    return [mean,std]

In [7]:
def standardDeviation(data,start_range,end_range):
    sum_draw = data.loc[:,start_range:end_range].sum(axis=1)
    sum_draw = pd.DataFrame(sum_draw.value_counts(), columns={'Frequency of sum'})
    sum_draw.reset_index(inplace=True)
    sum_draw.rename(columns = {'index':'Sum of numbers'}, inplace = True)
    # sum_draw.plot.bar(figsize=(25,5), ylabel = 'Number of draws', xlabel = 'Number')
    percent = list()
    std = sum_draw['Sum of numbers'].std()
    mean = sum_draw['Sum of numbers'].mean()
    for i in range(len(sum_draw)):
        val = sum_draw.loc[i]['Sum of numbers']
        if((val > (mean+std)) | (val < (mean-std))):
            percent.append('> mean ± std')
        else:
            percent.append('mean ± std')

    sum_draw['Std'] = pd.DataFrame(percent)
    return sum_draw

In [8]:
def numberEvenOdd(data,start_range,end_range):

    data = data.loc[:,start_range:end_range]
    width = len(data.columns)
    col_names = list(data.columns)

    even_count = [0] * len(data)
    odd_count = [0] * len(data)

    for n in range(width):
        temp = list(data[col_names[n]])

        for i in range(len(temp)):
            if temp[i] % 2 == 0:
                even_count[i] += 1
            else:
                odd_count[i] += 1
    
    even_odd = pd.DataFrame({'Even': even_count,'Odd':odd_count})
    even_odd_count = pd.DataFrame(even_odd.value_counts(), columns={'Frequency'})
    even_odd_count.reset_index(inplace=True)
    return even_odd_count

In [9]:
def multipleWins(data,start_range,end_range):
    multi_win = pd.DataFrame(data.loc[:,start_range:end_range])
    multi_win.style
    multi_win_result = pd.DataFrame(multi_win.value_counts())
    multi_win_result.reset_index(inplace=True)
    col_names = list(multi_win_result.columns)
    multi_win_result.rename(columns = {col_names[-1]:'Frequency'}, inplace = True)

    
    if(len(multi_win_result.loc[multi_win_result.Frequency > 1])>0):
        return multi_win_result.loc[multi_win_result.Frequency > 1]
    else:
        return('NO DUPLICATED DRAW')



In [10]:
def differencesNumbers(data,start_range,end_range):
    data = data.loc[:,start_range:end_range]
    width = len(data.columns)
    col_names = list(data.columns)
    diff_list = list()
    for i in range(width-1):
        
        val = data[col_names[i+1]]-data[col_names[i]]
        diff_list.append(val)
    
    diff_list = np.array(diff_list)
    diff_list = diff_list.flatten()
    result = pd.DataFrame(diff_list, columns={'Frequency'})
    result = result[result['Frequency'] > 0]
    return result
    

In [11]:
def redrawingNumbers(data,start_range,end_range):
    data = data.loc[:,start_range:end_range]
    width = len(data.columns)
    col_names = list(data.columns)

    result_list = list()
    for r in range(len(data)):
        for c in range(width):
            number = data.iloc[r,c]

            i=r
            while(i<(len(data)-1)):
                i = i+1

                if(any(number==data.iloc[i,:])):
                    diff = i - r
                    result_list.append(diff)
                    break
                else:
                    continue
                
                    
    return pd.DataFrame(result_list)


In [12]:
def randomStatisticalGenerator(data,start_range,end_range,top_numbers,std_differences,even_odds,number_ticket,numbers,uniqe,save):

    columns_name = np.arange(1,numbers+1)
    df = pd.DataFrame(columns=columns_name)

    diff_max = np.percentile(std_differences_numbers['Frequency of sum'],20)
    diff_max = list(std_differences_numbers.loc[std_differences_numbers['Frequency of sum'] > diff_max]['Sum of numbers'])
    mean_std_numbers = mean_std(data,start_range,end_range) 

    sum_max = mean_std_numbers[0] + mean_std_numbers[1]
    sum_min = mean_std_numbers[0] - mean_std_numbers[1]

    even_odds_threshold = 0.2
    even_odds = even_odds[even_odds['Frequency'].max() * even_odds < even_odds['Frequency']]
            
    freq = top_numbers['Days Ago']
    low_weight = 1
    hight_weight = 1.05
    step = (hight_weight-low_weight) / (max(freq)-min(freq))
    list_weights = list(freq * step +1)
    list_number = list(top_numbers['Number'])

    i = 0
    while(i<number_ticket):
        random_list = list()
        random_list = random.choices(list_number, list_weights, k=numbers)
        lenght = len(np.unique(random_list))
        
        if(lenght==numbers):
            random_list.sort()
            shift_list = list(random_list)
            shift_list.pop(0)
            diff = np.subtract(shift_list, random_list[:-1])
            
            if(all(item in diff_max for item in diff)):
                random_list_sum = sum(random_list)


                if((random_list_sum < sum_max) & (random_list_sum > sum_min)):

                    even_count = 0
                    odd_count = 0
                    for num in random_list:
                        if num % 2 == 0:
                            even_count += 1
                        else:
                            odd_count += 1
                    if(any(numbers_even_odds['Even']==even_count) & any(numbers_even_odds['Odd']==odd_count)):
                        i += 1
                        df.loc[len(df)] = random_list
                        # if(i%(number_ticket*0.2)==0):
                        #     print(i,'/',number_ticket)

    return df

In [13]:
def testGenerated(data,winnig_numbers,start_range,end_range):

    winnig_numbers = winnig_numbers.loc[:,start_range:end_range]
    width = len(winnig_numbers.columns)
    col_names = list(winnig_numbers.columns)
    
    for i in range(width):
        logical = (int(winnig_numbers[col_names[i]]) == data)
        if(i==0):
            result = logical
        else:
            result = result | logical

    
    return list(result.sum(axis=1))

In [50]:
def averagePrize(number,euro_numbers):
    # Average prize all country - Dec. 2022
    data = [[5, 2, 38515339], [5, 1, 860141], [5,0,136513],[4,2,4597],[4,1,253.13],
    [4,0,111.59],[3,2,73.33],[3,1,19.06],[3,0,15.19],[2,2,21.12],[2,1,8.26],[1,2,10.59]]
    
    # win_degree = list(['5+2','5+1','5+0','4+2','4+1','4+0','3+2','3+1','3+0','2+2','2+1','1+2'])
    # numbers_degree = list(11 * [0])

    df = pd.DataFrame(data, columns=['Numbers', 'Euro_numbers', 'Prize'])
    total_win = 0

    generated_numbers = pd.DataFrame()
    generated_numbers['Numbers'] = number
    generated_numbers['Euro_numbers'] = euro_numbers
    num_tickets = pd.DataFrame(generated_numbers.value_counts(ascending=True), columns=['Hits'])
    num_tickets.reset_index(inplace=True)
    # num_tickets.rename(columns = {'index':'Numbers + Euro'})

    for i in range(len(generated_numbers)):
        logical = (generated_numbers.iloc[i] == df)
        win = df['Prize'].loc[(logical.Euro_numbers == True)&(logical.Numbers == True)]
        # print(index((logical.Euro_numbers == True)&(logical.Numbers == True)))
        if(len(win)>0):
            win = float(win)
            total_win = total_win + win

    ticket_cost = len(generated_numbers) * 2

    win_relative = total_win - ticket_cost

    sum_arr = [[int(ticket_cost), int(total_win), int(win_relative)]]
    summary = pd.DataFrame(sum_arr, columns=['Coupon price', 'Total winnings', 'Summary'])

    return summary, num_tickets


In [15]:
def randomGenerator(numbers, max_number, number_tickets):

    columns_name = np.arange(1,numbers+1)
    df = pd.DataFrame(columns=columns_name)

    for i in range(number_tickets):

        random_list = random.sample(range(1, max_number), numbers)
        df.loc[len(df)] = random_list
    return df

# MAIN

In [44]:
data = getData('online')
# data = getData('from_file')

########## RUN THIS SECTION IN TEST MODE ###########
draw_before_test = 3
test_value = data.tail(draw_before_test).head(1)
data.drop(data.tail(draw_before_test).index,inplace=True)
####################################################

freq_numbers = frequencyNumber(data.loc[:,'N1':'N5'])
freq_euro_numbers = frequencyNumber(data.loc[:,'E1':'E2'])
# legend = {"title": "Number Frequency", "x_label": "Number", "y_label": "Drawn Frequency"}
# plotBar(freq_numbers,'Number', 'Drawn Frequency', None, legend)
# legend = {"title": "Euro Number Frequency", "x_label": "Number", "y_label": "Drawn Frequency"}
# plotBar(freq_euro_numbers,'Number', 'Drawn Frequency', None, legend)

top_numbers = topNumbers(data,'N1','N5',freq_numbers,50)
top_euro_numbers = topNumbers(data,'E1','E2',freq_euro_numbers,12)
# display(top_numbers.head(5).style.hide_index())
# display(top_euro_numbers.head(5).style.hide_index())

std_numbers = standardDeviation(data,'N1','N5')
std_euro_numbers = standardDeviation(data,'E1','E2')
# legend = {"title": "", "x_label": "Sum of numbers in the draw", "y_label": "Drawn Frequency"}
# plotBar(std_numbers,'Sum of numbers', 'Frequency of sum', 'Std', legend)
# legend = {"title": "", "x_label": "Sum of euro numbers in the draw", "y_label": "Drawn Frequency"}
# plotBar(std_euro_numbers,'Sum of numbers', 'Frequency of sum', 'Std', legend)

numbers_even_odds = numberEvenOdd(data,'N1','N5')
euro_numbers_even_odds = numberEvenOdd(data,'E1','E2')
# display(numbers_even_odds.style.hide_index())
# display(euro_numbers_even_odds.style.hide_index())

multiple_winning_numbers = multipleWins(data,'N1','N5')
multiple_winning_euro_numbers = multipleWins(data,'E1','E2')
# display(multiple_winning_numbers)
# display(multiple_winning_euro_numbers.head(9).style.hide_index())

differences_numbers = differencesNumbers(data,'N1','N5')
freq_differences_numbers = frequencyNumber(differences_numbers)
std_differences_numbers = standardDeviation(differences_numbers,'Frequency','Frequency')
differences_euro_numbers = differencesNumbers(data,'E1','E2')
freq_differences_euro_numbers = frequencyNumber(differences_euro_numbers)
std_differences_euro_numbers = standardDeviation(differences_euro_numbers,'Frequency','Frequency')
# legend = {"title": "", "x_label": "The difference between numbers", "y_label": "Frequency"}
# plotBar(std_differences_numbers,'Sum of numbers', 'Frequency of sum', None, legend)
# legend = {"title": "", "x_label": "The difference between euro numbers", "y_label": "Frequency"}
# plotBar(std_differences_euro_numbers,'Sum of numbers', 'Frequency of sum', None, legend)

redrawing_numbers = redrawingNumbers(data,'N1','N5')
freq_redrawing_numbers = frequencyNumber(redrawing_numbers)
std_redrawing_numbers = standardDeviation(freq_redrawing_numbers,'Drawn Frequency','Drawn Frequency')
redrawing_euro_numbers = redrawingNumbers(data,'E1','E2')
freq_redrawing_euro_numbers = frequencyNumber(redrawing_euro_numbers)
std_redrawing_euro_numbers = standardDeviation(freq_redrawing_euro_numbers,'Drawn Frequency','Drawn Frequency')
# legend = {"title": "", "x_label": "Number of draws until the number returns ", "y_label": "Frequency"}
# plotBar(freq_redrawing_numbers,'Number', 'Drawn Frequency', None, legend)
# legend = {"title": "", "x_label": "Number of draws until the euro number returns ", "y_label": "Frequency"}
# plotBar(freq_redrawing_euro_numbers,'Number', 'Drawn Frequency', None, legend)


# Generate and test number

In [51]:
numbers_5 = randomStatisticalGenerator(data,'N1','N5',top_numbers,std_differences_numbers,numbers_even_odds,1000,5,0,1)
numbers_2 = randomStatisticalGenerator(data,'E1','E2',top_euro_numbers,std_differences_euro_numbers,euro_numbers_even_odds,1000,2,0,1)

winning_5 = testGenerated(numbers_5,test_value,'N1','N5')
winning_2 = testGenerated(numbers_2,test_value,'E1','E2')

summary, num_tickets = averagePrize(winning_5,winning_2)

display(summary.style.hide_index())
display(num_tickets.style.hide_index())



Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version.  Do `left, right = left.align(right, axis=1, copy=False)` before e.g. `left == right`


Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version.  Do `left, right = left.align(right, axis=1, copy=False)` before e.g. `left == right`



Coupon price,Total winnings,Summary
2000,287,-1712


Numbers,Euro_numbers,Hits
3,1,1
4,0,1
2,1,19
2,0,42
1,1,114
0,1,170
1,0,241
0,0,412


In [18]:
numbers_5 = randomGenerator(5, 50, 1000)
numbers_2 = randomGenerator(2, 12, 1000)

winning_5 = testGenerated(numbers_5,test_value,'N1','N5')
winning_2 = testGenerated(numbers_2,test_value,'E1','E2')

summary, num_tickets = averagePrize(winning_5,winning_2)


Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version.  Do `left, right = left.align(right, axis=1, copy=False)` before e.g. `left == right`

