In [1]:
# Libraries
import sys
# Raw string: the backslashes of the Windows path are literal characters, not
# escape sequences ('\P', '\L', '\D', '\S' are invalid escapes in a normal string).
sys.path.append(r'C:\Proyectos\Loteria\DataBase\Scrapping')
from scrapping import get_data
import pandas as pd
import numpy as np
from collections import Counter

In [2]:
# Functions
def count_skips(df, list_numbers):
    """Count, per column, how many trailing rows are falsy.

    Each column of ``df`` is scanned from its last row upwards; the number of
    falsy values met before the first truthy one is that column's skip count.

    Parameters
    ----------
    df : pandas.DataFrame
        Hit matrix; a truthy cell marks a hit for that column in that row.
    list_numbers : iterable
        Keys pre-seeded with 0 in the result. Keys that are not columns of
        ``df`` keep that 0; columns of ``df`` missing here are still added.

    Returns
    -------
    dict
        Maps each key / column label to its skip count. A column without any
        truthy value reports the number of rows scanned.
    """
    counts = {key: 0 for key in list_numbers}
    for column in df:
        skipped = 0
        # .iloc walks the rows by position, so the scan does not depend on the
        # index labels (reversed() on a Series looks rows up by integer key).
        for hit in df[column].iloc[::-1]:
            if hit:
                break
            skipped += 1
        # Reached either at the first hit or after the whole column was scanned.
        counts[column] = skipped
    return counts

def max_output(df):
    """Transpose ``df`` and order its rows from highest to lowest.

    The ordering key is the first column of the transposed frame, i.e. the
    first row of the frame that was passed in.
    """
    transposed = df.transpose()
    ranking_key = transposed.columns[0]
    return transposed.sort_values(by = ranking_key, ascending = False)

def year_hits(database, numbers, numbers_quantity):
    """Count how often each number was drawn in each calendar year.

    Parameters
    ----------
    database : pandas.DataFrame
        Must hold a datetime column named 'Dates'; its years define the range
        of years reported (every year from the earliest to the latest).
    numbers : pandas.DataFrame
        Drawn values, row-aligned with ``database`` through the index.
    numbers_quantity : int
        Highest number to count; numbers 1..numbers_quantity are reported.

    Returns
    -------
    pandas.DataFrame
        Long table with columns 'Year', 'Number', 'Count' — one row per
        (year, number) pair, including pairs with a count of 0. Empty (columns
        only) when ``database`` has no usable date.
    """
    columns = ['Year', 'Number', 'Count']
    db_year = database['Dates'].dt.year
    known_years = db_year.dropna()
    if known_years.empty:
        return pd.DataFrame(columns = columns)

    records = []
    for year in range(int(known_years.min()), int(known_years.max()) + 1):
        # Rows drawn in this year; selected once and reused for every number.
        drawn = numbers[db_year == year]
        for number in range(1, numbers_quantity + 1):
            count = int(drawn.eq(number).sum().sum())
            records.append({'Year': year, 'Number': number, 'Count': count})

    # Built in one go: concatenating inside the loop copies the whole table on
    # every iteration and leaves object-dtype columns behind.
    return pd.DataFrame(records, columns = columns)

In [3]:
# Load the database and take the two column slices used by the rest of the notebook.
# Raw string: the backslashes of the Windows path are literal characters, not
# escape sequences ('\P', '\L', '\D', '\d' are invalid escapes in a normal string).
db = pd.read_parquet(r'C:\Proyectos\Loteria\DataBase\db.parquet')
# Columns 2-6: the main numbers (read below by the names Nro1..Nro5)
winning_numbers = db.iloc[:, 2:7]
# Columns 7-8: presumably the two stars — verify against the parquet schema
winning_stars = db.iloc[:, 7:9]

# Build the hit matrix: one row per draw, one column per number ('1'..'50'),
# True where that number is in any of the columns Nro1..Nro5 of that draw.
# The rows are taken by position (to_numpy), so the result does not depend on
# the index labels carried by db.
drawn_numbers = winning_numbers[[f"Nro{e}" for e in range(1, 6)]]
skip_winners_bool = pd.DataFrame(
    {f"{i}": drawn_numbers.eq(i).any(axis = 1).to_numpy() for i in range(1, 51)},
    index = range(len(winning_numbers)),
)

# Add an extra all-True row before the first draw, so a number that has not
# appeared yet in the game history still has a starting point to count from.
# Built directly as bool: an empty frame filled with fillna(True) is object dtype
# and would turn the whole concatenated matrix into object dtype.
d_0 = pd.DataFrame(True, columns = skip_winners_bool.columns, index = [0])

# Create the final DataFrame (row 0 is the extra row, the draws follow from row 1)
skip_winners = pd.concat([d_0, skip_winners_bool]).reset_index(drop = True)
del d_0
del skip_winners_bool
del drawn_numbers

# Year history for numbers and stars: rows are years, columns are numbers,
# cells are the hits of that number in that year. year_hits yields exactly one
# row per (Year, Number), so pivot_table's default aggregation leaves the counts as they are.
numbers_year_history = year_hits(db, winning_numbers, 50).pivot_table(index = 'Year', columns = 'Number', values = 'Count', fill_value = 0)
stars_year_history = year_hits(db, winning_stars, 12).pivot_table(index = 'Year', columns = 'Number', values = 'Count', fill_value = 0)

# Hits of numbers: total per number as a single row labelled 'Hits'.
# Summed from the yearly table above instead of running year_hits over the
# whole database a second time.
numbers_hits = numbers_year_history.sum().rename('Hits').rename_axis(None).to_frame().T

# Hits of stars since draw 940, this is because of the change of rules. The star 12 was added in September 24th 2016
# Column 0 is kept in the slice because year_hits reads 'Dates' from this frame
# (presumably column 0 is 'Dates' — verify against the parquet schema).
stars_filtered = db[(db['Sorteos'] > 940)].iloc[:, [0, 7, 8]]
stars_filtered_hits = (
    year_hits(stars_filtered, stars_filtered, 12)
    .pivot_table(index = 'Year', columns = 'Number', values = 'Count', fill_value=0)
    .sum()
    .rename('Hits')
    .rename_axis(None)
    .to_frame()
    .T
)

# Average of numbers: hits of each number divided by the number of draws and
# by the 5 numbers drawn per draw, as a one-column 'Average' frame
numbers_share = numbers_hits.iloc[0] / len(db) / 5
numbers_average = numbers_share.rename('Average').to_frame()
numbers_average.index.name = None
del numbers_share
# Sum of the averages spread over the 50 numbers, plus 0.001
average_numbers = numbers_average['Average'].sum() / 50 + 0.001

# Average of stars, since draw 940, because of the change of the rules with the
# inclusion of star number 12: hits divided by the filtered draws and by the 2 stars per draw
stars_share = stars_filtered_hits.iloc[0] / len(stars_filtered) / 2
stars_average = stars_share.rename('Average').to_frame()
stars_average.index.name = None
del stars_share
# Sum of the averages spread over the 12 stars, plus 0.001
average_stars = stars_average['Average'].sum() / 12 + 0.001

In [None]:
# Create the list of draws and of numbers, then record for every draw how many
# rows each number had gone without a hit, counted up to and including that draw.
# skip_winners holds one extra leading row (label 0) plus one row per draw, so
# len(skip_winners) - 1 is the number of draws. skip_winners_bool cannot be read
# here: it is deleted as soon as skip_winners has been built.
# NOTE(review): np.arange excludes its stop value, so the last row label of
# skip_winners is never processed — confirm that this is intended.
draws = list(np.arange(1, len(skip_winners) - 1))
numbers = [str(i) for i in range(1, 51)]
# .loc[:draw] is label based and inclusive: rows 0..draw
dicts = {draw: count_skips(skip_winners.loc[:draw], numbers) for draw in draws}

# One row per draw, one column per number
skip_numbers = pd.DataFrame(dicts).T

In [None]:
# Rank the numbers by their skip count at the last processed draw (lowest first)
final_draw = len(skip_numbers)
ranked_skips = sorted(dicts[final_draw].items(), key = lambda pair: pair[1])
last_draw = pd.DataFrame({
    'Numero': [number for number, skip in ranked_skips],
    'Skips': [int(skip) for number, skip in ranked_skips],
})

# Window of draw labels: the last 12 draws plus the draw right before them
last_12_draws = np.arange(final_draw - 12, final_draw + 1)
sk_12 = skip_numbers.loc[last_12_draws]

# For every hit inside the window (skip count 0), keep the skip count that the
# same number had one draw earlier
skips = np.arange(0, 19)
aus_12 = []
for draw in last_12_draws[1:13]:
    for column in sk_12:
        if sk_12.loc[draw, str(column)] == 0:
            aus_12.append(sk_12.loc[draw - 1, str(column)])

# Tally the collected skip counts: the whole list for column '12', entries
# 25..59 for column '7' (the last 7 draws when every draw contributes 5 hits)
counter_12 = Counter(aus_12)
counter_7 = Counter(aus_12[25:60])
last_7 = [counter_7.get(skip, 0) for skip in skips]
last_12 = [counter_12.get(skip, 0) for skip in skips]
skips_7_12 = pd.DataFrame({'7': last_7, '12': last_12})

In [None]:
# Blocks of ten consecutive numbers: 1-10, 11-20, 21-30, 31-40, 41-50
decade_starts = range(1, 51, 10)
groups = [list(range(start, start + 10)) for start in decade_starts]
group_names = [tuple(members) for members in groups]
groups_of_winners = db.iloc[:, 2:7]

# For every draw, count how many of its values fall inside each block
results = {}
for draw_index, row in groups_of_winners.iterrows():
    hits_per_group = {}
    for group_name, group in zip(group_names, groups):
        hits_per_group[group_name] = sum(1 for num in row if num in group)
    results[draw_index] = hits_per_group

results_df = pd.DataFrame.from_dict(results, orient = 'index')
results_df.columns = ['{}_to_{}'.format(start, start + 9) for start in decade_starts]

# Number of draws, among the last 10 and the last 5, in which each block had at least one hit
sg_10 = results_df.iloc[-10:]
sg_5 = results_df.iloc[-5:]
groups_df = pd.DataFrame({'10_games': sg_10.gt(0).sum(), '5_games': sg_5.gt(0).sum()}).T

In [None]:
# Show the row(s) whose 'Sorteos' value is 940, the threshold used for the star statistics
db[db['Sorteos'] == 940]

In [None]:
# Numbers ordered by ascending skip count at the last processed draw
last_draw

In [None]:
# Rows are skip counts 0-18; columns '7' and '12' tally how often a hit came after that skip count
skips_7_12

In [None]:
# Per draw: how many drawn numbers fall in each block of ten (1_to_10 ... 41_to_50)
results_df

In [None]:
# Per block of ten: number of the last 10 / last 5 draws with at least one hit in that block
groups_df

In [None]:
# Hits per year (rows) for each number 1-50 (columns)
numbers_year_history

In [None]:
# Hits per year (rows) for each star 1-12 (columns)
stars_year_history