In [1]:
# Imports
# Standard Libraries of Python
import itertools
from collections import Counter
from typing import Tuple, Union
from decimal import Decimal, ROUND_HALF_UP, getcontext
from datetime import datetime
getcontext().prec = 5
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

# Dependencies
import pandas as pd
import numpy as np
from pandas import DataFrame, Index

# Libraries made for this project
from backend.src.parse import Criteria
from backend.src import pick_numbers
file_route = 'database/files/'

In [None]:
def clean_df_skips(df: DataFrame,columns_id,name) -> DataFrame:
    """Relabel ``df`` in place and hand the same object back.

    Args:
        df: Frame to relabel; it is modified, not copied.
        columns_id: New column labels (must match the current column count,
            otherwise pandas raises ``ValueError``).
        name: Name given to the column axis.

    Returns:
        The same ``df`` object, with its index axis named ``'Draws'``.
    """
    df.columns = columns_id
    # Name both axes: the columns get the caller's label, the index is always 'Draws'.
    for axis_labels, axis_name in ((df.columns, name), (df.index, 'Draws')):
        axis_labels.name = axis_name
    return df

def combination_df(database,low_high_counts,odd_even_counts, window=10):
    """Count, per draw, how often each split occurred in the trailing window.

    For every draw position ``i`` in ``range(len(database))`` the function looks
    at draws ``max(0, i - window + 1) .. i`` and counts how many of them had each
    of the six (first, second) splits 3/2, 2/3, 1/4, 4/1, 0/5 and 5/0.

    The window start is clamped at 0. Previously the first ``window - 1`` draws
    used negative positions, which on a list silently wrap around to the *end*
    of the history (and fail on label-indexed inputs), so early rows were
    counted against the most recent draws.

    Args:
        database: Only its length is used, to know how many draws there are.
        low_high_counts: Indexable by draw position; each entry is read as
            ``entry[0]`` / ``entry[1]``.
            NOTE(review): presumably (low, high) counts per draw — confirm.
        odd_even_counts: Same layout as ``low_high_counts``.
            NOTE(review): presumably (odd, even) counts per draw — confirm.
        window: Number of trailing draws (current one included) to count over.
            Defaults to 10, the value that was previously hard-coded.

    Returns:
        ``(low_high, odd_even)``: two DataFrames with one row per draw, the
        columns ``'3/2', '2/3', '1/4', '4/1', '0/5', '5/0'``, the column axis
        named ``'L/H'`` / ``'O/E'`` and the index named ``'Draws'``.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    COMBINATIONS = [(3,2), (2,3), (1,4), (4,1), (0,5), (5,0)]
    columns_id = ['3/2', '2/3', '1/4', '4/1', '0/5', '5/0']
    n_draws = len(database)

    def _trailing_counts(counts_by_draw):
        # One pass over each window, tallying the (first, second) pair of every draw in it.
        table = {}
        for i in range(n_draws):
            start = max(0, i - window + 1)
            seen = Counter(
                (counts_by_draw[j][0], counts_by_draw[j][1])
                for j in range(start, i + 1)
            )
            table[i] = [seen[combination] for combination in COMBINATIONS]
        # Passing `columns` keeps the six columns even when there are no draws at all.
        return pd.DataFrame.from_dict(table, orient='index', columns=columns_id)

    low_high = clean_df_skips(_trailing_counts(low_high_counts), columns_id, 'L/H')
    odd_even = clean_df_skips(_trailing_counts(odd_even_counts), columns_id, 'O/E')
    return low_high, odd_even

def count_100_combinations(df, columns, combinations, name):
    """Tally how often each combination appears in successive 100-row windows.

    Windows start at row positions ``0 .. len(df) - 101`` and each spans 100
    rows; the window starting at position ``p`` is stored under key ``p + 1``.
    A row matches a combination when ``df[columns[0]]`` equals the
    combination's first element and ``df[columns[1]]`` equals its second.

    Args:
        df: Source frame with at least the two columns named in ``columns``.
        columns: Pair of column labels to compare against each combination.
        combinations: Iterable of two-element combinations; the result is
            relabelled with six fixed column names, so six are expected.
        name: Name for the column axis of the result.

    Returns:
        DataFrame with one row per window and the columns
        ``'3/2', '2/3', '1/4', '4/1', '0/5', '5/0'``.
    """
    columns_id = ['3/2', '2/3', '1/4', '4/1', '0/5', '5/0']
    window_tallies = {}
    # range() is empty for frames of 100 rows or fewer, so no windows are produced then.
    for start in range(len(df) - 100):
        window = df.iloc[start:start + 100]
        tallies = {}
        for combination in combinations:
            is_match = (window[columns[0]] == combination[0]) & (window[columns[1]] == combination[1])
            tallies[combination] = window[is_match][columns[0]].count()
        window_tallies[start + 1] = tallies
    result = pd.DataFrame.from_dict(window_tallies, orient='index')
    return clean_df_skips(result, columns_id, name)

In [None]:
# Test of Tomorrow Numbers
# Build a Criteria object for the main numbers and run its preparation methods.
# The methods are defined in backend.src.parse and are not visible in this notebook.
# NOTE(review): the call order is presumably significant (later steps likely read
# state produced by earlier ones) — confirm before reordering or removing a call.
euromillions = Criteria()

euromillions.define_odd_even()
euromillions.define_low_high()
euromillions.define_combinations_skips()
euromillions.groups_info()
euromillions.apply_transformation()
euromillions.count_skips()
euromillions.skips_for_last_12_draws()
euromillions.get_natural_rotations()
euromillions.get_numbers_clasification()
euromillions.draw_skips()
euromillions.skips_evaluation()

# Criterion steps, followed by the final numbers_of_tomorrow() call.
euromillions.year_criterion()
euromillions.rotation_criterion()
euromillions.position_criterion()
euromillions.group_criterion()
euromillions.numbers_of_tomorrow()

# lotto = pick_numbers.Selection(euromillions)
# lotto.first_number()
# lotto.suggested_numbers()

# tickets = itertools.combinations(lotto._selected_numbers,5)

# ticket = []
# for combination in tickets:
#     ticket.append(combination)
#     print(combination)

In [None]:
# Display the combinations_skips table held by the Criteria object
# (presumably filled in by define_combinations_skips() — confirm).
euromillions.combinations_skips

In [7]:
# For the 3/2 split, count how many of the last 10 draws show a 3 in positional
# column 9 and, separately, in positional column 11.
# NOTE(review): columns 9 and 11 are the ones determine_distribution() reads for
# the odd and low counts respectively — confirm against the Criteria frame layout.
odd_even_count = pd.DataFrame(columns=['odd/even','low/high'])
odd_even_count.loc['3/2','odd/even'] = (euromillions.df.iloc[-10:, 9] == 3).sum()
# Previously this was `5 - <odd/even count>`, which subtracts a number of draws
# (0..10) from the five numbers in a draw; count the low column the same way instead.
odd_even_count.loc['3/2','low/high'] = (euromillions.df.iloc[-10:, 11] == 3).sum()

In [10]:
def determine_distribution(df=None, window=10, odd_col=9, low_col=11):
    """Count how often each n/(5-n) split occurred in the most recent draws.

    For each ``n`` in 3, 2, 1, 4, 0, 5 the function counts the rows, among the
    last ``window`` rows of ``df``, whose ``odd_col`` value equals ``n`` and,
    separately, whose ``low_col`` value equals ``n``.

    Changes from the earlier version: the first column is labelled
    ``'odd/even'`` (it had a stray trailing slash), and the 5/0 split is
    included so all six splits are reported.

    Args:
        df: Frame to analyse. Defaults to the notebook-level ``euromillions.df``.
        window: Number of trailing rows to look at (default 10).
        odd_col: Positional index of the column compared for the odd/even split.
            NOTE(review): presumably the per-draw odd count — confirm.
        low_col: Positional index of the column compared for the low/high split.
            NOTE(review): presumably the per-draw low count — confirm.

    Returns:
        DataFrame indexed by ``'3/2', '2/3', '1/4', '4/1', '0/5', '5/0'`` with
        the integer columns ``'odd/even'`` and ``'low/high'``.

    Raises:
        ValueError: If ``window`` is smaller than 1 (``iloc[-0:]`` would
            silently select every row).
    """
    if window < 1:
        raise ValueError("window must be at least 1")
    if df is None:
        df = euromillions.df

    recent = df.iloc[-window:]
    odd_values = recent.iloc[:, odd_col]
    low_values = recent.iloc[:, low_col]

    cases = [3, 2, 1, 4, 0, 5]
    # Build every row first and create the frame once, rather than growing it row by row.
    rows = {
        f'{n}/{5-n}': [(odd_values == n).sum(), (low_values == n).sum()]
        for n in cases
    }
    return pd.DataFrame.from_dict(rows, orient='index', columns=['odd/even', 'low/high'])

In [None]:
combinations = [(1, 4), (2, 3), (3, 2), (4, 1), (5, 0), (0, 5)]

# Predictors (X): 'low', 'high' and one days_since_<a>_<b> column per combination.
X = euromillions.combinations_skips[
    ['low', 'high', *(f'days_since_{first}_{second}' for first, second in combinations)]
]
# Target (y): the 'odd' column (swap in another column to predict something else).
y = euromillions.combinations_skips['odd']

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the classifier and fit it on the training split (fit() returns the estimator itself).
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
).fit(X_train, y_train)

# 5-fold cross-validation over the full data set.
cross_val_scores = cross_val_score(model, X, y, cv=5)
print(f'Cross-validated accuracy: {cross_val_scores.mean()}')

# Predict the held-out rows and score them.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')

In [None]:
# Walk forward from draw 1000: feed each draw's features to the fitted model and
# count how often the prediction equals the value stored for the following draw.
# NOTE(review): these rows include ones the model was trained on (the split above
# is random), so this count is not an out-of-sample score.
feature_columns = [
    'low',              # low feature
    'high',             # high feature
    'days_since_1_4',   # days since combination (1, 4) last appeared
    'days_since_2_3',   # days since combination (2, 3) last appeared
    'days_since_3_2',   # days since combination (3, 2) last appeared
    'days_since_4_1',   # days since combination (4, 1) last appeared
    'days_since_5_0',   # days since combination (5, 0) last appeared
    'days_since_0_5',   # days since combination (0, 5) last appeared
]  # same columns, in the same order, as the training matrix X

count = 0
for draw in range(1000,len(euromillions.df)-1):
    # Keep the row as a one-row DataFrame so the column names match the ones the
    # model was fitted with; a bare ndarray makes scikit-learn warn on every call.
    last_features = euromillions.combinations_skips.iloc[[draw]][feature_columns]

    # Predict from this draw's features.
    y_pred_next = model.predict(last_features)

    # Named next_draw rather than `next`: rebinding the builtin at notebook level
    # would break any later cell that calls next().
    next_draw = draw + 1
    # NOTE(review): positional column 3 is presumably the 'odd' target — confirm.
    v = euromillions.combinations_skips.iloc[next_draw,3]
    if y_pred_next[0] == v:
        print(y_pred_next, v, next_draw)
        count += 1

print(count)

In [None]:
# Show the predictions the model produced for the held-out rows (X_test).
y_pred

In [None]:
# Baseline: accuracy of uniformly random guesses over the six classes 0..5,
# to compare against the model's accuracy on the same held-out targets.
n_predictions = len(y_test)
# Seeded generator so the baseline is the same on every Restart & Run All.
rng = np.random.default_rng(42)
random_predictions = rng.choice([0, 1, 2, 3, 4, 5], size=n_predictions, replace=True)
random_accuracy = accuracy_score(y_test, random_predictions)
random_accuracy

In [None]:
# Test of Tomorrow Stars
# NOTE: this rebinds `euromillions` to a star-mode Criteria object, so earlier
# cells that expect the main-number object will behave differently if they are
# re-run after this one.

euromillions = Criteria(is_star=True)

euromillions.apply_transformation(is_star=True)
euromillions.count_skips(is_star=True)
euromillions.get_natural_rotations(is_star=True)
print(euromillions.year_history)

print(euromillions.aprox_rotation)
print(euromillions.exact_rotation)

In [None]:
# Maximum of the star-mode `counts` attribute
# (presumably populated by count_skips(is_star=True) — confirm).
euromillions.counts.max()

In [None]:
# Select the row(s) whose 'dates' value is 2016-09-27.
# NOTE(review): possibly the draw that the hard-coded start index 863 used
# further down corresponds to — confirm.
euromillions.df.loc[euromillions.df['dates'] == datetime(2016,9,27)]

In [70]:
def find_group(value: int) -> str | None:
    stars_group = {
    'A': [1,3,5],
    'B': [2,4,6],
    'C': [7,9,11],
    'D': [8,10,12]
}
    
    for k, v in stars_group.items():
        if value in v:
            return k
    
    return None

# Label each draw's two stars with their group letter (find_group returns None
# for a star outside every group). This adds the columns to df_stars in place.
euromillions.df_stars['group_0'] = euromillions.df_stars['star_1'].apply(find_group) 
euromillions.df_stars['group_1'] = euromillions.df_stars['star_2'].apply(find_group)

In [None]:
# Inspect the stars frame, including the group_0 / group_1 columns.
euromillions.df_stars

In [107]:
# Collect each row's two group letters into a list, dropping non-string entries
# (such as the None that find_group returns for an unmatched star).
# NOTE: the entries are Python lists, which are unhashable — convert them to
# tuples before hashing-based operations such as value_counts() or groupby().
euromillions.df_stars['combined_groups'] = euromillions.df_stars[['group_0','group_1']].apply(lambda row: [value for value in row if isinstance(value, str)], axis = 1)

In [94]:
# All unordered pairs (repetition allowed) of the four group letters.
# NOTE: this is a one-shot iterator, and `test` is not used by any later cell
# visible in this notebook.
test = itertools.combinations_with_replacement(['A','B','C','D'],2)

In [None]:
# Frequency of each (group_0, group_1) pair. The column holds lists, which are
# unhashable, so value_counts() would raise TypeError on it directly; convert
# each entry to a tuple first.
euromillions.df_stars['combined_groups'].apply(tuple).value_counts()

In [154]:
# Take an explicit copy of the selected columns: adding columns to a bare .iloc
# column subset triggers pandas' SettingWithCopyWarning.
star = euromillions.df_stars.iloc[:,[1,2,3,4,5,6]].copy()
# Ordered (group of star_1, group of star_2) pairs to track.
# NOTE: this rebinds the notebook-level name `combinations`, which the model
# cell above uses for the numeric (low, high) pairs.
combinations = [('B','D'), ('B','C'), ('A','C'), ('A','D'), ('A','B'), ('C','D'), ('B','A'), ('A','A'), ('B','B'), ('D','C'), ('C','C'), ('D','D')]

# Create one days_since_<g0>_<g1> column per pair; rows that the loop below does
# not visit keep the initial None.
for comb in combinations:
    col_name = f'days_since_{comb[0]}_{comb[1]}'
    star[col_name] = None

# Running counter of draws since each pair last appeared.
days_since = {comb: 0 for comb in combinations}

# Walk the draws in order, starting at index label 863.
# NOTE(review): 863 is presumably the first draw played with 12 stars — confirm.
# The upper bound uses len(euromillions.df); this assumes star's index labels
# cover that range.
for i in range(863,len(euromillions.df)):
    # The row's pair is the same for every comb, so look it up once per draw.
    current_groups = star.loc[i, 'combined_groups']
    for comb in combinations:
        col_name = f'days_since_{comb[0]}_{comb[1]}'

        if current_groups == list(comb):
            # This draw shows the pair: reset its counter.
            days_since[comb] = 0
        else:
            # Otherwise one more draw has passed without it.
            days_since[comb] += 1

        # Record the counter value for this draw.
        star.loc[i, col_name] = days_since[comb]

In [None]:
# Display the star frame with its days_since_* columns.
star