In [10]:
# Standard Libraries of Python
from collections import Counter
from decimal import Decimal, ROUND_HALF_UP, getcontext
getcontext().prec = 5

# Dependencies
import pandas as pd
import numpy as np
np.set_printoptions(precision=5)

# Libraries made for this Project
from database.clean_database import database

In [11]:
# Main Object - Super Class
class Analysis:
    """Hit-frequency and skip statistics for the main numbers of a draw history.

    The draw table must provide a datetime column ``dates``, the main-number
    columns ``nro1`` .. ``nro5`` and the columns ``draw``, ``star_1`` and
    ``star_2`` (the last three are only dropped from the per-year copy).

    ``apply_transformation`` must run before ``get_natural_rotations``, which
    must run before ``numbers_clasification``. ``count_skips`` only needs the
    raw draws. All ``Decimal`` arithmetic follows the active decimal context.
    """

    # Balls drawn per game: main numbers (nro1..nro5) and stars.
    MAIN_BALLS = 5
    STAR_BALLS = 2

    def __init__(self, path='database/db.parquet'):
        """Load the draws and build the per-year working copy.

        Args:
            path: Parquet file with the draw history. Defaults to the
                location the notebook has always used.
        """
        self.df = pd.read_parquet(path)
        # Same rows as ``df`` but with ``dates`` reduced to the calendar year
        # and the columns that are not main numbers removed.
        self.dates_construct = self.df.copy()
        self.dates_construct['dates'] = self.dates_construct['dates'].dt.year
        self.dates_construct = self.dates_construct.drop(
            columns=['draw','star_1','star_2']
            )
        self.numbers = range(1,51)

    def __ball_columns(self):
        """Return the names of the main-number columns (``nro1`` ..)."""
        return [f'nro{position}' for position in range(1, self.MAIN_BALLS + 1)]

    def __balls_per_draw(self, is_star=False):
        """Return how many balls a single draw contributes to the hit totals."""
        return self.STAR_BALLS if is_star else self.MAIN_BALLS

    def apply_transformation(self):
        """Count, per calendar year, how often each number was drawn.

        Sets:
            year_history: integer frame, one row per year from the earliest to
                the latest year present (years without draws are all zero),
                one column per value of ``self.numbers``.
            hits, mean, median: one-row frames labelled ``'hits'`` (int32),
                ``'mean'`` and ``'median'`` (float32) summarising each column
                of ``year_history``.

        Drawn values that are not in ``self.numbers`` are ignored.
        """
        years = self.dates_construct['dates']
        # min/max instead of first/last row, so the result does not depend on
        # the draws being stored in chronological order.
        year_index = np.arange(int(years.min()), int(years.max()) + 1)

        # One row per (year, drawn number) pair, then a plain frequency table.
        long_format = self.dates_construct.melt(
            id_vars='dates',
            value_vars=self.__ball_columns(),
            value_name='number',
        )
        yearly_counts = pd.crosstab(long_format['dates'], long_format['number'])
        self.year_history = yearly_counts.reindex(
            index=year_index, columns=list(self.numbers), fill_value=0)
        # crosstab names both axes; the frame is consumed unnamed elsewhere.
        self.year_history.index.name = None
        self.year_history.columns.name = None

        self.hits = self.year_history.sum().to_frame('hits').T.astype('int32')
        self.mean = self.year_history.mean().to_frame('mean').T.astype('float32')
        self.median = self.year_history.median().to_frame('median').T.astype('float32')

    def __numbers_boolean(self):
        """Build ``booleans_df``: True where a number was drawn in a draw.

        Rows are labelled 1..len(df), columns carry the values of
        ``self.numbers``.
        """
        drawn = self.df[self.__ball_columns()].to_numpy()
        candidates = np.asarray(self.numbers)
        # (draws, balls, 1) == (numbers,) -> (draws, balls, numbers); a number
        # is a hit when any ball of the draw equals it.
        was_drawn = (drawn[:, :, None] == candidates).any(axis=1)
        self.booleans_df = pd.DataFrame(
            was_drawn,
            columns=[int(i) for i in self.numbers],
            index=range(1,len(self.df)+1))

    def count_skips(self):
        """Compute, for every draw and number, the current run of misses.

        Sets ``counts``: 0 in the draw where the number hit, otherwise the
        number of consecutive draws (including the current one) it has missed.
        The frame uses positional labels on both axes, so row ``k`` is the
        ``k+1``-th draw and column ``k`` is the ``k+1``-th entry of
        ``self.numbers``.
        """
        self.__numbers_boolean()
        missed = pd.DataFrame(~self.booleans_df.to_numpy())
        running_misses = missed.cumsum()
        # Value of the running total at the most recent hit (0 before the
        # first hit); subtracting it restarts the count after every hit.
        at_last_hit = running_misses.where(~missed).ffill().fillna(0).astype(int)
        self.counts = running_misses - at_last_hit

    def __total_average_hits(self,is_star=False,aprox=False):
        """Return the mean hit rate over all numbers as a ``Decimal``.

        Also stores ``self.average``: hits of each number divided by the
        number of draws and by the balls per draw.

        Args:
            is_star: use the star ball count (2) instead of the main one (5).
            aprox: add a 0.001 margin to the returned rate.
        """
        divide = self.__balls_per_draw(is_star)
        self.average = self.hits.iloc[0] / len(self.df) / divide
        total = Decimal(self.average.sum()) / Decimal(len(self.numbers))
        if aprox:
            # Built from a string so the margin is exactly 0.001 and not the
            # binary expansion of the float literal.
            total += Decimal('0.001')
        return total

    def m_hits(self,is_star=False,aprox=False):
        """Return the hit count a number needs to match the mean hit rate.

        The mean rate is scaled by ``len(df) * balls_per_draw``. That factor
        used to be recovered as ``hits / average`` of the first number, which
        is 0/0 whenever that number has never been drawn.
        """
        total_average = self.__total_average_hits(is_star,aprox)
        return total_average * Decimal(len(self.df) * self.__balls_per_draw(is_star))

    def __natural_rotation(self,is_star=False,aprox=False):
        """Build the per-number rotation table.

        Columns: ``hits``, ``average_of_numbers``, ``total_average``,
        ``minimal_hits_needed`` and ``difference`` (hits minus the minimum).
        """
        total_average = self.__total_average_hits(is_star,aprox)
        minimal_hits = self.m_hits(is_star,aprox)

        rotation = pd.DataFrame(
            {'hits': self.hits.iloc[0],
            'average_of_numbers': self.average,
            'total_average': total_average,
            'minimal_hits_needed': minimal_hits},
            index=self.numbers)

        rotation['difference'] = rotation['hits'] - rotation['minimal_hits_needed']
        return rotation

    def get_natural_rotations(self,is_star=False):
        """Store the rotation tables without (exact) and with (aprox) margin."""
        self.exact_rotation = self.__natural_rotation(is_star,aprox=False)
        self.aprox_rotation = self.__natural_rotation(is_star,aprox=True)

    def numbers_clasification(self):
        """Classify every number by how its hits compare with the minimum.

        Sets ``best_numbers`` (above the margin-adjusted minimum),
        ``normal_numbers`` (above the exact minimum only) and ``category``,
        a dict number -> 2 (best), 1 (normal) or 0 (neither).
        Requires ``get_natural_rotations`` to have been called.
        """
        aprox = self.aprox_rotation
        exact = self.exact_rotation

        self.best_numbers = aprox.loc[
            aprox['hits'] > aprox['minimal_hits_needed']
            ].index.to_numpy()
        self.normal_numbers = exact.loc[
            (exact['hits'] > exact['minimal_hits_needed'])
            & ~(exact.index.isin(self.best_numbers))
            ].index.to_numpy()

        self.category = {number: 0 for number in self.numbers}
        for number in self.best_numbers:
            self.category[number] = 2
        for number in self.normal_numbers:
            self.category[number] = 1

# Sub class
class Criteria(Analysis):
    """Selection criteria derived from the statistics computed by Analysis."""

    def __init__(self, *args, **kwargs):
        """Initialise the analysis state; arguments are forwarded to Analysis."""
        super().__init__(*args, **kwargs)
        # Skip lengths (0..18) that are tallied by __skips_last_draws.
        self.skips = range(0,19)

    def __skips_last_draws(self):
        """Summarise current skips and the skips ended by recent hits.

        Requires ``count_skips`` to have been run.

        Sets:
            last_draw: frame with columns ``number`` and ``skips`` holding
                the skip count of every number after the latest draw,
                sorted ascending by ``skips``.
            skips_7_12: frame with one row per value of ``self.skips`` and
                columns ``'7'`` / ``'12'``: how many hits in the last 7 / 12
                draws ended a skip of exactly that length.
        """
        window_draws = 12
        recent_draws = 7
        numbers = list(self.numbers)

        # counts.iloc[-1] is a Series, so it is turned into a frame before
        # sorting; positions are mapped onto self.numbers instead of relying
        # on the column labels of counts.
        current = pd.DataFrame(
            {'number': numbers, 'skips': self.counts.iloc[-1].to_numpy()}
        )
        self.last_draw = current.sort_values(by='skips', kind='stable')

        # Positional slicing keeps exactly the last 12 draws and degrades
        # gracefully to "all draws" when the history is shorter.
        was_hit = (self.counts == 0).to_numpy()[-window_draws:]
        # A hit cell is 0 by construction; the length of the skip that the hit
        # ended is the value one draw earlier (NaN for the very first draw).
        skip_before = self.counts.shift(1).to_numpy()[-window_draws:]
        ended_per_draw = [
            previous[hit] for previous, hit in zip(skip_before, was_hit)
        ]

        def tally(draws):
            ended = Counter(
                int(value)
                for draw in draws
                for value in draw
                if not np.isnan(value)
            )
            return [ended.get(skip, 0) for skip in self.skips]

        self.skips_7_12 = pd.DataFrame(
            {'7': tally(ended_per_draw[-recent_draws:]),
             '12': tally(ended_per_draw)}
             )

    def year_criterion(self):
        """Score every number by its hits in the latest year.

        Requires ``apply_transformation`` to have been run. Sets
        ``year_criteria``: a frame indexed by number with one column
        ``year_criteria``.

        For a number with a usable yearly mean the score is
        ``1 - median/mean`` when this year's hits are at most half the
        median, otherwise ``1 - hits/mean`` (negative when the number is
        already above its mean). Without a usable mean the score is 0.50.
        """
        # Year of the most recent draw (dates_construct['dates'] holds years).
        current_year = self.dates_construct['dates'].max()
        year_criteria = {}

        for number in self.numbers:
            x = self.year_history.at[current_year,number]
            # Label-based access: the one-row frames are labelled 'median'
            # and 'mean'.
            median = self.median.at['median',number]
            mean = self.mean.at['mean',number]

            if mean != 0 and not np.isnan(mean):
                if x == 0 or x <= median / 2:
                    y = round((1 - ((median * 100) / mean) / 100),2)
                else:
                    x_percentage = round((x * 100 / mean) / 100,2)
                    # Both arms of the former conditional were identical, so
                    # the plain expression is equivalent.
                    y = round(1 - x_percentage,2)
            else:
                y = 0.50

            year_criteria[number] = float(y)

        self.year_criteria = pd.DataFrame.from_dict(
            year_criteria,
            orient='index',
            columns=['year_criteria']
            )

In [12]:
# Load the draw history and run every analysis step on it.
euromillions = Analysis()

# Skip counts only depend on the raw draws.
euromillions.count_skips()

# Yearly hit table -> rotation tables -> best/normal classification.
euromillions.apply_transformation()
euromillions.get_natural_rotations()
euromillions.numbers_clasification()

In [None]:
# Criteria.__init__ calls Analysis.__init__, so this reads the parquet file
# again; none of the analysis steps have been run on this instance yet.
numbers_choice = Criteria()