In [1]:

import itertools
import random
import pandas as pd
from itertools import combinations
from datetime import datetime

import date_utils as dt_mng
from data_manager import DataManager
import analyze

# Shared data-access object: the helpers below use it for player/team lookups,
# game data retrieval, prop filtering, and writing output files.
dm = DataManager()

class Prop:
    """One over/under player prop with its estimated probability and EV.

    On construction the prop fetches the player's recent game data through
    ``dm``, estimates the hit probability with the Poisson helpers in
    ``analyze``, and derives the expected value and the bookmaker's implied
    probability from the American odds.

    Attributes:
        name: Player name, as passed in.
        team: Team label, as passed in.
        stat: Name of the stat the prop is about.
        n: The prop line (``threshold`` argument).
        odds: American odds.
        bet_type: ``"over"`` or ``"under"``.
        probability: Value returned by the ``analyze`` Poisson estimator.
        ev: Value returned by ``analyze.calculate_ev``.
        house_prob: Value returned by ``analyze.estimate_implied_probability``.
        print_out: Multi-line human-readable summary of the prop.
        entry: Dict form of the prop keyed with upper-case names
            (``PLAYER``, ``TEAM``, ``STAT``, ...).
    """

    def __init__(self, name, team, stat, threshold, odds, bet_type):
        self.name = name
        self.team = team
        self.stat = stat
        self.n = threshold
        self.odds = odds
        self.bet_type = bet_type
        # The probability must be computed first: the EV calculation reads it.
        self.probability = self.get_prop_probability()
        self.ev, self.house_prob = self.get_ev_and_implied_prob()
        self.print_out = f"""
            PLAYER: {self.name}
              STAT: {self.stat}
            THRESH: {self.n}
              ODDS: {self.odds}
              TYPE: {self.bet_type}
              PROB: {self.probability}
                EV: {self.ev}
        HOUSE_PROB: {self.house_prob}
            """
        self.entry = {
            "PLAYER": self.name,
            "TEAM": self.team,
            "STAT": self.stat,
            "THRESH": self.n,
            "ODDS": self.odds,
            "TYPE": self.bet_type,
            "PROB": self.probability,
            "EV": self.ev,
            "HOUSE_PROB": self.house_prob,
        }

    def get_prop_probability(self, last_n_games=25):
        """Estimate the probability that this prop hits.

        Args:
            last_n_games: How many of the most recent games (by ``date``) are
                handed to the estimator.

        Returns:
            Whatever the matching ``analyze.estimate_probability_poisson_*``
            helper returns.

        Raises:
            ValueError: If ``bet_type`` is neither ``"over"`` nor ``"under"``.
        """
        # Reject a bad bet type before doing any data access.
        if self.bet_type not in ("over", "under"):
            raise ValueError("Invalid bet type. Use 'over' or 'under'.")

        player_id = dm.get_player_id(self.name)
        data = (
            dm.get_and_save_player_data(player_id, self.name)
            .sort_values(by='date', ascending=False)
            .head(last_n_games)
            .copy()
        )
        if self.bet_type == "over":
            return analyze.estimate_probability_poisson_over(data, self.stat, self.n)
        return analyze.estimate_probability_poisson_under(data, self.stat, self.n)

    def get_ev_and_implied_prob(self):
        """Return ``(ev, house_probability)`` for this prop.

        Both values are computed by ``analyze`` from the decimal form of the
        prop's American odds.
        """
        odds = self.american_to_decimal(self.odds)
        house_probability = analyze.estimate_implied_probability(odds)
        # NOTE(review): the literal 5 is presumably the stake used for the EV
        # figure; confirm against analyze.calculate_ev.
        ev = analyze.calculate_ev(self.probability, odds, 5)
        return ev, house_probability

    @staticmethod
    def american_to_decimal(american_odds):
        """Convert American odds to decimal odds.

        Positive odds map to ``1 + odds / 100`` and negative odds to
        ``1 + 100 / |odds|``.

        Raises:
            ValueError: If ``american_odds`` is 0, which has no decimal
                equivalent (the conversion would divide by zero).
        """
        if american_odds == 0:
            raise ValueError("American odds cannot be 0.")
        if american_odds > 0:
            return 1 + (american_odds / 100)
        return 1 + (100 / abs(american_odds))
    

def extract_raw_data(file_path): # .csv
    """Read a CSV file and return the values of its first column as a list.

    The file is parsed with the ``pd.read_csv`` defaults, so its first line is
    consumed as the header and is not part of the returned values.
    """
    sheet = pd.read_csv(file_path)
    first_column = sheet.iloc[:, 0]
    return list(first_column)


def load_available_props():
    """Parse "prop_lines/prop_lines.csv" into one row per player/stat line.

    The first column of the CSV is walked top to bottom as a flat stream of
    items. Category labels and team names update the running context; a
    player name starts a new record, and the four items that follow it are
    stored as over_threshold, over_odds, under_threshold and under_odds.

    Returns:
        pandas.DataFrame with columns player_name, team, stat,
        over_threshold, over_odds, under_threshold, under_odds.
    """
    
    raw_input = extract_raw_data("prop_lines/prop_lines.csv")
    # Maps the category labels that appear in the raw column to stat names.
    stat_names = {
             'PointsSGP': "points",
            'AssistsSGP': "assists",
        'Threes MadeSGP': "fg3m",
           'ReboundsSGP': "rebounds",
   'Field Goals MadeSGP': "fgm",
             'StealsSGP': "steals",
             'BlocksSGP': "blocks",
        }
    #debug stat_name_inputs = extract_raw_data("prop_lines/player_prop_categories.csv")
    players = dm.query_players()
    player_names = [player.name for player in players]
    teams = dm.query_teams()
    team_names = [team.nickname for team in teams]
    # Countdown that is non-zero while the items after a player name are
    # being collected into `record`.
    row_of_interest = 0
    current_category = None
    current_player = None
    current_team = None
    records = []
    for _, item in enumerate(raw_input):
        # NOTE(review): the items come from pandas, where missing cells are
        # presumably NaN rather than None, so this guard may never trigger -
        # confirm against the real input file.
        if item is None: 
            continue
        if item in stat_names:
            current_category = stat_names[item]
        if item != current_team:
            if item in team_names:
                current_team = item
        if item in player_names:
            current_player = item 
            # A player line is only valid once a team line has been seen.
            assert current_team
            record = [current_player, current_team, current_category]
            # 6 -> decremented to 5 on this same item (the name itself is not
            # appended), then 4, 3, 2, 1 for the four value items that follow.
            row_of_interest = 6


        if row_of_interest:
            row_of_interest -= 1
            if row_of_interest < 5:
                record.append(item)
                if row_of_interest == 1:
                    # Fourth value collected: the record is complete.
                    records.append(record)
                    # The item after a completed record is still appended
                    # (counter goes 1 -> 0), but only to this fresh list,
                    # which is never stored.
                    record = []
    df = pd.DataFrame.from_records(records, columns=["player_name", "team", "stat", "over_threshold", "over_odds", "under_threshold", "under_odds"])
    
    df['player_name'] = df['player_name'].astype(str)
    df['team'] = df['team'].astype(str)
    df['stat'] = df['stat'].astype(str)
    # The pattern only matches numbers with a decimal point; a threshold
    # written without one (e.g. "10") yields NaN.
    df['over_threshold'] = df['over_threshold'].str.extract(r'(\d+\.\d+)').astype(float)
    df['under_threshold'] = df['under_threshold'].str.extract(r'(\d+\.\d+)').astype(float)
    df['over_odds'] = df['over_odds'].astype(int)
    df['under_odds'] = df['under_odds'].astype(int)

    return df


def get_analyzed_props(available_props):
    """Build a ``Prop`` for the over side and the under side of every line.

    Args:
        available_props: DataFrame with player_name, team, stat and the
            ``over_``/``under_`` threshold and odds columns.

    Returns:
        List of ``Prop`` objects, two per input row (over first, then under).
    """
    return [
        Prop(
            name=line["player_name"],
            team=line["team"],
            stat=line["stat"],
            threshold=line[f"{side}_threshold"],
            odds=line[f"{side}_odds"],
            bet_type=side,
        )
        for _, line in available_props.iterrows()
        for side in ("over", "under")
    ]


def generate_heterogenous_combinations(df, n, top_k=None):
    """Rank every n-row combination of ``df`` by how varied its rows are.

    A combination's score is the number of distinct ``PLAYER`` values plus the
    number of distinct ``STAT`` values plus the number of distinct ``TEAM``
    values among its rows. Every returned combination is also printed.

    Args:
        df: DataFrame with 'PLAYER', 'STAT' and 'TEAM' columns.
        n: Number of rows per combination.
        top_k: Optional cap on how many of the best-scoring combinations are
            printed and returned. ``None`` (the default) keeps all of them.

    Returns:
        List of ``(sub_dataframe, score)`` tuples, highest score first;
        combinations with equal scores keep their generation order.
    """
    def evaluate_heterogeneity(comb):
        rows = df.loc[list(comb)]
        return sum(len(set(rows[column])) for column in ('PLAYER', 'STAT', 'TEAM'))

    # sorted() is stable, so ties stay in itertools.combinations order.
    scored = sorted(
        ((comb, evaluate_heterogeneity(comb)) for comb in itertools.combinations(df.index, n)),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if top_k is not None:
        scored = scored[:max(top_k, 0)]

    results = []
    for comb, score in scored:
        subset = df.loc[list(comb)]
        print(f"Combination: {comb}, Score: {score}")
        print(subset)
        print()
        results.append((subset, score))

    return results


def generate_unique_combinations(df, combo_length):
    """Return index combinations that use each player once and are not repeats.

    A combination is kept when no 'Player' value occurs twice among its rows
    and its sorted set of (Player, Category, Bet) triples has not already been
    produced by an earlier combination.

    Args:
        df: DataFrame with 'Player', 'Category' and 'Bet' columns.
        combo_length: Number of rows per combination.

    Returns:
        List of index tuples, in generation order.
    """
    seen_signatures = set()
    kept = []

    for combo in combinations(df.index, combo_length):
        rows = df.loc[list(combo)]
        if rows['Player'].duplicated().any():
            continue
        signature = tuple(sorted(zip(rows['Player'], rows['Category'], rows['Bet'])))
        if signature in seen_signatures:
            continue
        seen_signatures.add(signature)
        kept.append(combo)

    return kept




def distribute_props_into_parlays(props_df, n_parlays, rng=None):
    """
    Randomly distributes props into parlays, weighting each prop by its probability.

    Each prop is placed into ``int(PROB / sum(PROB) * n_parlays)`` randomly
    chosen parlays, and always into at least one.

    :param props_df: pandas DataFrame with a 'PROB' column; every other column is
                     carried along unchanged into the parlay entries
    :param n_parlays: Number of parlays to distribute the props into
    :param rng: Optional ``random.Random`` instance used for the draw, so a run can be
                reproduced; defaults to the module-level ``random`` generator
    :return: List of ``n_parlays`` parlays, where each parlay is a list of prop dicts
             (``row.to_dict()`` of the chosen rows)
    :raises ValueError: if there are props to distribute but ``n_parlays`` is below 1
    """
    chooser = random if rng is None else rng

    # Initialize empty parlays
    parlays = [[] for _ in range(n_parlays)]
    if props_df.empty:
        return parlays
    if n_parlays < 1:
        raise ValueError("n_parlays must be at least 1 to distribute props.")

    total_probability = props_df["PROB"].sum()

    for _, row in props_df.iterrows():
        # With a zero total every share is treated as 0, so each prop still
        # lands in exactly one parlay instead of failing on a NaN share.
        share = row["PROB"] / total_probability if total_probability > 0 else 0
        num_parlays_for_prop = min(n_parlays, max(1, int(share * n_parlays)))

        # Randomly select distinct parlays for this prop
        for parlay in chooser.sample(parlays, num_parlays_for_prop):
            parlay.append(row.to_dict())

    return parlays


def remove_rows_by_value(df, column_name, values_to_remove):
    """
    Drop every row whose value in ``column_name`` appears in ``values_to_remove``.

    Parameters:
    df (pandas.DataFrame): The input DataFrame (not modified).
    column_name (str): The column whose values are tested.
    values_to_remove (list): Values that disqualify a row.

    Returns:
    pandas.DataFrame: The rows of ``df`` that survive the filter.
    """
    is_unwanted = df[column_name].isin(values_to_remove)
    return df.loc[~is_unwanted]


def remove_rows_by_shared_id(df, target_column, values_to_check, id_column):
    """
    Drop whole groups of rows: if any row's ``target_column`` value is in
    ``values_to_check``, every row sharing that row's ``id_column`` value is removed.

    Parameters:
    df (pandas.DataFrame): The input DataFrame (not modified).
    target_column (str): The column searched for the listed values.
    values_to_check (list): Values that flag a row, and with it its whole ID group.
    id_column (str): The column that defines which rows belong together.

    Returns:
    pandas.DataFrame: The rows whose ID was not flagged.
    """
    # IDs that have at least one row matching a flagged value
    is_flagged = df[target_column].isin(values_to_check)
    flagged_ids = df.loc[is_flagged, id_column].unique()

    # Keep only rows that belong to none of those IDs
    in_flagged_group = df[id_column].isin(flagged_ids)
    return df.loc[~in_flagged_group]


def create_study_packet(parlay_study, folder_name):
    """Write the supporting material for a set of parlays into ``folder_name``.

    Game data is fetched through ``dm`` for every distinct player and team in
    ``parlay_study`` and saved as one workbook at
    ``<folder_name>/study_packet``; the de-duplicated prop/parlay columns are
    written to ``<folder_name>/props.csv``.

    Args:
        parlay_study: DataFrame with 'PLAYER', 'TEAM' and the prop/parlay
            columns selected below.
        folder_name: Existing directory to write into.
    """
    players = parlay_study['PLAYER'].unique()
    player_frames = [
        dm.get_and_save_player_data(dm.get_player_id(player))
        for player in players
    ]

    teams = parlay_study['TEAM'].unique()
    team_frames = [
        dm.get_and_save_team_data(dm.get_team_id(team))
        for team in teams
    ]

    prop_columns = [
        'PROPS', 'THRESH', 'PARLAY_ID', 'PARLAY_EV', 'PARLAY_PROB',
        'HOUSE_PARLAY_PROB', 'PROB', 'HOUSE_PROB', 'EV', 'TO_WIN',
    ]
    props = parlay_study.drop_duplicates()[prop_columns]

    # Players first, then teams, keyed by name.
    game_data = dict(zip(players, player_frames))
    game_data.update(zip(teams, team_frames))

    dm.save_as_excel_workbook(game_data, folder_name + "/study_packet")
    props.to_csv(folder_name + "/props.csv")


def generate_candidates(n_props, n_parlays, parlay_len, cycles, filter_players, prop_filter):
    """Produce analyzed candidate parlays from randomly distributed props.

    The available props are loaded, analyzed and filtered through
    ``dm.filter_props``; the random distribution is then repeated ``cycles``
    times and only parlays with exactly ``parlay_len`` props are kept. After
    ``analyze.analyze_parlays`` has processed them, every parlay that contains
    a prop listed in ``prop_filter`` is dropped as a whole.

    Args:
        n_props: Forwarded to ``dm.filter_props``.
        n_parlays: Number of parlays generated per cycle.
        parlay_len: Required number of props in a kept parlay.
        cycles: Number of times the random distribution is repeated.
        filter_players: Forwarded to ``dm.filter_props``.
        prop_filter: 'PLAYER-STAT-TYPE' tags whose parlays are removed.

    Returns:
        DataFrame of the surviving parlay rows with an added 'PROPS' column.
    """
    analyzed_props = get_analyzed_props(load_available_props())
    filtered_df = dm.filter_props(analyzed_props, filter_players, n_props)

    candidate_parlays = [
        parlay
        for _ in range(cycles)
        for parlay in distribute_props_into_parlays(filtered_df, n_parlays)
        if len(parlay) == parlay_len
    ]

    analyzed = analyze.analyze_parlays(candidate_parlays)
    analyzed['PROPS'] = analyzed['PLAYER'] + "-" + analyzed['STAT'] + "-" + analyzed['TYPE']
    return remove_rows_by_shared_id(analyzed, 'PROPS', prop_filter, 'PARLAY_ID')


def select_top_parlays(parlays_df, top_n):
    """Return all rows of the ``top_n`` parlays with the highest 'PARLAY_EV'.

    Rows are sorted by 'PARLAY_EV' descending; the first ``top_n`` distinct
    'PARLAY_ID' values in that order are kept, together with every row that
    belongs to them.
    """
    ranked = parlays_df.sort_values(by='PARLAY_EV', ascending=False)
    best_ids = ranked['PARLAY_ID'].unique()[:top_n]
    belongs_to_best = ranked['PARLAY_ID'].isin(best_ids)
    return ranked[belongs_to_best]

def generate_parlays(df, min_props, max_props):
    """Enumerate every parlay of ``min_props``..``max_props`` props from ``df``.

    For each combination of index labels the per-prop values are combined:
    'PROB' and 'HOUSE_PROB' are multiplied, 'EV' is summed, and the decimal
    odds (via ``analyze.american_to_decimal`` on 'ODDS') are multiplied.

    Args:
        df: DataFrame with 'PROB', 'ODDS', 'HOUSE_PROB' and 'EV' columns.
        min_props: Smallest parlay size (inclusive).
        max_props: Largest parlay size (inclusive).

    Returns:
        DataFrame with columns COMBO (tuple of index labels), COMBINED_PROB,
        COMBINED_HOUSE_PROB, COMBINED_EV and TO_WIN (combined decimal odds
        minus 1). The columns are present even when no combination exists.
    """
    columns = ['COMBO', 'COMBINED_PROB', 'COMBINED_HOUSE_PROB', 'COMBINED_EV', 'TO_WIN']

    # Resolve each label to its values once instead of filtering the whole
    # frame for every prop of every combination. For a repeated label the
    # first matching row is used.
    first_rows = df[~df.index.duplicated(keep='first')]
    labels = list(first_rows.index)
    prob_of = dict(zip(labels, first_rows['PROB'].values))
    house_prob_of = dict(zip(labels, first_rows['HOUSE_PROB'].values))
    ev_of = dict(zip(labels, first_rows['EV'].values))
    american_odds_of = dict(zip(labels, first_rows['ODDS'].values))
    decimal_odds_of = {}  # filled lazily, one conversion per label

    parlays = []
    for size in range(min_props, max_props + 1):
        for combination in itertools.combinations(df.index, size):
            prob_product = 1
            house_prob_product = 1
            odds_product = 1
            ev_sum = 0

            for label in combination:
                if label not in decimal_odds_of:
                    decimal_odds_of[label] = analyze.american_to_decimal(american_odds_of[label])
                prob_product *= prob_of[label]
                odds_product *= decimal_odds_of[label]
                house_prob_product *= house_prob_of[label]
                ev_sum += ev_of[label]

            parlays.append({
                'COMBO': combination,
                'COMBINED_PROB': prob_product,
                'COMBINED_HOUSE_PROB': house_prob_product,
                'COMBINED_EV': ev_sum,
                'TO_WIN': 1 * (odds_product - 1)
            })
    return pd.DataFrame(parlays, columns=columns)

# def select_optimal_parlays(prop_df, max_permeation_rate, min_props, max_props):
#     parlays_df = generate_parlays(prop_df, min_props, max_props)
#     parlays_df = parlays_df.sort_values(by="COMBINED_EV", ascending=False).reset_index(drop=True)
#     parlays_df['PARLAY_ID'] = parlays_df.index
#     num_parlays = int(len(prop_df) / max_permeation_rate)
#     parlays_selected = []
#     prop_counts = {prop: 0 for prop in prop_df.index}

#     for _, parlay in parlays_df.iterrows():
#         can_add_parlay = True
#         seen = {}
#         for idx in parlay['COMBO']:
#             prop_row = prop_df.iloc[idx]
#             player = prop_row['PLAYER']
#             stat = prop_row['STAT']

#             if stat == 'points':
#                 if player in seen and seen[player] == 'fgm':
#                     print('A')
#                     can_add_parlay = False
#                     break
#             if stat == 'fgm':
#                 if player in seen and seen[player] == 'points':
#                     print('B')
#                     can_add_parlay = False
#                     break
#             if prop_counts[idx] >= num_parlays * max_permeation_rate:
#                     can_add_parlay = False
#                     break
#             seen[player] = stat


#         if can_add_parlay:
#             parlays_selected.append(parlay)
#             for idx in parlay['COMBO']:
#                 prop_counts[idx] += 1
#     return parlays_selected


def select_optimal_parlays(prop_df, max_permeation_rate, min_props, max_props, verbose=True):
    """Greedily pick parlays in descending 'COMBINED_EV' order.

    Candidate parlays come from ``generate_parlays``. A candidate is rejected
    when it pairs a 'points' prop with an 'fgm' prop of the same player, or
    when one of its props has already been used the maximum number of times.

    Args:
        prop_df: DataFrame of props with 'PLAYER' and 'STAT' columns plus the
            columns ``generate_parlays`` needs. Props are addressed by index
            label.
        max_permeation_rate: Positive number used to derive the per-prop
            usage cap, ``int(len(prop_df) / rate) * rate``.
            NOTE(review): that cap is roughly ``len(prop_df)`` whatever the
            rate is - confirm this is the intended limit.
        min_props: Smallest parlay size (inclusive).
        max_props: Largest parlay size (inclusive).
        verbose: When True (default) the progress messages are printed.

    Returns:
        List of accepted parlay rows (pandas Series carrying 'COMBO',
        'PARLAY_ID' and the combined columns), best EV first.

    Raises:
        ValueError: If ``max_permeation_rate`` is not positive.
    """
    if max_permeation_rate <= 0:
        raise ValueError("max_permeation_rate must be positive.")
    log = print if verbose else (lambda *args, **kwargs: None)

    parlays_df = generate_parlays(prop_df, min_props, max_props)
    if parlays_df.empty:
        return []
    parlays_df = parlays_df.sort_values(by="COMBINED_EV", ascending=False).reset_index(drop=True)
    parlays_df['PARLAY_ID'] = parlays_df.index

    num_parlays = int(len(prop_df) / max_permeation_rate)
    max_uses_per_prop = num_parlays * max_permeation_rate
    parlays_selected = []
    prop_counts = {prop: 0 for prop in prop_df.index}

    for _, parlay in parlays_df.iterrows():
        can_add_parlay = True
        seen = {}  # player -> 'points' or 'fgm' already present in this parlay
        for idx in parlay['COMBO']:
            # COMBO holds index labels (as do the prop_counts keys), so the
            # row has to be looked up by label, not by position.
            prop_row = prop_df.loc[idx]
            player = prop_row['PLAYER']
            stat = prop_row['STAT']
            log(f"Checking player: {player}, stat: {stat}")

            if stat == 'points':
                if player in seen and seen[player] == 'fgm':
                    log(f"Conflict detected: Player {player} has both points and fgm props (A).")
                    can_add_parlay = False
                    break
                seen[player] = stat
                log(f"Seen dictionary updated: {seen}")

            elif stat == 'fgm':
                if player in seen and seen[player] == 'points':
                    log(f"Conflict detected: Player {player} has both points and fgm props (B).")
                    can_add_parlay = False
                    break
                seen[player] = stat
                log(f"Seen dictionary updated: {seen}")

            if prop_counts[idx] >= max_uses_per_prop:
                log(f"Prop {idx} has reached the maximum selection limit.")
                can_add_parlay = False
                break

        if can_add_parlay:
            parlays_selected.append(parlay)
            for idx in parlay['COMBO']:
                prop_counts[idx] += 1
            log(f"Parlay {parlay['PARLAY_ID']} added.")
        else:
            log(f"Parlay {parlay['PARLAY_ID']} rejected.")

    return parlays_selected


def get_selected_parlays(n_props, max_permeation_rate, min_props_in_parlay, max_props_in_parlay, filter_players):
    """Run the full pipeline and return the selected parlays as one long table.

    Props are loaded, analyzed and filtered through ``dm.filter_props``;
    ``select_optimal_parlays`` then chooses the parlays. The result has one
    row per (parlay, prop) pair: the prop's own columns followed by the
    parlay-level columns and a 'PROP_TAG' of the form PLAYER_STAT_TYPE.

    Args:
        n_props: Forwarded to ``dm.filter_props``.
        max_permeation_rate: Forwarded to ``select_optimal_parlays``.
        min_props_in_parlay: Smallest parlay size (inclusive).
        max_props_in_parlay: Largest parlay size (inclusive).
        filter_players: Forwarded to ``dm.filter_props``.

    Returns:
        DataFrame with the prop columns plus PARLAY_EV, PARLAY_PROB,
        PARLAY_HOUSE_PROB, PARLAY_ID, TO_WIN and PROP_TAG. When no parlay is
        selected the frame is empty but still has all of these columns.
    """
    available_props = load_available_props()
    analyzed_props = get_analyzed_props(available_props)
    # The index is reset so that COMBO labels coincide with row positions.
    selected_props = dm.filter_props(analyzed_props, filter_players, n_props).reset_index(drop=True)
    selected_parlays = select_optimal_parlays(selected_props, max_permeation_rate, min_props_in_parlay, max_props_in_parlay)

    parlay_columns = ['PARLAY_EV', 'PARLAY_PROB', 'PARLAY_HOUSE_PROB', 'PARLAY_ID', 'TO_WIN']
    if not selected_parlays:
        # Nothing to concatenate: hand back an empty frame with the full schema.
        return selected_props.iloc[0:0].reindex(
            columns=list(selected_props.columns) + parlay_columns + ['PROP_TAG']
        )

    selected_parlay_dfs = []
    for parlay in selected_parlays:
        # Slicing the rows directly keeps each column's dtype.
        parlay_df = (
            selected_props.iloc[list(parlay['COMBO'])]
            .reset_index(drop=True)
            .assign(
                PARLAY_EV=parlay['COMBINED_EV'],
                PARLAY_PROB=parlay['COMBINED_PROB'],
                PARLAY_HOUSE_PROB=parlay['COMBINED_HOUSE_PROB'],
                PARLAY_ID=parlay['PARLAY_ID'],
                TO_WIN=parlay['TO_WIN'],
            )
        )
        selected_parlay_dfs.append(parlay_df)

    selected_parlay_df = pd.concat(selected_parlay_dfs, axis=0, ignore_index=True)
    selected_parlay_df['PROP_TAG'] = selected_parlay_df['PLAYER'] + "_" + selected_parlay_df['STAT'] + "_" + selected_parlay_df['TYPE']
    return selected_parlay_df


def get_prop_distribution(parlays):
    """Summarise how often each prop appears across the given parlays.

    Args:
        parlays: DataFrame with 'PROP_TAG' and 'PARLAY_ID' columns, one row
            per (parlay, prop) pair.

    Returns:
        DataFrame indexed by prop tag with 'COUNT' (number of appearances)
        and '%' (appearances divided by the number of distinct parlays, i.e.
        a fraction rather than a value scaled to 100).
    """
    tag_counts = parlays['PROP_TAG'].value_counts()
    n_parlays = len(parlays['PARLAY_ID'].unique())
    tag_shares = tag_counts.div(n_parlays)

    distribution = pd.concat([tag_counts, tag_shares], axis=1)
    distribution.columns = ['COUNT', '%']
    return distribution



In [2]:
# Run configuration. All five values are passed straight to
# get_selected_parlays below.
filter_players = []
n_props = 16
max_permeation_rate = 0.4
min_props_in_parlay = 3
max_props_in_parlay = 6

selected_parlays = get_selected_parlays(n_props, max_permeation_rate, min_props_in_parlay, max_props_in_parlay, filter_players)
# Keep only rows whose parlay-level EV exceeds 5. This rebinds
# `selected_parlays`, so re-running only this line filters the already
# filtered frame; the later cells read the filtered version.
selected_parlays = selected_parlays[selected_parlays['PARLAY_EV'] > 5]
distribution = get_prop_distribution(selected_parlays)



162
num of profitable props: 61
              PLAYER       TEAM    STAT  THRESH  ODDS  TYPE      PROB  \
0  Derrick Jones Jr.  Mavericks  points     8.5  -115  over  0.549606   
1        Luka Doncic  Mavericks  points    28.5  -110  over  0.530599   
2   Dereck Lively II  Mavericks  points     7.5   100  over  0.552609   
3     Daniel Gafford  Mavericks  points     8.5   100  over  0.671666   
4       Kyrie Irving  Mavericks  points    21.5  -130  over  0.674553   

         EV  HOUSE_PROB  
0  0.137619    0.534884  
1  0.064813    0.523810  
2  0.526086    0.500000  
3  1.716662    0.500000  
4  0.967197    0.565217  
61
              PLAYER       TEAM    STAT  THRESH  ODDS  TYPE      PROB  \
0  Derrick Jones Jr.  Mavericks  points     8.5  -115  over  0.549606   
1        Luka Doncic  Mavericks  points    28.5  -110  over  0.530599   
2   Dereck Lively II  Mavericks  points     7.5   100  over  0.552609   
3     Daniel Gafford  Mavericks  points     8.5   100  over  0.671666   
4    

In [3]:
# Create a timestamped "<today>/parlays/<YYYYmmddHHMMSS>_study" folder and save
# the filtered parlays there. `folder_name` is reused by the study-packet cell.
today_dir = dt_mng.get_or_create_directory_in_days()
folder_name = dm.create_directory(f"{today_dir}/parlays/{datetime.now().strftime('%Y%m%d%H%M%S')}_study")
selected_parlays.to_csv(f'{folder_name}/parlays.csv')

Directory already exists: e:\coding_projects\nba_01\days
Directory already exists: e:\coding_projects\nba_01\days\2024-05-22
Directory 'e:\coding_projects\nba_01\days\2024-05-22/parlays/20240522142449_study' created successfully.


In [4]:
# Sanity check: list the columns available for the study-packet export below.
print(selected_parlays.columns)

Index(['PLAYER', 'TEAM', 'STAT', 'THRESH', 'ODDS', 'TYPE', 'PROB', 'EV',
       'HOUSE_PROB', 'PARLAY_EV', 'PARLAY_PROB', 'PARLAY_HOUSE_PROB',
       'PARLAY_ID', 'TO_WIN', 'PROP_TAG'],
      dtype='object')


In [5]:
# Build the study packet for the selected parlays: game data for every distinct
# player and team goes into one workbook, and the prop/parlay columns go to
# props.csv. This repeats the steps of create_study_packet inline, but selects
# 'PROP_TAG' and 'PARLAY_HOUSE_PROB' where that function selects 'PROPS' and
# 'HOUSE_PARLAY_PROB'.
players = selected_parlays['PLAYER'].unique()
game_data_players = []
for player in players:
    player_id = dm.get_player_id(player)
    player_game_data = dm.get_and_save_player_data(player_id)
    game_data_players.append(player_game_data)

teams = selected_parlays['TEAM'].unique()
game_data_teams = []
for team in teams:
    team_id = dm.get_team_id(team)
    team_game_data = dm.get_and_save_team_data(team_id)
    game_data_teams.append(team_game_data)

props = selected_parlays.drop_duplicates()[['PROP_TAG', 'THRESH', 'PARLAY_ID', 'PARLAY_EV', 'PARLAY_PROB', 'PARLAY_HOUSE_PROB', 'PROB', 'HOUSE_PROB', 'EV', 'TO_WIN']]
# Players first, then teams, keyed by name.
game_data = dict(zip(players, game_data_players))
for i, team in enumerate(teams):
    game_data[team] = game_data_teams[i]
# `folder_name` comes from the cell that created the timestamped study folder.
dm.save_as_excel_workbook(game_data, folder_name + "/study_packet")
props.to_csv(folder_name + "/props.csv")

Naz Reid    player_name player_position  minutes  points  rebounds  assists    efg  \
89    Naz Reid             C-F     21.0      11         4        1  0.643   
90    Naz Reid             C-F     22.0      10        11        1  0.333   
91    Naz Reid             C-F     19.0       7         3        0  0.375   
6     Naz Reid             C-F     18.0      11         4        0  0.917   
0     Naz Reid             C-F     20.0       7         1        1  0.357   
..         ...             ...      ...     ...       ...      ...    ...   
52    Naz Reid             C-F     22.0      16         5        1  0.722   
74    Naz Reid             C-F     19.0      16         5        1  0.722   
62    Naz Reid             C-F     23.0      13         1        0  0.542   
16    Naz Reid             C-F     28.0      25         8        1  0.857   
48    Naz Reid             C-F     23.0      10         4        1  0.375   

    fg3a  fg3m  fg3_pct  fga  fgm  fta  ft_pct  steals  blocks    