In [26]:
import os
import itertools
import random
import pandas as pd
from itertools import combinations
from datetime import datetime

import date_utils as dt_mng
from data_manager import DataManager
import analyze

# Shared DataManager instance; Prop and the helper functions in this cell all go through it.
dm = DataManager()

class Prop:
    """A single over/under player prop plus its evaluation against the book's odds.

    On construction the prop immediately computes:
      * ``probability`` - estimate from ``analyze.estimate_probability_poisson_over`` /
        ``..._under`` using the player's most recent ``last_n_games`` rows
        (rows are sorted by the 'date' column, newest first);
      * ``ev`` and ``house_prob`` - from ``analyze.calculate_ev`` and
        ``analyze.estimate_implied_probability`` applied to the decimal odds.

    Args:
        name: Player name, resolved to an id through ``dm.get_player_id``.
        team: Team label stored on the prop (not used in the calculations here).
        stat: Name of the stat column passed to the ``analyze`` estimators.
        threshold: The line for the prop; stored as ``self.n``.
        odds: American odds (e.g. -110, 105).
        bet_type: Either "over" or "under".
        stake: Stake forwarded to ``analyze.calculate_ev`` (defaults to the
            previously hard-coded 5).
        last_n_games: How many of the most recent games feed the probability
            estimate (defaults to the previously hard-coded 3).

    Raises:
        ValueError: If ``bet_type`` is not "over"/"under" or ``odds`` is 0.
    """

    def __init__(self, name, team, stat, threshold, odds, bet_type, stake=5, last_n_games=3):
        self.name = name
        self.team = team
        self.stat = stat
        self.n = threshold
        self.odds = odds
        self.bet_type = bet_type
        self.stake = stake
        self.last_n_games = last_n_games
        self.probability = self.get_prop_probability(last_n_games)
        self.ev, self.house_prob = self.get_ev_and_implied_prob()
        # Human-readable summary of the prop.
        self.print_out = f"""
            PLAYER: {self.name}
              STAT: {self.stat}
            THRESH: {self.n}
              ODDS: {self.odds}
              TYPE: {self.bet_type}
              PROB: {self.probability}
                EV: {self.ev}
        HOUSE_PROB: {self.house_prob}
            """
        # Flat record of the same fields, keyed by upper-case names.
        self.entry = {
            "PLAYER": self.name,
            "TEAM": self.team,
            "STAT": self.stat,
            "THRESH": self.n,
            "ODDS": self.odds,
            "TYPE": self.bet_type,
            "PROB": self.probability,
            "EV": self.ev,
            "HOUSE_PROB": self.house_prob,
        }

    def get_prop_probability(self, last_n_games=3):
        """Estimate the probability that this prop hits.

        Args:
            last_n_games: Number of most recent games (by 'date') to use.

        Returns:
            Whatever the matching ``analyze.estimate_probability_poisson_*``
            function returns for the recent games, stat and threshold.

        Raises:
            ValueError: If ``self.bet_type`` is neither "over" nor "under".
        """
        player_id = dm.get_player_id(self.name)
        history = dm.get_and_save_player_data(player_id, self.name)
        recent_games = history.sort_values(by='date', ascending=False).head(last_n_games).copy()
        if self.bet_type == "over":
            return analyze.estimate_probability_poisson_over(recent_games, self.stat, self.n)
        elif self.bet_type == "under":
            return analyze.estimate_probability_poisson_under(recent_games, self.stat, self.n)
        else:
            raise ValueError("Invalid bet type. Use 'over' or 'under'.")

    def get_ev_and_implied_prob(self):
        """Return ``(ev, house_probability)`` for this prop.

        Both values are computed from the decimal form of ``self.odds``; the
        EV additionally uses ``self.probability`` and ``self.stake``.
        """
        odds = self.american_to_decimal(self.odds)
        house_probability = analyze.estimate_implied_probability(odds)
        ev = analyze.calculate_ev(self.probability, odds, self.stake)
        return ev, house_probability

    @staticmethod
    def american_to_decimal(american_odds):
        """Convert American odds to decimal odds.

        Positive odds map to ``1 + odds / 100``; negative odds map to
        ``1 + 100 / |odds|``.

        Raises:
            ValueError: If ``american_odds`` is 0, which is not a valid line and
                would otherwise divide by zero.
        """
        if american_odds == 0:
            raise ValueError("American odds cannot be 0.")
        if american_odds > 0:
            return 1 + (american_odds / 100)
        return 1 + (100 / abs(american_odds))
    

def extract_raw_data(file_path): # .csv
    """Return the first column of the CSV at ``file_path`` as a plain list.

    The file is read with ``pd.read_csv`` defaults, so its first line is
    consumed as the header and is not part of the returned values.
    """
    first_column = pd.read_csv(file_path).iloc[:, 0]
    return [cell for cell in first_column]


def load_available_props(file_path):
    """Parse one prop-lines CSV into a tidy DataFrame of over/under lines.

    The first column of the file is scanned cell by cell:
      * a cell matching one of the known category labels sets the current stat;
      * a cell matching a team nickname (from ``dm.query_teams()``) sets the
        current team;
      * a cell matching a player name (from ``dm.query_players()``) starts a new
        record, and the next four non-missing cells are taken, in order, as
        over threshold, over odds, under threshold and under odds.

    Args:
        file_path: Path to the CSV file to read.

    Returns:
        pandas.DataFrame with columns ``player_name``, ``team``, ``stat``,
        ``over_threshold``, ``over_odds``, ``under_threshold``, ``under_odds``.
        Thresholds are floats, odds are ints.

    Raises:
        AssertionError: If a player name appears before any team name.
    """
    stat_names = {
        'PointsSGP': "points",
        'AssistsSGP': "assists",
        'Threes MadeSGP': "fg3m",
        'ReboundsSGP': "rebounds",
        'Field Goals MadeSGP': "fgm",
        'StealsSGP': "steals",
        'BlocksSGP': "blocks",
    }
    values_per_player = 4  # over threshold, over odds, under threshold, under odds

    raw_input = extract_raw_data(file_path)
    # Sets give O(1) membership tests; every cell is checked against both.
    player_names = {player.name for player in dm.query_players()}
    team_names = {team.nickname for team in dm.query_teams()}

    current_category = None
    current_team = None
    record = None
    cells_pending = 0  # how many value cells the open record still needs
    records = []
    for item in raw_input:
        # pandas represents missing cells as NaN (never None), so test with isna.
        if pd.isna(item):
            continue
        if item in stat_names:
            current_category = stat_names[item]
        if item in team_names:
            current_team = item
        if item in player_names:
            assert current_team, f"player {item!r} appears before any team name"
            record = [item, current_team, current_category]
            cells_pending = values_per_player
            continue  # the player-name cell itself is not a value cell
        if cells_pending:
            record.append(item)
            cells_pending -= 1
            if cells_pending == 0:
                records.append(record)
                record = None

    df = pd.DataFrame.from_records(
        records,
        columns=["player_name", "team", "stat", "over_threshold", "over_odds", "under_threshold", "under_odds"],
    )

    for text_column in ("player_name", "team", "stat"):
        df[text_column] = df[text_column].astype(str)
    # Accept both "22.5" and whole-number lines such as "22"; the previous
    # pattern required a decimal point and turned integer lines into NaN.
    threshold_pattern = r'(\d+(?:\.\d+)?)'
    for threshold_column in ("over_threshold", "under_threshold"):
        df[threshold_column] = df[threshold_column].str.extract(threshold_pattern, expand=False).astype(float)
    for odds_column in ("over_odds", "under_odds"):
        df[odds_column] = df[odds_column].astype(int)

    return df


def process_all_csv_files_in_directory(directory):
    """Load every ``.csv`` file in ``directory`` and stack the parsed props.

    Files are processed in sorted filename order so the row order of the result
    does not depend on the order ``os.listdir`` happens to return. Each file
    path is printed before it is parsed.

    Args:
        directory: Folder containing prop-line CSV files.

    Returns:
        pandas.DataFrame: Concatenation (fresh 0..n-1 index) of
        ``load_available_props`` applied to each CSV file.

    Raises:
        ValueError: If the directory contains no ``.csv`` files.
    """
    csv_names = sorted(name for name in os.listdir(directory) if name.endswith(".csv"))
    if not csv_names:
        raise ValueError(f"No .csv files found in directory: {directory}")

    frames = []
    for name in csv_names:
        file_path = os.path.join(directory, name)
        print(file_path)
        frames.append(load_available_props(file_path))

    return pd.concat(frames, ignore_index=True)


def get_analyzed_props(available_props):
    """Build a ``Prop`` for the over side and the under side of every row.

    Args:
        available_props: DataFrame with ``player_name``, ``team``, ``stat`` and
            ``{over,under}_threshold`` / ``{over,under}_odds`` columns.

    Returns:
        list: Props in row order, each row contributing its "over" prop
        followed by its "under" prop.
    """
    return [
        Prop(
            name=line["player_name"],
            team=line["team"],
            stat=line["stat"],
            threshold=line[f"{side}_threshold"],
            odds=line[f"{side}_odds"],
            bet_type=side,
        )
        for _, line in available_props.iterrows()
        for side in ("over", "under")
    ]


def generate_heterogenous_combinations(df, n, top_k=None):
    """Rank every n-row combination of ``df`` by how varied it is.

    A combination's score is the number of distinct values it has in the
    'PLAYER' column plus the same count for 'STAT' and for 'TEAM'.
    Combinations are returned from highest to lowest score; ties keep the
    order in which ``itertools.combinations`` produced them. Every returned
    combination is also printed together with its rows.

    Args:
        df: DataFrame with 'PLAYER', 'STAT' and 'TEAM' columns.
        n: Number of rows per combination.
        top_k: If given, keep only the ``top_k`` best-scoring combinations.
            The default (None) keeps all of them, as before.

    Returns:
        list of ``(sub_dataframe, score)`` tuples.
    """
    def evaluate_heterogeneity(comb):
        rows = list(comb)
        return sum(len(set(df.loc[rows, column])) for column in ('PLAYER', 'STAT', 'TEAM'))

    scored = [(comb, evaluate_heterogeneity(comb)) for comb in itertools.combinations(df.index, n)]
    # sorted() is stable, so equal scores stay in generation order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    if top_k is not None:
        ranked = ranked[:top_k]

    for comb, score in ranked:
        print(f"Combination: {comb}, Score: {score}")
        print(df.loc[list(comb)])
        print()

    return [(df.loc[list(comb)], score) for comb, score in ranked]


def generate_unique_combinations(df, combo_length):
    """Return index combinations with no repeated player and no repeated content.

    A combination is kept when no 'Player' value occurs twice in it and when
    its sorted set of ``(Player, Category, Bet)`` triples has not already been
    produced by an earlier combination.

    Args:
        df: DataFrame with 'Player', 'Category' and 'Bet' columns.
        combo_length: Number of rows per combination.

    Returns:
        list of tuples of index labels, in generation order.
    """
    seen_signatures = set()
    kept = []

    for candidate in list(combinations(df.index, combo_length)):
        rows = df.loc[list(candidate)]
        if df.loc[list(candidate), 'Player'].duplicated().sum() != 0:
            continue
        signature = tuple(sorted(rows.apply(lambda r: (r['Player'], r['Category'], r['Bet']), axis=1)))
        if signature in seen_signatures:
            continue
        seen_signatures.add(signature)
        kept.append(candidate)

    return kept




def distribute_props_into_parlays(props_df, n_parlays, rng=None):
    """
    Randomly places each prop into one or more parlays, weighted by its probability.

    A prop is placed into ``max(1, int(PROB / sum(PROB) * n_parlays))`` distinct
    parlays chosen at random, so every prop lands in at least one parlay and
    higher-probability props land in more of them.

    :param props_df: pandas DataFrame with a 'PROB' column; every other column is
        carried along unchanged inside the per-prop dicts
    :param n_parlays: Number of parlays to distribute the props into
    :param rng: Optional ``random.Random`` instance (or any object with a
        compatible ``sample`` method) for reproducible draws; defaults to the
        module-level ``random`` generator
    :return: List of ``n_parlays`` parlays, where each parlay is a list of row dicts
    """
    sampler = random if rng is None else rng
    parlays = [[] for _ in range(n_parlays)]

    total_probability = props_df["PROB"].sum()

    for _, row in props_df.iterrows():
        # With a zero total there is nothing to weight by; fall back to the
        # one-parlay minimum instead of dividing by zero.
        share = row["PROB"] / total_probability if total_probability > 0 else 0
        num_parlays_for_prop = max(1, int(share * n_parlays))

        for parlay in sampler.sample(parlays, num_parlays_for_prop):
            parlay.append(row.to_dict())

    return parlays


def remove_rows_by_value(df, column_name, values_to_remove):
    """
    Drop every row whose ``column_name`` value appears in ``values_to_remove``.

    Parameters:
    df (pandas.DataFrame): Frame to filter; it is not modified.
    column_name (str): Column whose values are tested.
    values_to_remove (list): Values that disqualify a row.

    Returns:
    pandas.DataFrame: The surviving rows, with their original index labels.
    """
    keep_mask = ~df[column_name].isin(values_to_remove)
    return df.loc[keep_mask]


def remove_rows_by_shared_id(df, target_column, values_to_check, id_column):
    """
    Drop whole groups of rows when any member of the group is flagged.

    A row is flagged when its ``target_column`` value is in ``values_to_check``.
    Every row sharing an ``id_column`` value with a flagged row is removed,
    flagged or not.

    Parameters:
    df (pandas.DataFrame): Frame to filter; it is not modified.
    target_column (str): Column tested against ``values_to_check``.
    values_to_check (list): Values that flag a row.
    id_column (str): Column that defines which rows belong together.

    Returns:
    pandas.DataFrame: Rows whose id is not shared with any flagged row.
    """
    flagged = df[target_column].isin(values_to_check)
    tainted_ids = df.loc[flagged, id_column].unique()
    untouched = ~df[id_column].isin(tainted_ids)
    return df.loc[untouched]


def create_study_packet(parlay_study, folder_name):
    """Write the game data and prop summary backing a set of parlays to disk.

    For each distinct 'PLAYER' and each distinct 'TEAM' in ``parlay_study`` the
    game data is fetched through ``dm`` and handed, keyed by player/team name,
    to ``dm.save_as_excel_workbook`` under ``<folder_name>/study_packet``. The
    de-duplicated prop/parlay columns are written to ``<folder_name>/props.csv``.

    Args:
        parlay_study: DataFrame with 'PLAYER', 'TEAM' and the prop/parlay
            columns selected below.
        folder_name: Output folder path as a string.
    """
    players = parlay_study['PLAYER'].unique()
    player_frames = [dm.get_and_save_player_data(dm.get_player_id(player)) for player in players]

    teams = parlay_study['TEAM'].unique()
    team_frames = [dm.get_and_save_team_data(dm.get_team_id(team)) for team in teams]

    prop_columns = ['PROPS', 'THRESH', 'PARLAY_ID', 'PARLAY_EV', 'PARLAY_PROB', 'HOUSE_PARLAY_PROB', 'PROB', 'HOUSE_PROB', 'EV', 'TO_WIN']
    props = parlay_study.drop_duplicates()[prop_columns]

    # Players first, then teams, so a team entry wins if a name ever collides.
    game_data = dict(zip(players, player_frames))
    game_data.update(zip(teams, team_frames))
    dm.save_as_excel_workbook(game_data, folder_name + "/study_packet")
    props.to_csv(folder_name + "/props.csv")


def generate_candidates(n_props, n_parlays, parlay_len, cycles, filter_players, prop_filter, props_path=None):
    """Generate candidate parlays by repeated random distribution of props.

    Props are loaded from ``props_path``, analysed, filtered through
    ``dm.filter_props`` and then distributed into parlays ``cycles`` times.
    Only parlays with exactly ``parlay_len`` props are kept and passed to
    ``analyze.analyze_parlays``. Any parlay containing a prop whose
    "PLAYER-STAT-TYPE" tag is in ``prop_filter`` is removed entirely.

    Args:
        n_props: Forwarded to ``dm.filter_props``.
        n_parlays: Number of parlays created per distribution cycle.
        parlay_len: Exact number of props a kept parlay must contain.
        cycles: Number of distribution rounds.
        filter_players: Forwarded to ``dm.filter_props``.
        prop_filter: "PLAYER-STAT-TYPE" tags that disqualify a parlay.
        props_path: Prop-lines CSV file, or a directory of such files. The
            loader has always needed a path, and calling it without one (as this
            function previously did) raised ``TypeError`` on every call.

    Returns:
        The analysed parlays frame with an added 'PROPS' column.

    Raises:
        ValueError: If ``props_path`` is not supplied.
    """
    if props_path is None:
        raise ValueError("props_path is required: pass a prop-lines CSV file or a directory of CSV files")

    if os.path.isdir(props_path):
        available_props = process_all_csv_files_in_directory(props_path)
    else:
        available_props = load_available_props(props_path)

    analyzed_props = get_analyzed_props(available_props)
    filtered_df = dm.filter_props(analyzed_props, filter_players, n_props)

    # Flatten all cycles and keep only parlays of the requested length.
    parlays = [
        parlay
        for _ in range(cycles)
        for parlay in distribute_props_into_parlays(filtered_df, n_parlays)
        if len(parlay) == parlay_len
    ]
    parlays = analyze.analyze_parlays(parlays)
    parlays['PROPS'] = parlays['PLAYER'] + "-" + parlays['STAT'] + "-" + parlays['TYPE']
    parlays = remove_rows_by_shared_id(parlays, 'PROPS', prop_filter, 'PARLAY_ID')
    return parlays


def select_top_parlays(parlays_df, top_n):
    """Keep the rows of the ``top_n`` parlays with the highest 'PARLAY_EV'.

    Rows are sorted by 'PARLAY_EV' descending; the first ``top_n`` distinct
    'PARLAY_ID' values in that order are kept, and all of their rows are
    returned in the sorted order.
    """
    ranked = parlays_df.sort_values(by='PARLAY_EV', ascending=False)
    best_ids = ranked['PARLAY_ID'].unique()[:top_n]
    is_best = ranked['PARLAY_ID'].isin(best_ids)
    return ranked[is_best]

def generate_parlays(df, min_props, max_props):
    """Enumerate prop combinations and keep those with a large enough EV sum.

    For every combination of ``min_props``..``max_props`` index labels of
    ``df`` the function multiplies the props' 'PROB', 'HOUSE_PROB' and decimal
    odds and sums their 'EV'. A combination is kept when its EV sum is greater
    than ``min_props``.

    Args:
        df: DataFrame with 'PROB', 'ODDS' (American), 'HOUSE_PROB' and 'EV'.
        min_props: Smallest combination size; also the EV-sum cutoff.
        max_props: Largest combination size (inclusive).

    Returns:
        pandas.DataFrame with one row per kept combination and columns
        'COMBO' (tuple of index labels), 'COMBINED_PROB',
        'COMBINED_HOUSE_PROB', 'COMBINED_EV' and 'TO_WIN' (profit on a unit
        stake at the combined decimal odds).

    Raises:
        RuntimeError: If no combination has been kept by the time any
            combination size finishes.
    """
    labels = list(df.index)
    # Map each label to its first row position so per-prop values are read by a
    # dict lookup instead of re-filtering the whole frame for every prop of
    # every combination. "First" mirrors the previous `.values[0]` behaviour.
    first_position = {}
    for position, label in enumerate(labels):
        first_position.setdefault(label, position)

    probs = df['PROB'].values
    house_probs = df['HOUSE_PROB'].values
    evs = df['EV'].values
    # Convert each prop's odds once rather than once per combination.
    # Assumes analyze.american_to_decimal is a pure function - TODO confirm.
    decimal_odds = [analyze.american_to_decimal(odds) for odds in df['ODDS'].values]

    parlays = []
    for r in range(min_props, max_props + 1):
        for combination in itertools.combinations(labels, r):
            prob_product = 1
            house_prob_product = 1
            odds_product = 1
            ev_sum = 0
            for idx in combination:
                position = first_position[idx]
                prob_product *= probs[position]
                odds_product *= decimal_odds[position]
                house_prob_product *= house_probs[position]
                ev_sum += evs[position]
            # Computed once per combination, so it is always defined.
            potential_winnings = 1 * (odds_product - 1)
            # NOTE(review): the cutoff is the prop count itself - confirm intended.
            if ev_sum > min_props:
                parlays.append({
                    'COMBO': combination,
                    'COMBINED_PROB': prob_product,
                    'COMBINED_HOUSE_PROB': house_prob_product,
                    'COMBINED_EV': ev_sum,
                    'TO_WIN': potential_winnings,
                })
        # Checked after each size, so this fails as soon as the sizes tried so
        # far have produced nothing. NOTE(review): confirm this is intended.
        if not parlays:
            raise RuntimeError("parlays is empty")
    return pd.DataFrame(parlays)

# def select_optimal_parlays(prop_df, max_permeation_rate, min_props, max_props):
#     parlays_df = generate_parlays(prop_df, min_props, max_props)
#     parlays_df = parlays_df.sort_values(by="COMBINED_EV", ascending=False).reset_index(drop=True)
#     parlays_df['PARLAY_ID'] = parlays_df.index
#     num_parlays = int(len(prop_df) / max_permeation_rate)
#     parlays_selected = []
#     prop_counts = {prop: 0 for prop in prop_df.index}

#     for _, parlay in parlays_df.iterrows():
#         can_add_parlay = True
#         seen = {}
#         for idx in parlay['COMBO']:
#             prop_row = prop_df.iloc[idx]
#             player = prop_row['PLAYER']
#             stat = prop_row['STAT']

#             if stat == 'points':
#                 if player in seen and seen[player] == 'fgm':
#                     print('A')
#                     can_add_parlay = False
#                     break
#             if stat == 'fgm':
#                 if player in seen and seen[player] == 'points':
#                     print('B')
#                     can_add_parlay = False
#                     break
#             if prop_counts[idx] >= num_parlays * max_permeation_rate:
#                     can_add_parlay = False
#                     break
#             seen[player] = stat


#         if can_add_parlay:
#             parlays_selected.append(parlay)
#             for idx in parlay['COMBO']:
#                 prop_counts[idx] += 1
#     return parlays_selected


def calculate_num_parlays(prop_df, max_permeation_rate, min_props, max_props):
    """
    Derive a parlay count from the number of props and the size/permeation limits.

    The result is ``len(prop_df)`` divided by the product of the mean parlay
    size, ``(min_props + max_props) / 2``, and ``max_permeation_rate``,
    truncated to an int.

    Args:
        prop_df (pd.DataFrame): Props under consideration; only its length is used.
        max_permeation_rate (float): Maximum rate at which a prop can be permeated.
        min_props (int): Minimum number of props in a parlay.
        max_props (int): Maximum number of props in a parlay.

    Returns:
        int: The computed number of parlays.
    """
    mean_parlay_size = (min_props + max_props) / 2
    divisor = mean_parlay_size * max_permeation_rate
    return int(len(prop_df) / divisor)

def select_optimal_parlays(prop_df, max_permeation_rate, player_permeation_rate, min_props, max_props):
    """
    Greedily selects parlays in descending combined-EV order, subject to usage caps.

    Candidates come from ``generate_parlays`` and are visited from highest to
    lowest 'COMBINED_EV'. A candidate is rejected when:
      * it pairs a 'points' prop with an 'fgm' prop for the same player;
      * any of its props has already been used ``num_parlays * max_permeation_rate``
        times or more;
      * any of its players has already appeared in
        ``num_parlays * player_permeation_rate`` parlays or more,
    where ``num_parlays`` comes from ``calculate_num_parlays``.

    Args:
        prop_df (pd.DataFrame): DataFrame containing prop information with columns 'PLAYER', 'STAT', etc.
            Its index labels are what 'COMBO' refers to.
        max_permeation_rate (float): Maximum rate at which a prop can be permeated.
        player_permeation_rate (float): Maximum rate at which a player can be permeated.
        min_props (int): Minimum number of props in a parlay.
        max_props (int): Maximum number of props in a parlay.

    Returns:
        list: Selected parlays as row Series (with 'COMBO', the combined
        metrics and 'PARLAY_ID'), in selection order.
    """
    parlays_df = generate_parlays(prop_df, min_props, max_props)
    parlays_df = parlays_df.sort_values(by="COMBINED_EV", ascending=False).reset_index(drop=True)
    # After the sort/reset the row position doubles as a unique parlay id.
    parlays_df['PARLAY_ID'] = parlays_df.index

    num_parlays = calculate_num_parlays(prop_df, max_permeation_rate, min_props, max_props)
    prop_limit = num_parlays * max_permeation_rate
    player_limit = num_parlays * player_permeation_rate

    # Stats that may not be combined for the same player within one parlay.
    conflicting_stat = {'points': 'fgm', 'fgm': 'points'}

    parlays_selected = []
    prop_counts = {prop: 0 for prop in prop_df.index}
    player_counts = {player: 0 for player in prop_df['PLAYER']}

    for _, parlay in parlays_df.iterrows():
        can_add_parlay = True
        stats_by_player = {}

        for idx in parlay['COMBO']:
            # COMBO holds index labels, so look rows up by label; positional
            # access only matched when the index was a fresh 0..n-1 range.
            prop_row = prop_df.loc[idx]
            player = prop_row['PLAYER']
            stat = prop_row['STAT']

            rival = conflicting_stat.get(stat)
            if rival is not None and rival in stats_by_player.get(player, ()):
                can_add_parlay = False
                break

            if prop_counts[idx] >= prop_limit:
                can_add_parlay = False
                break

            if player_counts[player] >= player_limit:
                can_add_parlay = False
                break

            stats_by_player.setdefault(player, []).append(stat)

        if can_add_parlay:
            parlays_selected.append(parlay)
            for idx in parlay['COMBO']:
                prop_counts[idx] += 1
            # A player counts once per parlay, however many of their props it holds.
            for player in stats_by_player:
                player_counts[player] += 1

    return parlays_selected


def get_selected_parlays(n_props, max_prop_permeation_rate, max_player_permeation_rate, min_props_in_parlay, max_props_in_parlay, filter_players, props_directory=r"E:\coding_projects\nba_01\prop_lines\proplines_11022024"):
    """Run the full pipeline from prop-line CSVs to a flat table of selected parlays.

    Steps: load every CSV in ``props_directory``, build ``Prop`` objects, filter
    them with ``dm.filter_props``, select parlays with
    ``select_optimal_parlays`` and expand each parlay into one row per prop.

    Args:
        n_props: Forwarded to ``dm.filter_props``.
        max_prop_permeation_rate: Per-prop cap passed to ``select_optimal_parlays``.
        max_player_permeation_rate: Per-player cap passed to ``select_optimal_parlays``.
        min_props_in_parlay: Smallest parlay size.
        max_props_in_parlay: Largest parlay size.
        filter_players: Forwarded to ``dm.filter_props``.
        props_directory: Folder of prop-line CSV files. Defaults to the path
            that used to be hard-coded here.

    Returns:
        pandas.DataFrame: One row per (parlay, prop) with the prop's own
        columns plus 'PARLAY_EV', 'PARLAY_PROB', 'PARLAY_HOUSE_PROB',
        'PARLAY_ID', 'TO_WIN' and 'PROP_TAG' ("PLAYER_STAT_TYPE").

    Raises:
        ValueError: If no parlay was selected.
    """
    available_props = process_all_csv_files_in_directory(props_directory)
    analyzed_props = get_analyzed_props(available_props)
    # Reset the index so COMBO labels produced downstream are 0..n-1.
    selected_props = dm.filter_props(analyzed_props, filter_players, n_props).reset_index(drop=True)
    selected_parlays = select_optimal_parlays(selected_props, max_prop_permeation_rate, max_player_permeation_rate, min_props_in_parlay, max_props_in_parlay)
    if not selected_parlays:
        raise ValueError("No parlays satisfied the selection constraints")

    selected_parlay_dfs = []
    for parlay in selected_parlays:
        # COMBO holds index labels of selected_props, so look up by label.
        parlay_rows = [selected_props.loc[label] for label in parlay['COMBO']]
        parlay_df = pd.concat(parlay_rows, axis=1, ignore_index=True).T
        parlay_df['PARLAY_EV'] = parlay['COMBINED_EV']
        parlay_df['PARLAY_PROB'] = parlay['COMBINED_PROB']
        parlay_df['PARLAY_HOUSE_PROB'] = parlay['COMBINED_HOUSE_PROB']
        parlay_df['PARLAY_ID'] = parlay['PARLAY_ID']
        parlay_df['TO_WIN'] = parlay['TO_WIN']
        selected_parlay_dfs.append(parlay_df)

    selected_parlay_df = pd.concat(selected_parlay_dfs, axis=0, ignore_index=True)
    selected_parlay_df['PROP_TAG'] = selected_parlay_df['PLAYER'] + "_" + selected_parlay_df['STAT'] + "_" + selected_parlay_df['TYPE']
    return selected_parlay_df


def get_prop_distribution(parlays):
    """Summarise how often each prop tag appears across the parlays.

    Returns a DataFrame indexed by 'PROP_TAG' value with two columns:
    'COUNT', the number of rows carrying the tag, and '%', that count divided
    by the number of distinct 'PARLAY_ID' values.
    """
    tag_counts = parlays['PROP_TAG'].value_counts()
    n_parlays = len(parlays['PARLAY_ID'].unique())
    tag_share = pd.Series(tag_counts / n_parlays)
    summary = pd.concat([tag_counts, tag_share], axis=1)
    summary.columns = ['COUNT', '%']
    return summary


def get_player_distribution(parlays):
    """Summarise in how many distinct parlays each player appears.

    Returns a DataFrame with columns 'PLAYER', 'PARLAY_COUNTS' (number of
    distinct 'PARLAY_ID' values for the player) and '%' (that number divided by
    the total number of distinct parlays), sorted by 'PARLAY_COUNTS' descending.
    """
    per_player = parlays.groupby('PLAYER')['PARLAY_ID'].nunique().reset_index()
    per_player.columns = ['PLAYER', 'PARLAY_COUNTS']
    n_parlays = len(parlays['PARLAY_ID'].unique())
    per_player['%'] = per_player['PARLAY_COUNTS'] / n_parlays
    return per_player.sort_values(by="PARLAY_COUNTS", ascending=False)


def create_pivot_table_for_tracking(df):
    """Build a prop-by-parlay grid marking which props belong to which parlay.

    Each row is a ('PROP_TAG', 'THRESH') pair and each remaining column is a
    'PARLAY_ID'. A cell holds the aggregated marker value where the prop is in
    that parlay and an empty string otherwise. 'PROP_TAG' and 'THRESH' are
    returned as ordinary columns.
    """
    marked = df.assign(value=1)
    grid = pd.pivot_table(
        marked,
        index=['PROP_TAG', 'THRESH'],
        columns='PARLAY_ID',
        values='value',
        fill_value="",
    )
    return grid.reset_index()



In [27]:
# Parse every prop-line CSV in the dated folder into one DataFrame for inspection.
df =  process_all_csv_files_in_directory(r"E:\coding_projects\nba_01\prop_lines\proplines_11022024")

E:\coding_projects\nba_01\prop_lines\proplines_11022024\Book1.csv
E:\coding_projects\nba_01\prop_lines\proplines_11022024\prop_lines.csv


In [28]:
# Show the parsed prop lines (one row per player/stat with over and under sides).
display(df)

Unnamed: 0,player_name,team,stat,over_threshold,over_odds,under_threshold,under_odds
0,DeMar DeRozan,Kings,points,22.5,-120,22.5,-110
1,Domantas Sabonis,Kings,points,18.5,-110,18.5,-120
2,Keegan Murray,Kings,points,13.5,-120,13.5,-110
3,Malik Monk,Kings,points,13.5,-105,13.5,-125
4,De'Aaron Fox,Kings,points,24.5,-125,24.5,-105
...,...,...,...,...,...,...,...
88,Caleb Martin,76ers,fgm,4.5,-110,4.5,-120
89,Andre Drummond,76ers,fgm,4.5,-105,4.5,-125
90,Kelly Oubre Jr.,76ers,fgm,6.5,-105,6.5,-125
91,Tyrese Maxey,76ers,fgm,10.5,105,10.5,-135


In [29]:
# Selection settings for get_selected_parlays.
# filter_players and n_props are forwarded to dm.filter_props.
filter_players = []
n_props = 42
# Permeation rates are multiplied by the computed parlay count to cap how often
# a single prop / a single player may be used across the selected parlays.
max_prop_permeation_rate = 0.3
max_player_permeation_rate = 0.5
# Equal min and max means every parlay has exactly this many props.
min_props_in_parlay = 4
max_props_in_parlay = 4

# Run the pipeline, then summarise how the selection is spread over props and players.
selected_parlays = get_selected_parlays(n_props, max_prop_permeation_rate, max_player_permeation_rate, min_props_in_parlay, max_props_in_parlay, filter_players)
prop_distribution = get_prop_distribution(selected_parlays)
player_distribution = get_player_distribution(selected_parlays)



E:\coding_projects\nba_01\prop_lines\proplines_11022024\Book1.csv
E:\coding_projects\nba_01\prop_lines\proplines_11022024\prop_lines.csv
186
num of profitable props: 74
             PLAYER   TEAM    STAT  THRESH  ODDS   TYPE      PROB        EV  \
0     DeMar DeRozan  Kings  points    22.5  -110  under  0.556375  0.310854   
1  Domantas Sabonis  Kings  points    18.5  -110   over  0.884875  3.446537   
2     Keegan Murray  Kings  points    13.5  -110  under  0.749535  2.154653   
3        Malik Monk  Kings  points    13.5  -125  under  0.781291  2.031621   
4      De'Aaron Fox  Kings  points    24.5  -105  under  0.581031  0.671967   

   HOUSE_PROB  
0    0.523810  
1    0.523810  
2    0.523810  
3    0.555556  
4    0.512195  
74


In [30]:
# Inspect per-player and per-prop usage, then the number of distinct parlays selected.
print(player_distribution)
print(prop_distribution)
print(len(selected_parlays['PARLAY_ID'].unique()))


               PLAYER  PARLAY_COUNTS         %
8       Keegan Murray             18  0.257143
1        Caleb Martin             18  0.257143
15       Tyrese Maxey             18  0.257143
13         RJ Barrett             18  0.257143
12         Malik Monk             18  0.257143
10      Kevin Huerter             18  0.257143
16          Zach Edey             18  0.257143
6           Ja Morant             18  0.257143
5         Gradey Dick             18  0.257143
4    Domantas Sabonis             18  0.257143
2     Davion Mitchell             18  0.257143
7   Jaren Jackson Jr.             15  0.214286
9     Kelly Oubre Jr.             11  0.157143
3       DeMar DeRozan             11  0.157143
14       Santi Aldama             11  0.157143
0      Andre Drummond             11  0.157143
11         Kyle Lowry              4  0.057143
                                 COUNT         %
PROP_TAG                                        
Kevin Huerter_points_over           11  0.157143
DeMar D

In [31]:
# Create a timestamped "<today>/parlays/<YYYYmmddHHMMSS>_study" folder and save the selection there.
today_dir = dt_mng.get_or_create_directory_in_days()
folder_name = dm.create_directory(f"{today_dir}/parlays/{datetime.now().strftime('%Y%m%d%H%M%S')}_study")
selected_parlays.to_csv(f'{folder_name}/parlays.csv')

Directory already exists: e:\coding_projects\nba_01\days
Directory already exists: e:\coding_projects\nba_01\days\2024-11-02
Directory 'e:\coding_projects\nba_01\days\2024-11-02/parlays/20241102014634_study' created successfully.


In [32]:
# List the columns available on the selected-parlays table.
print(selected_parlays.columns)

Index(['PLAYER', 'TEAM', 'STAT', 'THRESH', 'ODDS', 'TYPE', 'PROB', 'EV',
       'HOUSE_PROB', 'PARLAY_EV', 'PARLAY_PROB', 'PARLAY_HOUSE_PROB',
       'PARLAY_ID', 'TO_WIN', 'PROP_TAG'],
      dtype='object')


In [33]:
# Build the study packet for the selected parlays.
# This follows the same steps as create_study_packet, but selects the
# 'PROP_TAG' and 'PARLAY_HOUSE_PROB' columns that selected_parlays has
# (create_study_packet asks for 'PROPS' and 'HOUSE_PARLAY_PROB' instead).

# Game data for every distinct player in the selection.
players = selected_parlays['PLAYER'].unique()
game_data_players = []
for player in players:
    player_id = dm.get_player_id(player)
    player_game_data = dm.get_and_save_player_data(player_id)
    game_data_players.append(player_game_data)

# Game data for every distinct team in the selection.
teams = selected_parlays['TEAM'].unique()
game_data_teams = []
for team in teams:
    team_id = dm.get_team_id(team)
    team_game_data = dm.get_and_save_team_data(team_id)
    game_data_teams.append(team_game_data)

# De-duplicated prop/parlay summary, then one mapping of name -> game data
# (players first, teams added after) for the workbook.
props = selected_parlays.drop_duplicates()[['PROP_TAG', 'THRESH', 'PARLAY_ID', 'PARLAY_EV', 'PARLAY_PROB', 'PARLAY_HOUSE_PROB', 'PROB', 'HOUSE_PROB', 'EV', 'TO_WIN']]
game_data = dict(zip(players, game_data_players))
for i, team in enumerate(teams):
    game_data[team] = game_data_teams[i]
dm.save_as_excel_workbook(game_data, folder_name + "/study_packet")
props.to_csv(folder_name + "/props.csv")

Kevin Huerter        player_name player_position  minutes  points  rebounds  assists    efg  \
416  Kevin Huerter             G-F     26.0      14         5        0  0.778   
413  Kevin Huerter             G-F     32.0      18         1        4  0.750   
414  Kevin Huerter             G-F     26.0      14         2        1  0.778   
415  Kevin Huerter             G-F     19.0       2         4        0  0.250   
35   Kevin Huerter             G-F      1.0       0         1        1  0.000   
..             ...             ...      ...     ...       ...      ...    ...   
126  Kevin Huerter             G-F     17.0       6         6        2  0.600   
128  Kevin Huerter             G-F     17.0       3         4        1  0.333   
122  Kevin Huerter             G-F     25.0       9        10        4  0.563   
111  Kevin Huerter             G-F     13.0       3         2        2  0.333   
83   Kevin Huerter             G-F      4.0       0         3        0  0.000   

     fg3a  fg

In [34]:
# Save a prop-by-parlay tracking grid next to the other study files.
pivot_df = create_pivot_table_for_tracking(selected_parlays)
pivot_df.to_csv(f"{folder_name}/selected_parlays_as_pivot.csv")