In [7]:
import itertools
import random
import pandas as pd
from itertools import combinations
from datetime import datetime


from data_manager import DataManager
import analyze

dm = DataManager()

class Prop:
    """One side (over or under) of a player prop line, evaluated on creation.

    Constructing an instance looks up the player's recent games through the
    module-level ``dm``, asks ``analyze`` for a hit probability, derives the
    expected value and the book's implied probability from the odds, prints a
    summary block, and stores the same figures in ``entry``.
    """

    def __init__(self, name, team, stat, threshold, odds, bet_type):
        # Raw description of the line being priced.
        self.name, self.team, self.stat = name, team, stat
        self.n = threshold
        self.odds = odds
        self.bet_type = bet_type

        # Derived figures; the EV step reads self.probability, so order matters.
        self.probability = self.get_prop_probability()
        self.ev, self.house_prob = self.get_ev_and_implied_prob()

        self.print_out = f"""
            PLAYER: {self.name}
              STAT: {self.stat}
            THRESH: {self.n}
              ODDS: {self.odds}
              TYPE: {self.bet_type}
              PROB: {self.probability}
                EV: {self.ev}
        HOUSE_PROB: {self.house_prob}
            """
        print(self.print_out)

        # Flat record of the same fields shown in the printed summary, plus TEAM.
        self.entry = {
            "PLAYER": self.name,
            "TEAM": self.team,
            "STAT": self.stat,
            "THRESH": self.n,
            "ODDS": self.odds,
            "TYPE": self.bet_type,
            "PROB": self.probability,
            "EV": self.ev,
            "HOUSE_PROB": self.house_prob,
        }

    def get_prop_probability(self, last_n_games=25):
        """Estimate the chance this side of the line hits.

        Takes the player's ``last_n_games`` most recent rows (sorted by the
        'date' column, newest first) and hands them to the matching
        ``analyze.estimate_probability_poisson_*`` function.

        Raises:
            ValueError: if ``bet_type`` is neither 'over' nor 'under'. The
                player data is fetched before this check is made.
        """
        player_id = dm.get_player_id(self.name)
        history = dm.get_and_save_player_data(player_id, self.name)
        recent_games = history.sort_values(by='date', ascending=False).head(last_n_games).copy()

        if self.bet_type == "over":
            estimator = analyze.estimate_probability_poisson_over
        elif self.bet_type == "under":
            estimator = analyze.estimate_probability_poisson_under
        else:
            raise ValueError("Invalid bet type. Use 'over' or 'under'.")
        return estimator(recent_games, self.stat, self.n)

    def get_ev_and_implied_prob(self):
        """Return ``(ev, house_probability)`` for this prop's odds.

        Both values come from ``analyze`` using the decimal form of the odds.
        The literal 5 passed to ``calculate_ev`` is presumably the stake --
        confirm against ``analyze.calculate_ev``.
        """
        decimal_odds = self.american_to_decimal(self.odds)
        implied = analyze.estimate_implied_probability(decimal_odds)
        expected_value = analyze.calculate_ev(self.probability, decimal_odds, 5)
        return expected_value, implied

    @staticmethod
    def american_to_decimal(american_odds):
        """Convert American odds to decimal odds."""
        # Positive odds quote profit per 100 staked; otherwise the stake per 100 profit.
        profit_per_unit = (
            american_odds / 100 if american_odds > 0 else 100 / abs(american_odds)
        )
        return 1 + profit_per_unit
    

def extract_raw_data(file_path):  # .csv
    """Return the first column of the CSV at ``file_path`` as a plain list.

    The file is read with ``pd.read_csv`` defaults, so its first line is
    consumed as the header and does not appear in the returned list.
    """
    sheet = pd.read_csv(file_path)
    first_column = sheet.iloc[:, 0]
    return list(first_column)


def load_available_props():
    """Parse the prop-line sheet into one DataFrame row per player/stat line.

    The first column of ``prop_lines/prop_lines.csv`` is walked top to bottom
    as a flat stream of cells:

    * a cell matching a key of ``stat_names`` switches the current stat,
    * a cell matching a team nickname switches the current team,
    * a cell matching a player name starts a record; the next four
      non-missing cells are stored as over threshold, over odds, under
      threshold and under odds, in that order.

    Returns:
        DataFrame with columns ``player_name``, ``team``, ``stat`` (str),
        ``over_threshold``, ``under_threshold`` (float) and ``over_odds``,
        ``under_odds`` (int).

    Raises:
        AssertionError: if a player cell appears before any team cell.
    """
    raw_input = extract_raw_data("prop_lines/prop_lines.csv")
    print("Received raw input.")
    stat_names = {
        'PointsSGP': "points",
        'AssistsSGP': "assists",
        'Threes MadeSGP': "fg3m",
        'ReboundsSGP': "rebounds",
        'Field Goals MadeSGP': "fgm",
        'StealsSGP': "steals",
        'BlocksSGP': "blocks",
    }
    # Sets are built once so each cell costs O(1) membership tests, not a list scan.
    player_names = {player.name for player in dm.query_players()}
    team_names = {team.nickname for team in dm.query_teams()}

    values_per_record = 4  # over threshold, over odds, under threshold, under odds
    cells_remaining = 0
    current_category = None
    current_team = None
    record = []
    records = []
    for item in raw_input:
        # pandas reports empty cells as NaN, never None, so test with pd.isna.
        if pd.isna(item):
            continue
        if item in stat_names:
            current_category = stat_names[item]
            print(f"Loading {current_category} props.")
        if item in team_names:
            current_team = item
        if item in player_names:
            assert current_team, f"Player {item!r} appeared before any team cell."
            # A new player always restarts the window, even mid-record.
            record = [item, current_team, current_category]
            cells_remaining = values_per_record
            continue
        if cells_remaining:
            record.append(item)
            cells_remaining -= 1
            if not cells_remaining:
                records.append(record)
                record = []

    df = pd.DataFrame.from_records(
        records,
        columns=[
            "player_name",
            "team",
            "stat",
            "over_threshold",
            "over_odds",
            "under_threshold",
            "under_odds",
        ],
    )

    for column in ("player_name", "team", "stat"):
        df[column] = df[column].astype(str)
    # Threshold cells carry text around the number; keep only the decimal part.
    # expand=False yields a Series rather than a one-column DataFrame.
    for column in ("over_threshold", "under_threshold"):
        df[column] = df[column].str.extract(r'(\d+\.\d+)', expand=False).astype(float)
    for column in ("over_odds", "under_odds"):
        df[column] = df[column].astype(int)

    return df


def get_analyzed_props(available_props):
    """Build an over and an under ``Prop`` for every row of ``available_props``.

    Each row is printed before its props are created, and a confirmation line
    is printed after each ``Prop``. The returned list holds the props in row
    order, over before under.
    """
    analyzed = []
    for _, line in available_props.iterrows():
        print(line)
        for side in ("over", "under"):
            analyzed.append(
                Prop(
                    name=line["player_name"],
                    team=line["team"],
                    stat=line["stat"],
                    threshold=line[f"{side}_threshold"],
                    odds=line[f"{side}_odds"],
                    bet_type=side,
                )
            )
            print("Prop object created.")
    return analyzed


def generate_heterogenous_combinations(df, n):
    """Rank every ``n``-row combination of ``df`` by how varied it is.

    A combination's score is the number of distinct values it has in the
    'PLAYER' column, plus the same count for 'STAT' and for 'TEAM'. Every
    combination is printed and returned (no top-k cut-off), highest score
    first; ties keep the order ``itertools.combinations`` produced them in.

    Returns:
        List of ``(sub_frame, score)`` tuples, where ``sub_frame`` is
        ``df.loc`` of the combination's index labels.
    """
    scored_columns = ("PLAYER", "STAT", "TEAM")

    def variety(labels):
        # Sum of distinct-value counts over the three scored columns.
        picked = list(labels)
        return sum(len(set(df.loc[picked, column])) for column in scored_columns)

    ranked = sorted(
        ((labels, variety(labels)) for labels in itertools.combinations(df.index, n)),
        key=lambda pair: pair[1],
        reverse=True,
    )

    results = []
    for labels, score in ranked:
        subset = df.loc[list(labels)]
        print(f"Combination: {labels}, Score: {score}")
        print(subset)
        print()
        results.append((subset, score))

    return results


def generate_unique_combinations(df, combo_length):
    """Return index combinations with no repeated player and no repeated content.

    A combination of ``combo_length`` index labels is kept when its rows have
    no duplicate in the 'Player' column and its sorted set of
    ``(Player, Category, Bet)`` triples has not already been produced by an
    earlier combination. Combinations are returned in generation order.
    """
    seen_signatures = set()
    kept = []

    for labels in combinations(df.index, combo_length):
        rows = df.loc[list(labels)]
        if rows['Player'].duplicated().any():
            continue

        triples = rows.apply(
            lambda entry: (entry['Player'], entry['Category'], entry['Bet']),
            axis=1,
        )
        signature = tuple(sorted(triples))
        if signature in seen_signatures:
            continue

        seen_signatures.add(signature)
        kept.append(labels)

    return kept




def distribute_props_into_parlays(props_df, n_parlays):
    """
    Distributes props into a specified number of parlays based on their probabilities.

    Each prop is placed into ``int(PROB / sum(PROB) * n_parlays)`` distinct,
    randomly chosen parlays, and always into at least one. Selection uses the
    global ``random`` module, so seed it for reproducible output.

    :param props_df: pandas DataFrame with one prop per row and a numeric 'PROB'
        column; every column of a row is carried into the output dict
    :param n_parlays: Number of parlays to distribute the props into
    :return: List of ``n_parlays`` parlays, where each parlay is a list of row
        dicts (``row.to_dict()``)
    :raises ValueError: raised by ``random.sample`` when ``props_df`` has rows
        but ``n_parlays`` is less than 1
    """
    # Initialize empty parlays
    parlays = [[] for _ in range(n_parlays)]

    total_probability = props_df["PROB"].sum()

    for _, row in props_df.iterrows():
        if total_probability > 0:
            share = row["PROB"] / total_probability
            placements = max(1, int(share * n_parlays))
        else:
            # With no probability mass the share is undefined (0/0); fall
            # back to the one-placement minimum instead of failing on NaN.
            placements = 1

        # random.sample picks distinct parlays, so a prop never repeats in one.
        for parlay in random.sample(parlays, placements):
            parlay.append(row.to_dict())

    return parlays




In [8]:

# Notebook cell: load the prop lines, evaluate them, spread them across
# parlays, and write the analyzed parlays to a timestamped study folder.

# n_props is forwarded to dm.filter_props; n_parlays to distribute_props_into_parlays.
n_props = 32
n_parlays = 3

available_props = load_available_props()
analyzed_props = get_analyzed_props(available_props)
# NOTE(review): empty list passed as the player filter -- presumably "no
# filtering"; confirm against DataManager.filter_props.
filter_players = []
filtered_df = dm.filter_props(analyzed_props, filter_players, n_props)
parlays = distribute_props_into_parlays(filtered_df, n_parlays)


# Keep only parlays that received at least 7 props.
parlays = [parlay for parlay in parlays if len(parlay) >= 7]
# Mean parlay length is taken here, before analyze_parlays replaces the
# list of parlays; 0 when no parlay survived the filter.
average_len_of_parlays = sum(map(len, parlays)) / len(parlays) if parlays else 0
parlays = analyze.analyze_parlays(parlays)
# The folder name embeds the current local time as YYYYMMDDHHMMSS.
folder_name = dm.create_directory(f"parlays/{datetime.now().strftime('%Y%m%d%H%M%S')}_study")
parlays.to_csv(f"{folder_name}/available_parlays.csv")


Received raw input.
Loading points props.
Loading assists props.
Loading fg3m props.
Loading rebounds props.
Loading fgm props.
Loading steals props.
Loading blocks props.
player_name        T.J. McConnell
team                       Pacers
stat                       points
over_threshold                9.5
over_odds                    -135
under_threshold               9.5
under_odds                    100
Name: 0, dtype: object

            PLAYER: T.J. McConnell
              STAT: points
            THRESH: 9.5
              ODDS: -135
              TYPE: over
              PROB: 0.8119961703007408
                EV: 2.0673740748397815
        HOUSE_PROB: 0.574468085106383
            
Prop object created.

            PLAYER: T.J. McConnell
              STAT: points
            THRESH: 9.5
              ODDS: 100
              TYPE: under
              PROB: 0.18800382969925922
                EV: -3.119961703007408
        HOUSE_PROB: 0.5
            
Prop object created.
player

In [9]:

# Notebook cell: label each row with a PLAYER-STAT-TYPE key and print a
# short summary of the analyzed parlays.

# Adds the 'PROPS' column to `parlays` in place; the copy below includes it.
parlays['PROPS'] = parlays['PLAYER'] + "-" + parlays['STAT'] + "-" + parlays['TYPE']
parlay_study = parlays.copy()
# n_parlays and n_props are rebound here from the configured values to the
# counts actually present in the analyzed data.
n_parlays = len(parlay_study['PARLAY_ID'].unique())
print(f"n_parlays: {n_parlays}")
props = parlay_study['PROPS'].unique()
n_props = len(props)
print(f"Props involved: {n_props}")
# average_len_of_parlays was computed before analyze.analyze_parlays ran.
print(f"Average parlay len: {average_len_of_parlays}")
print(parlay_study.head())


n_parlays: 1
Props involved: 7
Average parlay len: 13.0
             PLAYER     TEAM     STAT  THRESH  ODDS   TYPE      PROB  \
0  Payton Pritchard  Celtics  assists     2.5  -140   over  0.841567   
1        Obi Toppin   Pacers     fg3m     0.5  -240   over  0.753403   
2      Jayson Tatum  Celtics   steals     1.5  -240  under  0.735759   
3      Jrue Holiday  Celtics   steals     1.5  -210  under  0.721048   
4     Pascal Siakam   Pacers   blocks     0.5  -185  under  0.670320   

         EV  HOUSE_PROB  PARLAY_ID  PARLAY_PROB  HOUSE_PARLAY_PROB  PARLAY_EV  \
0  2.213435    0.583333          0     0.085016           0.039404   1.157537   
1  0.336605    0.705882          0     0.085016           0.039404   1.157537   
2  0.211625    0.705882          0     0.085016           0.039404   1.157537   
3  0.322018    0.677419          0     0.085016           0.039404   1.157537   
4  0.163276    0.649123          0     0.085016           0.039404   1.157537   

      TO_WIN            

In [10]:
# Notebook cell: fetch game data for every player and team that appears in
# the parlay study. The two result lists are index-aligned with `players`
# and `teams` respectively.
players = parlay_study['PLAYER'].unique()
game_data_players = []
for player in players:
    player_id = dm.get_player_id(player)
    # NOTE(review): called with only the id here, while
    # Prop.get_prop_probability also passes the player name -- confirm the
    # second argument is optional in DataManager.get_and_save_player_data.
    player_game_data = dm.get_and_save_player_data(player_id)
    game_data_players.append(player_game_data)

teams = parlay_study['TEAM'].unique()
game_data_teams = []
for team in teams:
    team_id = dm.get_team_id(team)
    team_game_data = dm.get_and_save_team_data(team_id)
    game_data_teams.append(team_game_data)

In [11]:
# Notebook cell: write the study outputs into `folder_name`.

# Rebinds `props` from the array of unique keys to a per-row table of the
# listed columns, after dropping fully duplicated rows.
props = parlay_study.drop_duplicates()[['PROPS', 'THRESH', 'PARLAY_ID', 'PARLAY_EV', 'PARLAY_PROB', 'HOUSE_PARLAY_PROB', 'PROB', 'HOUSE_PROB', 'EV', 'TO_WIN']]

# One mapping of name -> game data: players first, then teams. A team key
# equal to a player key would overwrite that player's entry.
game_data = dict(zip(players, game_data_players))
for i, team in enumerate(teams):
    game_data[team] = game_data_teams[i]
dm.save_as_excel_workbook(game_data, folder_name + "/study_packet")
props.to_csv(folder_name + "/props.csv")


Payton Pritchard          player_name player_position  minutes  points  rebounds  assists  \
91  Payton Pritchard               G     21.0      11         1        0   
3   Payton Pritchard               G     25.0      11         1        1   
4   Payton Pritchard               G     22.0       5         3        3   
0   Payton Pritchard               G     19.0      13         2        4   
5   Payton Pritchard               G     24.0      16         5        2   
..               ...             ...      ...     ...       ...      ...   
24  Payton Pritchard               G     19.0       0         0        2   
36  Payton Pritchard               G     26.0      15         4        9   
62  Payton Pritchard               G     25.0       0         7        6   
72  Payton Pritchard               G     12.0       3         0        1   
30  Payton Pritchard               G     11.0       4         1        1   

      efg  fg3a  fg3m  fg3_pct  fga  fgm  fta  ft_pct  steals  blocks 