In [1]:
import pandas as pd
import itertools
import multiprocessing as mp
from scipy.optimize import minimize
import numpy as np
import random

In [139]:
data = pd.read_csv('probability_score.csv')

In [242]:
def data_split(df, col, pattern):
    '''
    df : dataframe
    col: column name
    pattern: split based on column
    
    dataframe contains all the matches and this function helps to split the matches into individual matches. It 
    helps to identifies the indices of the starting and finishing row of the match
    '''
    
    index = 7
    split_indices = []
    last = 0
    
    while index < len(df):
        row = df.loc[index]
        if row[col] == pattern:
            indices = [last, index-1]
            split_indices.append(indices)
            last = index
            index +=8
        else:
            index +=1
    return split_indices
    
    
def odd_ratio(data_match, commission):
    '''
    commission: pre-defined by the aggregator
    data_match: dataframe with the probabilities columns
    '''
    commission_reduce = lambda x: (x - (commission/2)) if (commission/2) < x else 0
    # apply the lambda function to two columns of the DataFrame
    data_match[['team1_probability', 'team2_probability']] = data[['team1_probability', 'team2_probability']].applymap(commission_reduce)

    odd_cal = lambda x: 1/x if (commission/2) < x else 0
    data_match['team1_odd'] = data['team1_probability'].apply(odd_cal)
    data_match['team2_odd'] = data['team2_probability'].apply(odd_cal)
    
    return data_match


# Define the optimization function
def optimize(R1, R2, I):
    # Define the objective function to be maximized
    def objective(XY):
        X = XY[:len(R1)]
        Y = XY[len(R1):]
        Z = np.sum(X) + np.sum(Y)
        return np.dot(R1, X) - Z, np.dot(R2, Y) - Z
    
    # Define the constraint that the sum of X and Y must be less than or equal to I
    def constraint1(XY):
        return -np.sum(XY) + I

    def constraint2(XY):
        X = XY[:len(R1)]
        Y = XY[len(R1):]
        Z = np.sum(X) + np.sum(Y)
        return np.dot(R1, X) - Z
    
    
    def constraint3(XY):
        X = XY[:len(R2)]
        Y = XY[len(R2):]
        Z = np.sum(X) + np.sum(Y)
        return np.dot(R2, Y) - Z


    # Define the initial guess for X and Y
    initial_guess = [I / 2] * len(R1 + R2)
    
    # Define the bounds for the values of X and Y
    bounds = [(0, I)] * len(R1 + R2)
    
    # Solve the optimization problem using the SLSQP algorithm
    solution = minimize(lambda XY: -np.sum(objective(XY)), 
                        x0=initial_guess, 
                        constraints=[{'type': 'ineq', 'fun': constraint1},   {'type': 'ineq', 'fun': constraint2}, 
{'type': 'ineq', 'fun': constraint3}], 
                        bounds=bounds,
                        method='SLSQP')
    
    # Return the optimized values of X and Y
    return solution.x[:len(R1)], solution.x[len(R1):]


def objective_value(X_opt1, Y_opt1, R1, R2):

    '''
    The final objective value for the optimal value
    '''
    
    Z = np.sum(X_opt1) + np.sum(Y_opt1)
    print('The investment in A is', np.sum(X_opt1))
    print('The investment in B is', np.sum(Y_opt1))
    print('Team A wins:', np.dot(R1, X_opt1) - Z)
    print('Team B wins:', np.dot(R2, Y_opt1) - Z)
    return np.sum([np.dot(R1, X_opt1) - Z, np.dot(R2, Y_opt1) - Z])


def optimal_investment(data_match, subset, total_invest):
    '''
    data_match: pandas dataframe contains the match odds
    subset: combination of the balls picked to invest
    total_invest: constraint on total investment
    
    find optimal investment given that for a particular subset
    '''
    
    subset_data = data_match.loc[subset]
    temp_indices = list(subset_data.index)
    set_value = lambda i: 1 if i in temp_indices else 0
    data_match['Invest'] = data_match.index.map(set_value)
    
    R1 = list(data_match[data_match['Invest'] == 1]['team1_odd'].values)
    R2 = list(data_match[data_match['Invest'] == 1]['team2_odd'].values)
    X_opt, Y_opt = optimize(R1, R2, total_invest)
    objective_val = objective_value(X_opt, Y_opt, R1, R2)
    
    data_match['TeamA_Invest'] = 0
    data_match['TeamB_Invest'] = 0
    data_match.loc[data_match['Invest'] == 1, 'TeamA_Invest'] = X_opt
    data_match.loc[data_match['Invest'] == 1, 'TeamB_Invest'] = Y_opt
    return data_match, objective_val


def generate_positive_numbers_summing_to_total(subset, total):
    """
    Generates a list of n random positive numbers that sum to at most total.
    """
    nums = []
    n = len(subset)
    remaining_sum = total
    for i in range(n):
        # Generate a random number between 0 and the remaining sum
        num = random.uniform(0, remaining_sum)
        remaining_sum -= num
        if remaining_sum > 0:
            nums.append(num) # Add the remaining sum to the list
        else:
            nums.append(0)
        
    if remaining_sum > 0:
        nums[-1] += remaining_sum
    return nums

def random_investment(data_match, subset, total_invest):
    '''
    data_match: pandas dataframe contains the match odds
    subset: combination of the balls picked to invest
    total_invest: constraint on total investment
    
    find random investment given that for a particular subset
    '''
    
    subset_data = data_match.loc[subset]
    temp_indices = list(subset_data.index)
    set_value = lambda i: 1 if i in temp_indices else 0
    data_match['Invest'] = data_match.index.map(set_value)
    
    total_invest1 = random.randint(0, total_invest)
    total_invest2 = total_invest - total_invest1
    
    R1 = list(data_match[data_match['Invest'] == 1]['team1_odd'].values)
    R2 = list(data_match[data_match['Invest'] == 1]['team2_odd'].values)
    
    X_rand = generate_positive_numbers_summing_to_total(subset, total_invest1)
    Y_rand = generate_positive_numbers_summing_to_total(subset, total_invest2)
    print(len(X_rand))
    print(len(subset))
    objective_val = objective_value(X_rand, Y_rand, R1, R2)
    
    data_match['TeamA_Invest'] = 0
    data_match['TeamB_Invest'] = 0
    data_match.loc[data_match['Invest'] == 1, 'TeamA_Invest'] = X_rand
    data_match.loc[data_match['Invest'] == 1, 'TeamB_Invest'] = Y_rand
    return data_match, objective_val


def match_winner(data_one_match):
    '''
    Decide match winner based on probability value
    '''
    if data_one_match['team1_probability'].values[-1] < data_one_match['team2_probability'].values[-1]:
        return "team_2"
    else:
        return "team_1"
    
def profit_calculation(data_one_match):
    '''
    profit/loss from the bet with respect to each bet. 
    '''
    if match_winner(data_one_match) == 'team_1':
        col = 'TeamA_Invest'
        odd_col = 'team1_odd'
    else:
        odd_col = 'team2_odd'
        col = 'TeamB_Invest'

    data_one_match['profit_contribution'] = data_one_match[col]*data_one_match[odd_col]
    data_one_match['num_invest'] = data_one_match['Invest'].cumsum()
    return data_one_match


def custom_reward(subset, obj_value, Invest, profit_contribution, num_invest):
    '''
    reward function designed for this task
    '''
    # Define the rewards for each action
    if Invest:
        if (obj_value >= 0) and (len(subset) != num_invest):
            # Positive reward proportional to the profit made
            reward = min(10, (profit_contribution/obj_value)*5)
            
        elif (obj_value >= 0) and (len(subset) == num_invest):
            reward = min(10, (profit_contribution/obj_value)*10)
        elif (obj_value < 0) and (len(subset) != num_invest):
            # Negative reward proportional to the magnitude of the loss
            reward = max(-10, (profit_contribution/obj_value)*5)
        else:
            reward = max(-10, (profit_contribution/obj_value)*10)
            
    else:
        reward = 0
    
    return reward

In [245]:
# Total number of Bets 
min_bets = 2
min_sample = 5
commission = 0.03
total_invest = 10000
pos_neg_ratio = '1:1'

In [141]:
split_indices = data_split(data, 'innings_over', '1_1')

In [250]:
j = 13

match_index = split_indices[j]
data_one_match = data.loc[match_index[0]:match_index[1]].reset_index(drop=True)
data_one_match1 = odd_ratio(data_one_match, commission)


In [147]:
for no_bet in range(min_bets, len(data_one_match)//2):
    subset = random.sample(range(0, len(data_one_match)), no_bet)
    data_one_match = data_one_match1.copy()
    
    for pos in range(int(pos_neg_ratio.split(':')[0])):
        optimal_match, optimal_val = optimal_investment(data_one_match, subset, total_invest)
        data_one_match = profit_calculation(optimal_match)
        obj_value = np.round(optimal_val)
        data_one_match['reward'] = data_one_match.apply(lambda row: custom_reward(subset, obj_value, row['Invest'], row['profit_contribution'], row['num_invest']), axis=1)
    for pos in range(int(pos_neg_ratio.split(':')[1])):
        data_one_match_neg = data_one_match1.copy()
        random_match, random_val = random_investment(data_one_match_neg, subset, total_invest)
        data_one_match_neg = profit_calculation(random_match)
        obj_value = np.round(random_val)
        data_one_match_neg['reward'] = data_one_match_neg.apply(lambda row: custom_reward(subset, obj_value, row['Invest'], row['profit_contribution'], row['num_invest']), axis=1)


The investment in A is 4167.438745498578
The investment in B is 5832.561254501755
Team A wins: -5.220499588176608e-10
Team B wins: 332643.33685826726
10
10
The investment in A is 1906.9999999999998
The investment in B is 8093.0
Team A wins: -6140.742110564917
Team B wins: 17730.418245728277


1

array([0.00000000e+00, 4.32990181e-17, 1.21833781e-16, 3.31681987e-16,
       6.06338182e-16, 4.23579804e-16, 9.85488422e-15, 7.50490298e-15,
       5.15031636e+00, 1.23564221e-13])