## Analysis of the collected data

Import data + packages

In [1]:
import numpy as np
import pandas as pd
import time
import random

data = pd.read_csv('2024-01-26-filled.csv')

We have bet rates in columns: 1, 0, 2, 10, 02, 12. Also in column 'Wygrany' we have information about the match result - the first team won(1), draw(0) or lost(2). Using this information lets find a range of width 'x' that will be most profitable for betting.

In [3]:
def function_sum(data, start, iterations, bet_type):
    selected_data = data[start:start+iterations]
    selected_values = selected_data[selected_data[:, 1] == bet_type][:, 0]
    return np.sum(selected_values)

# set range and bet type
range_count = 100
bet_type = '1'

row_count = data.shape[0]
matrix_result = np.zeros((row_count, 2))
matrix_result_count = np.zeros((row_count-range_count+1, 3))

for index, row in data.iterrows():
    matrix_result[index][0] = row[bet_type]
    matrix_result[index][1] = row['Wygrany']

matrix_result_sorted = matrix_result[matrix_result[:,0].argsort()]

# calculate range count. 1st count outside loop - optimalization
matrix_result_count[0][0] = function_sum(matrix_result_sorted, 0, range_count, bet_type)    # value of profit
matrix_result_count[0][1] = matrix_result_sorted[0][0]                                      # value min range
matrix_result_count[0][2] = matrix_result_sorted[range_count][0]                            # value max range

for idx, _ in enumerate(matrix_result_count[1:], start=1):
    matrix_result_count[idx][0] = matrix_result_count[idx-1][0]
    if np.isnan(matrix_result_sorted[idx-1][1]):
        pass
    elif str(int(matrix_result_sorted[idx-1][1])) in str(bet_type):
        matrix_result_count[idx][0] -= matrix_result_sorted[idx-1][0]
    
    if np.isnan(matrix_result_sorted[idx+range_count-1][1]):
        pass
    elif str(int(matrix_result_sorted[idx+range_count-1][1])) in str(bet_type):
        matrix_result_count[idx][0] += matrix_result_sorted[idx+range_count-1][0]

    matrix_result_count[idx][1] = matrix_result_sorted[idx][0]
    matrix_result_count[idx][2] = matrix_result_sorted[idx+range_count-1][0]

max_index = np.argmax(matrix_result_count[:, 0])
print(max_index, matrix_result_count[max_index])


292 [23.75  1.71  1.92]


  selected_values = selected_data[selected_data[:, 1] == bet_type][:, 0]


A global maximum has been found for all data. Due to the different nature of the leagues, some deviations are possible - instead of the range of 1.7-1.9 it may be 1.6-1.9. For this reason, an analysis will be performed for the 1.6-2 range for each league.

Result lis list with ligues, profit and best range. Profit is calculated 

$ profit = if_{win}(value*tax_{free} - 1) - if_{lose}(1) $

In [4]:
ligues = data['Liga'].unique()
list_ligues = []
bet_type = '1'
_x_min = matrix_result_count[max_index][1]
_x_max = matrix_result_count[max_index][2]
x = _x_min - (_x_max - _x_min)# / 2
y = _x_max + (_x_max - _x_min) #/ 2
bet_range = 0.4

for l in ligues:
    list_ligues.append([l, 0.0, 0.0])

def calculate_ratio(sublists):
    if len(sublists) < 2:
        return 0
    if sublists[1] == 0:
        return 0
    return sublists[0] / sublists[1]

for ligue in ligues:
    best_range = None
    best_ratio = -np.inf

    for start in np.arange(x, y - bet_range, 0.01):
        end = start + bet_range
        total_value = 0.0
        total_count = 0.0
        
        for idx, row in data.iterrows():
            if row['Liga'] == ligue and start <= row[bet_type] < end:
                winner = row['Wygrany']
                value = row[bet_type]
                
                if np.isnan(winner):
                    pass
                elif str(int(winner)) in str(bet_type):
                    total_value += value * 0.88 - 1
                    total_count += 1
                else:
                    total_value -= 1
                    total_count += 1
        
        ratio = calculate_ratio([total_value, total_count])
        
        if ratio > best_ratio:
            best_ratio = round(ratio, 3)
            best_range = (start, end)
    
    for sub in list_ligues:
        if sub[0] == ligue:
            sub[1] = best_ratio
            sub[2] = best_range

print(list_ligues)

[['1-portugalia', 0.258, (1.6, 2.0)], ['1-niemcy', 0.147, (1.6600000000000001, 2.06)], ['1-belgia', 0.137, (1.52, 1.92)], ['1-francja', 0.024, (1.55, 1.9500000000000002)], ['1-hiszpania', 0.104, (1.51, 1.9100000000000001)], ['1-anglia', 0.224, (1.7200000000000002, 2.12)], ['1-holandia', -0.057, (1.7200000000000002, 2.12)], ['liga-mistrzow', 0.028, (1.6700000000000002, 2.0700000000000003)], ['liga-europy', 0.208, (1.51, 1.9100000000000001)]]


Using data of profitability and best range, a betting simulation will be performed. The code below is responsible for filtering the data.

In [5]:
# filtering data
minimum_profitability = 0.1

list_ligues_filtered = [sublist for sublist in list_ligues if sublist[1] > minimum_profitability]

def function_filtred_data():
    sample = pd.DataFrame(columns=data.columns)
    for ligue_info in list_ligues_filtered:
        min_range, max_range = ligue_info[2]
        
        matches = data[(data['Liga'].str.contains(ligue_info[0])) & 
                       ((data[bet_type].astype(float) > min_range) & (data[bet_type].astype(float) < max_range))]
        
        sample = pd.concat([sample, matches])
    return sample

filtered_data = function_filtred_data()
filtered_data.to_csv('filtered_data.csv', index=False)

Simulation of placing bets for stakes containing x% of the portfolio size.

In [47]:
STARTING_MONEY = 100
PERCENTAGE_MONEY = [0.02, 0.05, 0.08, 0.12, 0.15, 0.20]
BET_PAYMENT_BASIS = 2
NUMBER_OF_TESTS = 1000
NUMBER_OF_BETS_IN_TEST = 40
NUMBER_OF_MATCHES_IN_BET = 2
bet_type = '1'
tax = 0.12
bet_win_rate = NUMBER_OF_TESTS * NUMBER_OF_BETS_IN_TEST

def is_win(row):
    if row['Bramki_1'] > row['Bramki_2']:
        return 1
    elif row['Bramki_1'] == row['Bramki_2']:
        return 0
    else:
        return 2

def calculate_sample(sample):
    global bet_win_rate
    bet_value = 1
    for index, match in sample.iterrows():
        result = is_win(match)
        if str(result) in str(bet_type):
            bet_value *= filtered_data.at[index, f'{bet_type}']
        else:
            bet_win_rate -= 1
            return 0
    return bet_value * (1 - tax)

def calculate_bet_base_value(portfolio, percentage):
    if(portfolio < BET_PAYMENT_BASIS):
        return 0
    bet_value = int(portfolio * percentage)
    return max(bet_value, 1)

def generate_sample():
    sample = filtered_data.sample(n=NUMBER_OF_MATCHES_IN_BET)
    return sample

def calculate_bets_profit(bet_percentage, number_of_bets):
    money = [STARTING_MONEY] * len(bet_percentage)
    for bet in range(0, number_of_bets):        
        sample_matches = generate_sample()
        bet_value = calculate_sample(sample_matches)
        for percent in range(0, len(bet_percentage)):
            bet_base = calculate_bet_base_value(money[percent], bet_percentage[percent])
            money[percent] = money[percent] + (bet_value - 1) * bet_base
    return money

avg_win = [0] * len(PERCENTAGE_MONEY)
losses = [0] * len(PERCENTAGE_MONEY)
failure = [0] * len(PERCENTAGE_MONEY)
portfolio_history = np.zeros((NUMBER_OF_TESTS, len(PERCENTAGE_MONEY)))

for i in range(NUMBER_OF_TESTS):
    money = calculate_bets_profit(PERCENTAGE_MONEY, NUMBER_OF_BETS_IN_TEST)
    for x in range(0, len(money)):
        avg_win[x] += money[x]
        if money[x] < int(BET_PAYMENT_BASIS):
            losses[x] += 1
        if money[x] < STARTING_MONEY:
            failure[x] += 1
        if(money[x] < min_money):
            min_money = money[x]
        portfolio_history[i][x] = money[x]

avg_win = [round(x/NUMBER_OF_TESTS, 2) for x in avg_win]
bet_win_rate = round(bet_win_rate / (NUMBER_OF_TESTS * NUMBER_OF_BETS_IN_TEST) * 100, 2)

print('Number of tests: ', NUMBER_OF_TESTS)
print('For', NUMBER_OF_MATCHES_IN_BET, 'matches in one bet, winrate is: ', bet_win_rate, '%')
print('Percentage: ', PERCENTAGE_MONEY)
print('Average money in the end: ', avg_win)
print('Bankruptcy: ', losses)
print('How many times we lost: ', failure)

Number of tests:  1000
For 2 matches in one bet, winrate is:  49.07 %
Percentage:  [0.02, 0.05, 0.08, 0.12, 0.15, 0.2]
Average money in the end:  [125.51, 193.92, 300.52, 538.56, 837.74, 1805.25]
Bankruptcy:  [0, 0, 0, 0, 0, 0]
How many times we lost:  [131, 93, 117, 149, 170, 220]


In [48]:
for i in range(len(PERCENTAGE_MONEY)):
    portfolio_history = portfolio_history[portfolio_history[:,i].argsort()]
    q25 = round(portfolio_history[int(NUMBER_OF_TESTS*1/4)][i], 2)
    q50 = round(portfolio_history[int(NUMBER_OF_TESTS*1/2)][i], 2)
    q75 = round(portfolio_history[int(NUMBER_OF_TESTS*3/4)][i], 2)
    print('For ', PERCENTAGE_MONEY[i], ' percentage:')
    print('First quartile: ', q25, '\tSecond quartile: ', q50, '\tThird quartile: ', q75)

For  0.02  percentage:
First quartile:  111.08 	Second quartile:  126.19 	Third quartile:  138.51
For  0.05  percentage:
First quartile:  133.36 	Second quartile:  178.43 	Third quartile:  236.71
For  0.08  percentage:
First quartile:  149.73 	Second quartile:  242.09 	Third quartile:  378.23
For  0.12  percentage:
First quartile:  152.01 	Second quartile:  325.69 	Third quartile:  638.19
For  0.15  percentage:
First quartile:  149.65 	Second quartile:  374.99 	Third quartile:  874.77
For  0.2  percentage:
First quartile:  119.2 	Second quartile:  417.87 	Third quartile:  1272.15
