# Reading Model Predictions and Bet365 Odds

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import warnings
from rapidfuzz import process
from datetime import datetime

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"
uefa = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})
uefa['FT'] = uefa['FTHG'].astype(str) + ' - ' + uefa['FTAG'].astype(str)
uefa['HT'] = '(' + uefa['HTHG'].astype(str) + '-' + uefa['HTAG'].astype(str) + ')'
uefa['FTTG'] = uefa['FTHG'] + uefa['FTAG']
uefa['HTTG'] = uefa['HTHG'] + uefa['HTAG']

predictions = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/_predictions.xlsx')
bet365_odds = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/final_odds.xlsx')

print(f"Games found: {len(predictions)} in predictions and {len(bet365_odds)} in odds dataset.")
bet365_odds.tail()

Games found: 4867 in predictions and 5092 in odds dataset.


Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
5087,Barracas Central,Godoy Cruz,2.55,3.2,2.9,1.4,1.36,1.5,3.5,1.83,4.0,1.22,1.83,1.29,2.25,1.57,1.57,2.25,2.88,1.4,6.0,1.13,15.0,1.03,1.62,2.2,4.0,1.22
5088,Villarreal,Espanyol,1.4,4.75,7.5,1.1,1.18,2.75,1.91,2.4,7.0,1.08,1.53,1.8,1.95,1.8,1.22,4.33,1.73,2.1,2.75,1.44,5.0,1.17,1.33,3.25,2.5,1.5
5089,Sporting CP,Estoril,1.18,7.5,12.0,1.05,1.1,4.5,1.57,2.88,10.0,1.03,1.4,2.25,2.05,1.7,1.13,6.0,1.44,2.7,2.1,1.67,3.5,1.29,1.22,4.0,2.1,1.67
5090,San Lorenzo,Racing Club,2.63,3.1,2.88,1.4,1.36,1.44,3.6,1.91,3.75,1.25,1.8,1.29,2.2,1.62,1.53,2.38,2.7,1.44,5.5,1.14,13.0,1.04,1.62,2.2,4.0,1.22
5091,U. Magdalena,Dep. Cali,2.55,3.0,3.0,1.36,1.36,1.5,3.4,1.91,3.75,1.25,1.8,1.3,2.1,1.67,1.5,2.5,2.6,1.48,5.0,1.17,13.0,1.04,1.57,2.25,3.75,1.25


# Merging 2 DataFrames for similarity of values

In [2]:
# Use predictions key values as the canonical list
home_keys = predictions['Home'].unique().tolist()
away_keys = predictions['Away'].unique().tolist()

def get_canonical(val, canonical_list, threshold=85):
    """
    For a given value from df2, find the best matching canonical value from df1
    using a fuzzy matching score. If no match meets the threshold, return the original value.
    """
    match = process.extractOne(val, canonical_list, score_cutoff=threshold)
    if match:
        return match[0]
    return val

# Replace values in bet365_odds Home and Away columns using the canonical mapping from predictions
bet365_odds['Home'] = bet365_odds['Home'].apply(lambda x: get_canonical(x, home_keys))
bet365_odds['Away'] = bet365_odds['Away'].apply(lambda x: get_canonical(x, away_keys))

# Merge 2 dataframes on Home and Away columns
merged_df = pd.merge(predictions, bet365_odds, on=['Home', 'Away'], how='inner')
merged_df = merged_df.drop_duplicates(subset=['League','Home', 'Away'], keep='last')
merged_df.tail()

Unnamed: 0,League,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y
4304,Spain2,Mirandes,Tenerife,68.48,22.71,8.8,1-0,91.19,77.28,31.51,66.95,39.7,80.46,91.93,32.78,43.13,43.66,13.12,0-0,86.79,56.25,56.78,63.21,74.63,83.23,38.6,53.29,8.65,73.43,98.64,1.9,3.0,4.75,1.18,1.36,1.91,2.75,1.91,5.5,1.14,1.8,1.4,2.38,1.53,1.53,2.38,2.7,1.44,5.5,1.14,13.0,1.04,1.62,2.2,4.0,1.22
4305,Turkey,Gaziantep,Eyupspor,28.88,32.85,38.26,1-1,61.73,67.14,71.11,69.51,40.67,79.72,91.5,49.0,16.06,42.34,41.5,0-0,58.4,57.56,83.84,66.34,71.01,65.35,71.36,28.63,35.55,90.83,86.84,2.35,3.4,2.9,1.4,1.3,1.57,3.0,2.2,3.5,1.29,1.62,1.36,1.67,2.1,1.25,3.75,1.9,1.95,3.25,1.33,6.0,1.13,1.4,2.75,2.75,1.4
4306,Turkey,Konyaspor,Trabzonspor,28.49,28.55,42.96,1-1,57.04,71.45,71.51,80.49,55.26,67.1,83.28,59.91,27.68,41.3,30.95,0-0,68.98,58.63,72.25,69.84,67.27,72.31,80.0,36.76,47.81,86.07,78.09,2.63,3.3,2.63,1.44,1.33,1.44,3.2,2.2,3.2,1.36,1.62,1.36,1.67,2.1,1.25,3.75,1.88,1.98,3.0,1.36,6.0,1.13,1.4,2.75,2.75,1.4
4307,Turkey,Basaksehir,Sivasspor,65.22,21.11,13.6,1-1,86.33,78.82,34.71,86.62,65.8,56.01,74.56,59.8,50.5,32.48,16.5,1-0,82.98,67.0,48.98,78.77,54.56,90.45,64.43,68.13,27.7,58.1,91.24,1.67,4.0,4.75,1.18,1.22,2.15,2.3,2.2,5.5,1.13,1.62,1.57,1.91,1.8,1.29,3.5,1.95,1.85,3.4,1.3,6.5,1.11,1.4,2.75,3.0,1.36
4308,Turkey,Goztepe,Samsunspor,41.2,30.33,28.47,1-1,71.53,69.67,58.8,76.09,48.97,72.89,87.28,55.27,30.71,45.62,23.63,0-0,76.33,54.34,69.25,63.62,73.95,76.62,69.25,42.64,32.98,82.05,88.4,1.95,3.4,3.8,1.25,1.3,1.8,2.75,2.05,4.5,1.17,1.73,1.4,1.91,1.8,1.36,3.0,2.15,1.67,4.0,1.22,9.0,1.07,1.44,2.63,3.25,1.33


# Scraping SoccerStats For Match Results

In [3]:
final =  pd.DataFrame()
liqa = ''
unique_leagues = predictions['League'].unique().tolist()

# Convert to lowercase and exclude UEFA competitions
uefa_list = ['unl', 'uel', 'ucl', 'ufcl']
list_2024 = ['norway_2024', 'sweden_2024', 'usa_2024']
unique_leagues = list(set([league.lower() for league in unique_leagues if league.lower() not in uefa_list]))

# Append list_2024 to unique_leagues
unique_leagues.extend(list_2024)

for i in unique_leagues:
    URL = "https://www.soccerstats.com/results.asp?league=" + i + "&pmtype=bydate"
    page = requests.get(URL)
    liqa = i
    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="btable")
    sth = results.find_all("tr", class_="odd")
    sth


    date, league, home, away, ft, ht = [], [], [], [], [],[]
    for i in sth:
        date.append(i.find_all("td", align = 'right')[0].get_text(strip=True))
        league.append(liqa.capitalize())
        home.append(i.find_all("td", align = 'right')[1].get_text(strip=True))
        away.append(i.find("td", align = "left").get_text(strip = True))
        ft.append(i.find_all("td", align = 'center')[0].get_text(strip = True))
        try:
            ht.append(i.find_all("td", align = 'center')[2].get_text(strip = True))
        except IndexError as e:
            ht.append('NA')#print("Last output before error occurred:", i.find_all("td", align = 'center'))

    data = {'Date': date, 'League': league,'Home': home, 'Away': away, 'FT': ft, 'HT': ht}

# Create a DataFrame from the dictionary
    df = pd.DataFrame(data)

# Replace empty strings with NaN
    #next_df = df[(df['Date'] == formatted_date) & (df['HT'] == '')]
    df.replace('', pd.NA, inplace=True)

# Drop rows with NaN values
    df_cleaned = df.dropna()

#For Half-Time Results
    hthg, htag = [], []
    for i in df_cleaned['HT']:
        if i == 'NA':
            hthg.append('NA')
            htag.append('NA')
        elif i == '+' or i == '-':
            hthg.append('NA')
            htag.append('NA')
        else:
            try:
                hthg.append(int(i[1]))
                htag.append(int(i[3]))
            except IndexError as e:
                print("Last output before error occurred:", i)



#For Full-Time Results
    hg, ag, tg = [], [], []
    for i in df_cleaned['FT']:
        if len(i) < 5 or ':' in i:
            hg.append('NA')
            ag.append('NA')
            tg.append('NA')
        else:
            try:
                hghg = int(i.split(' - ')[0])
                hg.append(hghg)
                agag = int(i.split(' - ')[1])
                ag.append(agag)
                tg.append(hghg + agag)
            except:
                print(hghg + agag)

    
    df_cleaned['FTHG'], df_cleaned['FTAG'], df_cleaned['FTTG'] = hg, ag, tg
    df_cleaned['HTHG'], df_cleaned['HTAG'] = hthg, htag
    df_cleaned['HTTG'] = df_cleaned['HTHG'] + df_cleaned['HTAG']
    
    final = pd.concat([final, df_cleaned], ignore_index=True)
    
final = final[final['HT'] != 'NA']

# Get the date for today
today = datetime.now()

# Append the correct year based on if the date already occurred in 2025
def assign_year(date_str):
    if pd.isna(date_str):  # Handle NaN values
        return None
    
    # Check if the same day and month already occurred in 2025
    date_in_2025 = datetime.strptime(date_str + ' 2025', "%d %b %Y")

    # If this day and month already occurred in 2025, append 2025, otherwise 2024
    if date_in_2025 <= today:
        return f"{date_str} 2025"
    else:
        return f"{date_str} 2024"

# Example: Fix the 'date' column by removing the weekday and extra punctuation
final['Date'] = final['Date'].str.extract(r'(\d{1,2} \w{3})')  # Extract day and month part
final['Date'] = final['Date'].apply(assign_year)

# Convert to datetime format
final['Date'] = pd.to_datetime(final['Date'], format='%d %b %Y', errors='coerce')

# Filter rows before September 17th, 2024
final_filtered = final[final['Date'] >= pd.Timestamp('2024-09-17')]

# Remove "_YYYY" (4-digit year) at the end of usa, norway and sweden but keep other numbers
final_filtered['League'] = final_filtered['League'].str.replace(r'_\d{4}$', '', regex=True)

# Align columns of uefa to match final_filtered
uefa = uefa[final_filtered.columns]

# Concatenate
final_filtered = pd.concat([uefa, final_filtered], ignore_index=True)
combined = pd.concat([final_filtered.head(), final_filtered.tail()])
combined

Unnamed: 0,Date,League,Home,Away,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
0,9/17/2024,UCL,Juventus,PSV,3 - 1,(2-0),3.0,1.0,4.0,2.0,0.0,2.0
1,9/17/2024,UCL,Young Boys,Aston Villa,0 - 3,(0-2),0.0,3.0,3.0,0.0,2.0,2.0
2,9/17/2024,UCL,Bayern,Dinamo Zagreb,9 - 2,(3-0),9.0,2.0,11.0,3.0,0.0,3.0
3,9/17/2024,UCL,Milan,Liverpool,1 - 3,(1-2),1.0,3.0,4.0,1.0,2.0,3.0
4,9/17/2024,UCL,Real Madrid,Stuttgart,3 - 1,(0-0),3.0,1.0,4.0,0.0,0.0,0.0
7499,2024-10-20 00:00:00,Usa,Houston Dynamo,LA Galaxy,2 - 1,(1-0),2.0,1.0,3.0,1.0,0.0,1.0
7500,2024-10-20 00:00:00,Usa,Los Angeles FC,SJ Earthquakes,3 - 1,(0-1),3.0,1.0,4.0,0.0,1.0,1.0
7501,2024-10-20 00:00:00,Usa,Minnesota Utd,St. Louis City,4 - 1,(1-0),4.0,1.0,5.0,1.0,0.0,1.0
7502,2024-10-20 00:00:00,Usa,Real Salt Lake,Vancouver,2 - 1,(0-0),2.0,1.0,3.0,0.0,0.0,0.0
7503,2024-10-20 00:00:00,Usa,Seattle,Portland,1 - 1,(1-0),1.0,1.0,2.0,1.0,0.0,1.0


# Merging with Predictions + Odds Dataframes

In [4]:
# Merge on multiple columns
final_df = pd.merge(merged_df, final_filtered, on=['Home', 'Away'], how='inner')

# Drop duplicates based on the 'Home' and 'Away' columns
final_df_unique = final_df[~final_df.duplicated(subset=['League_x', 'Home', 'Away'], keep='last')]
final_df_unique = final_df_unique.dropna()

# Reset the index afterward
final_df_unique.reset_index(drop=True, inplace=True)

print('Number of games matched: ', len(final_df_unique))
final_df_unique.tail()

Number of games matched:  3936


Unnamed: 0,League_x,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y,Date,League_y,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
3931,Switzerland,Yverdon,Luzern,26.83,25.74,47.43,1-1,52.57,74.26,73.17,76.6,51.97,70.19,85.46,55.09,22.83,46.89,30.23,0-0,69.72,53.06,77.12,63.9,71.35,68.33,79.91,31.92,47.66,89.01,78.21,3.75,3.4,2.0,1.8,1.3,1.25,4.0,2.2,2.63,1.44,1.62,1.22,1.73,2.0,1.25,3.75,1.88,1.98,3.0,1.36,6.0,1.13,1.36,3.0,2.75,1.4,2025-03-02 00:00:00,Switzerland,2 - 2,(0-0),2.0,2.0,4.0,0.0,0.0,0.0
3932,Turkey,Rizespor,Alanyaspor,39.58,31.91,28.51,1-1,71.49,68.09,60.42,71.34,42.97,77.92,90.44,50.62,38.38,40.1,21.41,0-0,78.48,59.79,61.51,70.78,65.95,73.11,66.19,37.79,29.53,85.4,90.35,1.8,3.5,4.33,1.2,1.29,1.95,2.4,2.3,4.33,1.2,1.57,1.53,1.67,2.1,1.2,4.33,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.63,1.44,2025-03-02 00:00:00,Turkey,3 - 1,(0-0),3.0,1.0,4.0,0.0,0.0,0.0
3933,Turkey,Kasimpasa,Galatasaray,16.29,18.48,64.99,1-2,34.77,81.28,83.47,93.07,78.94,39.25,58.68,72.01,10.57,26.47,61.55,0-1,37.04,72.12,88.02,82.2,47.03,75.63,93.65,41.38,76.59,82.68,47.03,4.75,4.5,1.62,2.25,1.18,1.18,4.5,2.63,2.1,1.67,1.44,1.17,1.44,2.63,1.11,6.5,1.4,2.88,1.91,1.8,3.25,1.33,1.22,4.0,2.0,1.73,2025-03-02 00:00:00,Turkey,3 - 3,(0-1),3.0,3.0,6.0,0.0,1.0,1.0
3934,Turkey,Fenerbahce,Antalyaspor,86.62,9.31,3.12,3-0,95.93,89.74,12.43,91.54,77.32,40.53,60.01,47.46,65.67,24.22,8.2,1-0,89.89,73.87,32.42,82.59,45.29,95.96,48.33,85.22,14.38,32.5,96.03,1.22,7.0,9.5,1.07,1.1,4.0,1.67,2.75,9.5,1.07,1.4,2.1,2.1,1.67,1.17,5.0,1.53,2.4,2.25,1.57,4.0,1.22,1.25,3.75,2.2,1.6,2025-03-02 00:00:00,Turkey,3 - 0,(3-0),3.0,0.0,3.0,3.0,0.0,3.0
3935,Mexico2,Atlas,A. San Luis,44.77,22.59,32.63,1-1,67.36,77.4,55.22,78.67,56.8,65.6,82.18,58.42,45.88,44.07,9.46,0-0,89.95,55.34,53.53,73.07,47.55,80.17,73.89,48.08,38.83,77.87,84.71,2.0,3.5,3.7,1.29,1.29,1.8,2.63,2.2,4.0,1.22,1.62,1.44,1.8,1.95,1.29,3.5,2.0,1.85,3.4,1.3,7.0,1.1,1.4,2.75,3.0,1.36,2025-03-03 00:00:00,Mexico2,3 - 1,(0-1),3.0,1.0,4.0,0.0,1.0,1.0


# Creating Results Columns

In [5]:
import numpy as np

# Add the new columns based on the condition
final_df_unique['FT1'] = np.where(final_df_unique['FTHG'] > final_df_unique['FTAG'], 1, 0)
final_df_unique['FTX'] = np.where(final_df_unique['FTHG'] == final_df_unique['FTAG'], 1, 0)
final_df_unique['FT2'] = np.where(final_df_unique['FTHG'] < final_df_unique['FTAG'], 1, 0)

final_df_unique['FT1X'] = np.where(final_df_unique['FTHG'] >= final_df_unique['FTAG'], 1, 0)
final_df_unique['FT12'] = np.where(final_df_unique['FTX'] == 0, 1, 0)
final_df_unique['FTX2'] = np.where(final_df_unique['FTHG'] <= final_df_unique['FTAG'], 1, 0)

final_df_unique['1.5O'] = np.where(final_df_unique['FTTG'] > 1.5, 1, 0)
final_df_unique['1.5U_y'] = np.where(final_df_unique['FTTG'] < 1.5, 1, 0)
final_df_unique['2.5O'] = np.where(final_df_unique['FTTG'] > 2.5, 1, 0)
final_df_unique['2.5U_y'] = np.where(final_df_unique['FTTG'] < 2.5, 1, 0)
final_df_unique['3.5O_y'] = np.where(final_df_unique['FTTG'] > 3.5, 1, 0)
final_df_unique['3.5U'] = np.where(final_df_unique['FTTG'] < 3.5, 1, 0)
final_df_unique['4.5O_y'] = np.where(final_df_unique['FTTG'] > 4.5, 1, 0)
final_df_unique['4.5U'] = np.where(final_df_unique['FTTG'] < 4.5, 1, 0)

final_df_unique['BTTS'] = np.where((final_df_unique['FTHG'] != 0) & (final_df_unique['FTAG'] != 0), 1, 0)
final_df_unique['OTTS_y'] = np.where(final_df_unique['BTTS'] == 0, 1, 0)

final_df_unique['HT1'] = np.where(final_df_unique['HTHG'] > final_df_unique['HTAG'], 1, 0)
final_df_unique['HTX'] = np.where(final_df_unique['HTHG'] == final_df_unique['HTAG'], 1, 0)
final_df_unique['HT2'] = np.where(final_df_unique['HTHG'] < final_df_unique['HTAG'], 1, 0)

final_df_unique['HT1X_y'] = np.where(final_df_unique['HTHG'] >= final_df_unique['HTAG'], 1, 0)
final_df_unique['HT12_y'] = np.where(final_df_unique['HTX'] == 0, 1, 0)
final_df_unique['HTX2_y'] = np.where(final_df_unique['HTHG'] <= final_df_unique['HTAG'], 1, 0)

final_df_unique['HT0.5O'] = np.where(final_df_unique['HTTG'] > 0.5, 1, 0)
final_df_unique['HT0.5U_y'] = np.where(final_df_unique['HTTG'] < 0.5, 1, 0)
final_df_unique['HT1.5O_y'] = np.where(final_df_unique['HTTG'] > 1.5, 1, 0)
final_df_unique['HT1.5U'] = np.where(final_df_unique['HTTG'] < 1.5, 1, 0)

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  3936


Unnamed: 0,League_x,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y,Date,League_y,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U_y,2.5O,2.5U_y,3.5O_y,3.5U,4.5O_y,4.5U,BTTS,OTTS_y,HT1,HTX,HT2,HT1X_y,HT12_y,HTX2_y,HT0.5O,HT0.5U_y,HT1.5O_y,HT1.5U
3931,Switzerland,Yverdon,Luzern,26.83,25.74,47.43,1-1,52.57,74.26,73.17,76.6,51.97,70.19,85.46,55.09,22.83,46.89,30.23,0-0,69.72,53.06,77.12,63.9,71.35,68.33,79.91,31.92,47.66,89.01,78.21,3.75,3.4,2.0,1.8,1.3,1.25,4.0,2.2,2.63,1.44,1.62,1.22,1.73,2.0,1.25,3.75,1.88,1.98,3.0,1.36,6.0,1.13,1.36,3.0,2.75,1.4,2025-03-02 00:00:00,Switzerland,2 - 2,(0-0),2.0,2.0,4.0,0.0,0.0,0.0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1
3932,Turkey,Rizespor,Alanyaspor,39.58,31.91,28.51,1-1,71.49,68.09,60.42,71.34,42.97,77.92,90.44,50.62,38.38,40.1,21.41,0-0,78.48,59.79,61.51,70.78,65.95,73.11,66.19,37.79,29.53,85.4,90.35,1.8,3.5,4.33,1.2,1.29,1.95,2.4,2.3,4.33,1.2,1.57,1.53,1.67,2.1,1.2,4.33,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.63,1.44,2025-03-02 00:00:00,Turkey,3 - 1,(0-0),3.0,1.0,4.0,0.0,0.0,0.0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1
3933,Turkey,Kasimpasa,Galatasaray,16.29,18.48,64.99,1-2,34.77,81.28,83.47,93.07,78.94,39.25,58.68,72.01,10.57,26.47,61.55,0-1,37.04,72.12,88.02,82.2,47.03,75.63,93.65,41.38,76.59,82.68,47.03,4.75,4.5,1.62,2.25,1.18,1.18,4.5,2.63,2.1,1.67,1.44,1.17,1.44,2.63,1.11,6.5,1.4,2.88,1.91,1.8,3.25,1.33,1.22,4.0,2.0,1.73,2025-03-02 00:00:00,Turkey,3 - 3,(0-1),3.0,3.0,6.0,0.0,1.0,1.0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1
3934,Turkey,Fenerbahce,Antalyaspor,86.62,9.31,3.12,3-0,95.93,89.74,12.43,91.54,77.32,40.53,60.01,47.46,65.67,24.22,8.2,1-0,89.89,73.87,32.42,82.59,45.29,95.96,48.33,85.22,14.38,32.5,96.03,1.22,7.0,9.5,1.07,1.1,4.0,1.67,2.75,9.5,1.07,1.4,2.1,2.1,1.67,1.17,5.0,1.53,2.4,2.25,1.57,4.0,1.22,1.25,3.75,2.2,1.6,2025-03-02 00:00:00,Turkey,3 - 0,(3-0),3.0,0.0,3.0,3.0,0.0,3.0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,1,0
3935,Mexico2,Atlas,A. San Luis,44.77,22.59,32.63,1-1,67.36,77.4,55.22,78.67,56.8,65.6,82.18,58.42,45.88,44.07,9.46,0-0,89.95,55.34,53.53,73.07,47.55,80.17,73.89,48.08,38.83,77.87,84.71,2.0,3.5,3.7,1.29,1.29,1.8,2.63,2.2,4.0,1.22,1.62,1.44,1.8,1.95,1.29,3.5,2.0,1.85,3.4,1.3,7.0,1.1,1.4,2.75,3.0,1.36,2025-03-03 00:00:00,Mexico2,3 - 1,(0-1),3.0,1.0,4.0,0.0,1.0,1.0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1


# Creating Profit Columns for Initial Model Predictions

In [6]:
ft1p, ftxp, ft2p, ft1xp, ft12p, ftx2p = [], [], [], [], [], []
over15, under15, over25, under25, over35, under35, over45, under45 = [], [], [], [], [], [], [], []
btts, otts, ht1p, htxp, ht2p, ht1xp, ht12p, htx2p = [], [], [], [], [], [], [], []
htover05, htunder05, htover15, htunder15 = [], [], [], []

#if prediction == result -> coefficient, elif prediction != result -> 0, else -> -1
for i in range(len(final_df_unique)):
    ft_list = [final_df_unique['FT1_x'].iloc[i], final_df_unique['FTX_x'].iloc[i], final_df_unique['FT2_x'].iloc[i]]
    if (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] == 1):
        ft1p.append(final_df_unique['FT1_y'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] != 1):
        ft1p.append(0)
    else:
        ft1p.append(-1)
    
    if (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] == 1):
        ftxp.append(final_df_unique['FTX_y'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] != 1):
        ftxp.append(0)
    else:
        ftxp.append(-1)
    
    if (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] == 1):
        ft2p.append(final_df_unique['FT2_y'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] != 1):
        ft2p.append(0)
    else:
        ft2p.append(-1)

    dc_list = [final_df_unique['DC1X_x'].iloc[i], final_df_unique['DC12_x'].iloc[i], final_df_unique['DCX2_x'].iloc[i]]
    if (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] == 1):
        ft1xp.append(final_df_unique['DC1X_y'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] != 1):
        ft1xp.append(0)
    else:
        ft1xp.append(-1)
    
    if (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] == 1):
        ft12p.append(final_df_unique['DC12_y'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] != 1):
        ft12p.append(0)
    else:
        ft12p.append(-1)
    
    if (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] == 1):
        ftx2p.append(final_df_unique['DCX2_y'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] != 1):
        ftx2p.append(0)
    else:
        ftx2p.append(-1)
    
    if (final_df_unique['1.5O_x'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        over15.append(final_df_unique['1.5O_y'].iloc[i])
    elif (final_df_unique['1.5O_x'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        over15.append(0)
    else:
        over15.append(-1)
    
    if (final_df_unique['1.5O_x'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        under15.append(final_df_unique['1.5U'].iloc[i])
    elif (final_df_unique['1.5O_x'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        under15.append(0)
    else:
        under15.append(-1)
    
    if (final_df_unique['2.5O_x'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        over25.append(final_df_unique['2.5O_y'].iloc[i])
    elif (final_df_unique['2.5O_x'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        over25.append(0)
    else:
        over25.append(-1)
    
    if (final_df_unique['2.5O_x'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        under25.append(final_df_unique['2.5U'].iloc[i])
    elif (final_df_unique['2.5O_x'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        under25.append(0)
    else:
        under25.append(-1)
    
    if (final_df_unique['3.5U_x'].iloc[i] <= 50) and (final_df_unique['3.5U'].iloc[i] != 1):
        over35.append(final_df_unique['3.5O'].iloc[i])
    elif (final_df_unique['3.5U_x'].iloc[i] <= 50) and (final_df_unique['3.5U'].iloc[i] == 1):
        over35.append(0)
    else:
        over35.append(-1)
    
    if (final_df_unique['3.5U_x'].iloc[i] > 50) and (final_df_unique['3.5U'].iloc[i] == 1):
        under35.append(final_df_unique['3.5U_y'].iloc[i])
    elif (final_df_unique['3.5U_x'].iloc[i] > 50) and (final_df_unique['3.5U'].iloc[i] != 1):
        under35.append(0)
    else:
        under35.append(-1)
    
    if (final_df_unique['4.5U_x'].iloc[i] <= 50) and (final_df_unique['4.5U'].iloc[i] != 1):
        over45.append(final_df_unique['4.5O'].iloc[i])
    elif (final_df_unique['4.5U_x'].iloc[i] <= 50) and (final_df_unique['4.5U'].iloc[i] == 1):
        over45.append(0)
    else:
        over45.append(-1)
    
    if (final_df_unique['4.5U_x'].iloc[i] > 50) and (final_df_unique['4.5U'].iloc[i] == 1):
        under45.append(final_df_unique['4.5U_y'].iloc[i])
    elif (final_df_unique['4.5U_x'].iloc[i] > 50) and (final_df_unique['4.5U'].iloc[i] != 1):
        under45.append(0)
    else:
        under45.append(-1)
    
    if (final_df_unique['BTTS_x'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        btts.append(final_df_unique['BTTS_y'].iloc[i])
    elif (final_df_unique['BTTS_x'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        btts.append(0)
    else:
        btts.append(-1)
    
    if (final_df_unique['BTTS_x'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        otts.append(final_df_unique['OTTS'].iloc[i])
    elif (final_df_unique['BTTS_x'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        otts.append(0)
    else:
        otts.append(-1)
    
    ht_list = [final_df_unique['HT1_x'].iloc[i], final_df_unique['HTX_x'].iloc[i], final_df_unique['HT2_x'].iloc[i]]
    if (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] == 1):
        ht1p.append(final_df_unique['HT1_y'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] != 1):
        ht1p.append(0)
    else:
        ht1p.append(-1)
    
    if (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] == 1):
        htxp.append(final_df_unique['HTX_y'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] != 1):
        htxp.append(0)
    else:
        htxp.append(-1)
    
    if (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] == 1):
        ht2p.append(final_df_unique['HT2_y'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] != 1):
        ht2p.append(0)
    else:
        ht2p.append(-1)
    
    htdc_list = [final_df_unique['HTDC1X'].iloc[i], final_df_unique['HTDC12'].iloc[i], final_df_unique['HTDCX2'].iloc[i]]
    if (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X_y'].iloc[i] == 1):
        ht1xp.append(final_df_unique['HT1X'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X_y'].iloc[i] != 1):
        ht1xp.append(0)
    else:
        ht1xp.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12_y'].iloc[i] == 1):
        ht12p.append(final_df_unique['HT12'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12_y'].iloc[i] != 1):
        ht12p.append(0)
    else:
        ht12p.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2_y'].iloc[i] == 1):
        htx2p.append(final_df_unique['HTX2'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2_y'].iloc[i] != 1):
        htx2p.append(0)
    else:
        htx2p.append(-1)
    
    if (final_df_unique['HT0.5O_x'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htover05.append(final_df_unique['HT0.5O_y'].iloc[i])
    elif (final_df_unique['HT0.5O_x'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htover05.append(0)
    else:
        htover05.append(-1)
    
    if (final_df_unique['HT0.5O_x'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htunder05.append(final_df_unique['HT0.5U'].iloc[i])
    elif (final_df_unique['HT0.5O_x'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htunder05.append(0)
    else:
        htunder05.append(-1)
    
    if (final_df_unique['HT1.5U_x'].iloc[i] < 50) and (final_df_unique['HT1.5U'].iloc[i] != 1):
        htover15.append(final_df_unique['HT1.5O'].iloc[i])
    elif (final_df_unique['HT1.5U_x'].iloc[i] < 50) and (final_df_unique['HT1.5U'].iloc[i] == 1):
        htover15.append(0)
    else:
        htover15.append(-1)
    
    if (final_df_unique['HT1.5U_x'].iloc[i] >= 50) and (final_df_unique['HT1.5U'].iloc[i] == 1):
        htunder15.append(final_df_unique['HT1.5U_y'].iloc[i])
    elif (final_df_unique['HT1.5U_x'].iloc[i] >= 50) and (final_df_unique['HT1.5U'].iloc[i] != 1):
        htunder15.append(0)
    else:
        htunder15.append(-1)

final_df_unique['FT1P'], final_df_unique['FTXP'], final_df_unique['FT2P'] = ft1p, ftxp, ft2p
final_df_unique['FT1XP'], final_df_unique['FT12P'], final_df_unique['FTX2P'] = ft1xp, ft12p, ftx2p
final_df_unique['1.5OP'], final_df_unique['1.5UP'], final_df_unique['2.5OP'], final_df_unique['2.5UP'] = over15, under15, over25, under25
final_df_unique['3.5OP'], final_df_unique['3.5UP'], final_df_unique['4.5OP'], final_df_unique['4.5UP'] = over35, under35, over45, under45
final_df_unique['BTTSP'], final_df_unique['OTTSP'] = btts, otts
final_df_unique['HT1P'], final_df_unique['HTXP'], final_df_unique['HT2P'] = ht1p, htxp, ht2p
final_df_unique['HT1XP'], final_df_unique['HT12P'], final_df_unique['HTX2P'] = ht1xp, ht12p, htx2p
final_df_unique['HT0.5OP'], final_df_unique['HT0.5UP'] = htover05, htunder05
final_df_unique['HT1.5OP'], final_df_unique['HT1.5UP'] = htover15, htunder15

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  3936


Unnamed: 0,League_x,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y,Date,League_y,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U_y,2.5O,2.5U_y,3.5O_y,3.5U,4.5O_y,4.5U,BTTS,OTTS_y,HT1,HTX,HT2,HT1X_y,HT12_y,HTX2_y,HT0.5O,HT0.5U_y,HT1.5O_y,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP
3931,Switzerland,Yverdon,Luzern,26.83,25.74,47.43,1-1,52.57,74.26,73.17,76.6,51.97,70.19,85.46,55.09,22.83,46.89,30.23,0-0,69.72,53.06,77.12,63.9,71.35,68.33,79.91,31.92,47.66,89.01,78.21,3.75,3.4,2.0,1.8,1.3,1.25,4.0,2.2,2.63,1.44,1.62,1.22,1.73,2.0,1.25,3.75,1.88,1.98,3.0,1.36,6.0,1.13,1.36,3.0,2.75,1.4,2025-03-02 00:00:00,Switzerland,2 - 2,(0-0),2.0,2.0,4.0,0.0,0.0,0.0,0,1,0,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.25,-1.0,1.88,-1.0,-1.0,0.0,-1.0,1.13,1.73,-1.0,-1.0,2.2,-1.0,-1.0,-1.0,1.22,0.0,-1.0,-1.0,1.4
3932,Turkey,Rizespor,Alanyaspor,39.58,31.91,28.51,1-1,71.49,68.09,60.42,71.34,42.97,77.92,90.44,50.62,38.38,40.1,21.41,0-0,78.48,59.79,61.51,70.78,65.95,73.11,66.19,37.79,29.53,85.4,90.35,1.8,3.5,4.33,1.2,1.29,1.95,2.4,2.3,4.33,1.2,1.57,1.53,1.67,2.1,1.2,4.33,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.63,1.44,2025-03-02 00:00:00,Turkey,3 - 1,(0-0),3.0,1.0,4.0,0.0,0.0,0.0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1.8,-1.0,-1.0,1.2,-1.0,-1.0,1.2,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.17,1.67,-1.0,-1.0,2.3,-1.0,1.2,-1.0,-1.0,0.0,-1.0,-1.0,1.44
3933,Turkey,Kasimpasa,Galatasaray,16.29,18.48,64.99,1-2,34.77,81.28,83.47,93.07,78.94,39.25,58.68,72.01,10.57,26.47,61.55,0-1,37.04,72.12,88.02,82.2,47.03,75.63,93.65,41.38,76.59,82.68,47.03,4.75,4.5,1.62,2.25,1.18,1.18,4.5,2.63,2.1,1.67,1.44,1.17,1.44,2.63,1.11,6.5,1.4,2.88,1.91,1.8,3.25,1.33,1.22,4.0,2.0,1.73,2025-03-02 00:00:00,Turkey,3 - 3,(0-1),3.0,3.0,6.0,0.0,1.0,1.0,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,0,1,0,1,1,1,0,0,1,-1.0,-1.0,0.0,-1.0,-1.0,1.18,1.11,-1.0,1.4,-1.0,1.91,-1.0,-1.0,0.0,1.44,-1.0,-1.0,-1.0,2.1,-1.0,-1.0,1.17,1.22,-1.0,0.0,-1.0
3934,Turkey,Fenerbahce,Antalyaspor,86.62,9.31,3.12,3-0,95.93,89.74,12.43,91.54,77.32,40.53,60.01,47.46,65.67,24.22,8.2,1-0,89.89,73.87,32.42,82.59,45.29,95.96,48.33,85.22,14.38,32.5,96.03,1.22,7.0,9.5,1.07,1.1,4.0,1.67,2.75,9.5,1.07,1.4,2.1,2.1,1.67,1.17,5.0,1.53,2.4,2.25,1.57,4.0,1.22,1.25,3.75,2.2,1.6,2025-03-02 00:00:00,Turkey,3 - 0,(3-0),3.0,0.0,3.0,3.0,0.0,3.0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1.22,-1.0,-1.0,1.07,-1.0,-1.0,1.17,-1.0,1.53,-1.0,0.0,-1.0,-1.0,1.22,-1.0,1.67,1.67,-1.0,-1.0,1.07,-1.0,-1.0,1.25,-1.0,2.2,-1.0
3935,Mexico2,Atlas,A. San Luis,44.77,22.59,32.63,1-1,67.36,77.4,55.22,78.67,56.8,65.6,82.18,58.42,45.88,44.07,9.46,0-0,89.95,55.34,53.53,73.07,47.55,80.17,73.89,48.08,38.83,77.87,84.71,2.0,3.5,3.7,1.29,1.29,1.8,2.63,2.2,4.0,1.22,1.62,1.44,1.8,1.95,1.29,3.5,2.0,1.85,3.4,1.3,7.0,1.1,1.4,2.75,3.0,1.36,2025-03-03 00:00:00,Mexico2,3 - 1,(0-1),3.0,1.0,4.0,0.0,1.0,1.0,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,1,1,0,0,1,2.0,-1.0,-1.0,-1.0,1.29,-1.0,1.29,-1.0,2.0,-1.0,-1.0,0.0,-1.0,1.1,1.8,-1.0,0.0,-1.0,-1.0,0.0,-1.0,-1.0,1.4,-1.0,0.0,-1.0


# Checking For ROI of Profit Columns

In [7]:
# Select columns that end with 'P'
columns = [col for col in final_df_unique.columns if col.endswith('P')]

# Initialize lists to store results and games
results, games_list = [],  []

for col in columns:
    my_df = final_df_unique[final_df_unique[col] >= 0]
    numerator = np.sum(my_df[col]) - len(my_df)
    result = round(numerator / len(my_df) * 100, 2)
    
    # Append results and games to respective lists
    results.append(result)
    games_list.append(len(my_df))

# Convert results to a DataFrame with an additional column for Games
results_df = pd.DataFrame({
    'Column': columns,
    'ROI': results,
    'Games': games_list
})
results_df

Unnamed: 0,Column,ROI,Games
0,FT1P,-5.18,2231
1,FTXP,-5.27,269
2,FT2P,-10.91,1436
3,FT1XP,-3.18,1734
4,FT12P,-5.82,1153
5,FTX2P,-6.15,1049
6,1.5OP,-5.8,3625
7,1.5UP,-0.06,311
8,2.5OP,-5.8,1851
9,2.5UP,-5.18,2085


# ROI of Profit Columns According To Leagues

In [8]:
# Step 1: Filter leagues with at least 10 games
league_counts = final_df_unique['League_x'].value_counts()
leagues_with_10_games = league_counts[league_counts >= 10].index
filtered_df = final_df_unique[final_df_unique['League_x'].isin(leagues_with_10_games)]

# Group by 'League' and calculate results for each group
grouped_results = {}
for league, group in filtered_df.groupby('League_x'):
    group_results = {}
    for col in columns:
        my_df = group[group[col] >= 0]
        numerator = np.sum(my_df[col]) - len(my_df)
        group_results[col] = round(numerator / len(my_df) * 100, 2)
    # Add the number of games for this league
    group_results['Games'] = round(len(group),2)
    grouped_results[league] = group_results

# Convert grouped results to a DataFrame for better visualization
grouped_results_df = pd.DataFrame(grouped_results).T

# Define a function to apply conditional formatting
def highlight_positive(val):
    # Highlight background to red if the value is positive
    color = 'background-color: red' if isinstance(val, (int, float)) and val > 0 else ''
    return color

# Apply the function to the DataFrame
styled_df = (
    grouped_results_df.style
    .applymap(highlight_positive)
    .format("{:.2f}")  # Format only numeric columns, excluding 'Games'
)

# Save the styled DataFrame to Excel
styled_df.to_excel("ROI_leagues.xlsx", index=True)

# Display the styled DataFrame
styled_df

Unnamed: 0,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,Games
Argentina,-33.25,50.0,,1.69,-100.0,,-2.73,-25.0,-100.0,2.15,,-1.07,,-3.14,-100.0,23.77,-100.0,-13.27,,-0.08,,46.5,7.89,39.6,,-2.36,14.0
Australia,-29.33,,-15.1,18.2,-3.33,30.6,-8.11,,5.0,76.2,-41.29,-25.5,,-0.63,-21.62,-35.0,-0.33,13.33,45.43,7.67,,-6.1,-20.0,,24.6,-10.79,19.0
Austria,-36.55,-22.5,-25.47,-5.92,9.11,7.0,1.51,300.0,26.1,5.21,16.5,6.85,-100.0,-2.16,-17.14,-24.71,-50.09,-5.63,-23.57,0.07,,-7.96,2.1,62.78,16.64,5.92,50.0
Belgium,8.02,5.38,-40.11,8.58,-36.12,-7.46,-8.61,-35.0,-14.29,4.21,-31.42,6.75,-100.0,0.44,-22.58,-5.81,-6.23,-20.6,-26.88,3.96,-100.0,-11.04,-6.2,23.29,-77.37,-0.83,105.0
Brazil,-24.6,-100.0,-19.82,-3.09,5.46,-10.22,-0.64,175.0,-5.52,-12.92,,-4.05,,-0.03,9.0,-22.83,-2.62,2.58,,-10.99,,4.86,0.29,41.29,-100.0,-10.0,106.0
Denmark,14.2,75.0,-22.68,12.44,-29.56,6.94,-1.14,,-3.9,4.08,5.33,-18.8,-100.0,-8.0,25.63,20.21,43.41,22.5,-38.0,3.13,-24.25,19.71,-9.59,0.0,49.5,16.21,44.0
England,-3.94,-26.67,-16.41,-9.13,17.71,-2.57,-0.8,-100.0,-8.63,-15.32,-60.33,-9.95,-100.0,0.5,-2.04,-13.63,-9.21,-8.97,38.76,-7.27,34.0,-18.69,6.69,-2.78,4.39,-0.9,119.0
England2,-8.11,13.46,-21.71,2.6,-9.62,2.41,-4.83,19.04,-11.63,7.32,-46.55,0.13,,1.09,-5.49,-6.2,7.68,-10.23,-67.5,-1.35,,-14.72,-4.29,12.48,39.87,-4.83,231.0
England3,5.02,49.85,-7.63,-6.82,-4.22,-7.68,-4.69,4.64,0.52,0.09,-27.0,0.21,-46.43,-2.08,5.56,2.55,-26.86,-12.51,6.72,-4.83,15.0,-11.44,-6.36,-12.03,2.78,-5.01,231.0
England4,-6.77,61.67,-15.99,-3.72,-3.84,8.44,-12.4,-28.18,-11.92,-3.42,9.58,2.37,116.67,0.21,-21.0,-14.7,-11.63,-1.26,-51.58,-8.59,-16.0,-6.92,-14.01,-28.91,-21.85,1.47,208.0


# Creating Optimum Threshold for Each Prediction Column

In [9]:
# Assuming `df` is your DataFrame and it contains the columns for percentages and correctness
def calculate_threshold(percentages, predictions):
    # Ensure inputs are pandas Series
    percentages = pd.Series(percentages)
    predictions = pd.Series(predictions)
    
    thresholds = percentages.unique()
    best_threshold = 0
    best_j_stat = -np.inf  # Start with negative infinity for comparison
    
    for threshold in thresholds:
        # Predict 1s based on the threshold
        predicted_1s = (percentages >= threshold).astype(int)
        
        # Calculate true positives, true negatives, false positives, false negatives
        true_positives = ((predicted_1s == 1) & (predictions == 1)).sum()
        true_negatives = ((predicted_1s == 0) & (predictions == 0)).sum()
        false_positives = ((predicted_1s == 1) & (predictions == 0)).sum()
        false_negatives = ((predicted_1s == 0) & (predictions == 1)).sum()
        
        # Calculate Sensitivity (Recall) and Specificity
        sensitivity = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
        specificity = true_negatives / (true_negatives + false_positives) if (true_negatives + false_positives) > 0 else 0
        
        # Calculate Youden's J Statistic
        j_stat = sensitivity + specificity - 1
        
        # Update best threshold if J statistic improves
        if j_stat > best_j_stat:
            best_j_stat = j_stat
            best_threshold = threshold
    
    return best_threshold, round(best_j_stat, 2)

# Select only numeric columns
numeric_columns = final_df_unique.select_dtypes(include=[np.number])

# Remove rows where any numeric value is greater than 100
final_df_unique = final_df_unique[(numeric_columns <= 100).all(axis=1)]

#Selecting dataframes with model predictions
ft1df = final_df_unique[final_df_unique['FT1_x'] >= final_df_unique[['FTX_x', 'FT2_x']].max(axis=1)]
ftxdf = final_df_unique[final_df_unique['FTX_x'] >= final_df_unique[['FT1_x', 'FT2_x']].max(axis=1)]
ft2df = final_df_unique[final_df_unique['FT2_x'] >= final_df_unique[['FTX_x', 'FT1_x']].max(axis=1)]
dc1xdf = final_df_unique[final_df_unique['DC1X_x'] >= final_df_unique[['DC12_x', 'DCX2_x']].max(axis=1)]
dc12df = final_df_unique[final_df_unique['DC12_x'] >= final_df_unique[['DC1X_x', 'DCX2_x']].max(axis=1)]
dcx2df = final_df_unique[final_df_unique['DCX2_x'] >= final_df_unique[['DC1X_x', 'DC12_x']].max(axis=1)]
over15df, under15df = final_df_unique[final_df_unique['1.5O_x'] >= 50], final_df_unique[final_df_unique['1.5O_x'] < 50]
over25df, under25df = final_df_unique[final_df_unique['2.5O_x'] >= 50], final_df_unique[final_df_unique['2.5O_x'] < 50]
over35df, under35df = final_df_unique[final_df_unique['3.5U_x'] < 50], final_df_unique[final_df_unique['3.5U_x'] >= 50]
over45df, under45df = final_df_unique[final_df_unique['4.5U_x'] < 50], final_df_unique[final_df_unique['4.5U_x'] >= 50]
bttsdf, ottsdf = final_df_unique[final_df_unique['BTTS_x'] >= 50], final_df_unique[final_df_unique['BTTS_x'] < 50]
ht1df = final_df_unique[final_df_unique['HT1_x'] >= final_df_unique[['HTX_x', 'HT2_x']].max(axis=1)]
htxdf = final_df_unique[final_df_unique['HTX_x'] >= final_df_unique[['HT1_x', 'HT2_x']].max(axis=1)]
ht2df = final_df_unique[final_df_unique['HT2_x'] >= final_df_unique[['HT1_x', 'HTX_x']].max(axis=1)]
ht1xdf = final_df_unique[final_df_unique['HTDC1X'] >= final_df_unique[['HTDC12', 'HTDCX2']].max(axis=1)]
ht12df = final_df_unique[final_df_unique['HTDC12'] >= final_df_unique[['HTDC1X', 'HTDCX2']].max(axis=1)]
htx2df = final_df_unique[final_df_unique['HTDCX2'] >= final_df_unique[['HTDC1X', 'HTDC12']].max(axis=1)]
htover05df, htunder05df = final_df_unique[final_df_unique['HT0.5O_x'] >= 50], final_df_unique[final_df_unique['HT0.5O_x'] < 50]
htover15df, htunder15df = final_df_unique[final_df_unique['HT1.5U_x'] < 50], final_df_unique[final_df_unique['HT1.5U_x'] >= 50]

ft1t, ft1a = calculate_threshold(ft1df['FT1_x'], ft1df['FT1'])
ftxt, ftxa = calculate_threshold(ftxdf['FTX_x'], ftxdf['FTX'])
ft2t, ft2a = calculate_threshold(ft2df['FT2_x'], ft2df['FT2'])
ft1xt, ft1xa = calculate_threshold(dc1xdf['DC1X_x'], dc1xdf['FT1X'])
ft12t, ft12a = calculate_threshold(dc12df['DC12_x'], dc12df['FT12'])
ftx2t, ftx2a = calculate_threshold(dcx2df['DCX2_x'], dcx2df['FTX2'])
over15t, over15a = calculate_threshold(over15df['1.5O_x'], over15df['1.5O'])
under15t, under15a = calculate_threshold(under15df['1.5O_x'], under15df['1.5U'])
over25t, over25a = calculate_threshold(over25df['2.5O_x'], over25df['2.5O'])
under25t, under25a = calculate_threshold(under25df['2.5O_x'], under25df['2.5U'])
over35t, over35a = calculate_threshold(over35df['3.5U_x'], over35df['3.5O'])
under35t, under35a = calculate_threshold(under35df['3.5U_x'], under35df['3.5U'])
over45t, over45a = calculate_threshold(over45df['4.5U_x'], over45df['4.5O'])
under45t, under45a = calculate_threshold(under45df['4.5U_x'], under45df['4.5U'])
bttst, bttsa = calculate_threshold(bttsdf['BTTS_x'], bttsdf['BTTS'])
ottst, ottsa = calculate_threshold(ottsdf['BTTS_x'], ottsdf['OTTS'])
ht1t, ht1a = calculate_threshold(ht1df['HT1_x'], ht1df['HT1'])
htxt, htxa = calculate_threshold(htxdf['HTX_x'], htxdf['HTX'])
ht2t, ht2a = calculate_threshold(ht2df['HT2_x'], ht2df['HT2'])
ht1xt, ht1xa = calculate_threshold(ht1xdf['HTDC1X'], ht1xdf['HT1X'])
ht12t, ht12a = calculate_threshold(ht12df['HTDC12'], ht12df['HT12'])
htx2t, htx2a = calculate_threshold(htx2df['HTDCX2'], htx2df['HTX2'])
htover05t, htover05a = calculate_threshold(htover05df['HT0.5O_x'], htover05df['HT0.5O'])
htunder05t, htunder05a = calculate_threshold(htunder05df['HT0.5O_x'], htunder05df['HT0.5U'])
htover15t, htover15a = calculate_threshold(htover15df['HT1.5U_x'], htover15df['HT1.5O'])
htunder15t, htunder15a = calculate_threshold(htunder15df['HT1.5U_x'], htunder15df['HT1.5U'])

new_ft1df, new_ftxdf, new_ft2df = ft1df[ft1df['FT1_x'] >= ft1t],ftxdf[ftxdf['FTX_x'] >= ftxt],ft2df[ft2df['FT2_x'] >= ft2t]
new_ft1xdf, new_ft12df, new_ftx2df = dc1xdf[dc1xdf['DC1X_x'] >= ft1xt],dc12df[dc12df['DC12_x'] >= ft12t],dcx2df[dcx2df['DCX2_x'] >= ftx2t] 
new_over15, new_under15 = over15df[over15df['1.5O_x'] >= over15t], under15df[under15df['1.5O_x'] <= under15t]
new_over25, new_under25 = over25df[over25df['2.5O_x'] >= over25t], under25df[under25df['2.5O_x'] <= under25t]
new_over35, new_under35 = over35df[over35df['3.5U_x'] <= over35t], under35df[under35df['3.5U_x'] >= under35t]
new_over45, new_under45 = over45df[over45df['4.5U_x'] <= over45t], under45df[under45df['4.5U_x'] >= under45t]
new_btts, new_otts = bttsdf[bttsdf['BTTS_x'] >= bttst], ottsdf[ottsdf['BTTS_x'] <= ottst]
new_ht1df, new_htxdf, new_ht2df = ht1df[ht1df['HT1_x'] >= ht1t],htxdf[htxdf['HTX_x'] >= htxt],ht2df[ht2df['HT2_x'] >= ht2t]
new_ht1xdf, new_ht12df, new_htx2df = ht1xdf[ht1xdf['HTDC1X'] >= ht1xt],ht12df[ht12df['HTDC12'] >= ht12t],htx2df[htx2df['HTDCX2'] >= htx2t]
new_htover05, new_htunder05 = htover05df[htover05df['HT0.5O_x'] >= htover05t], htunder05df[htunder05df['HT0.5O_x'] <= htunder05t]
new_htover15, new_htunder15 = htover15df[htover15df['HT1.5U_x'] <= htover15t], htunder15df[htunder15df['HT1.5U_x'] >= htunder15t]

# Store the results in a list
results = [
('FT1', ft1t, ft1a, len(new_ft1df), round(len(new_ft1df)/len(ft1df)*100,2), np.sum(new_ft1df['FT1P']) - len(new_ft1df)),
('FTX', ftxt, ftxa, len(new_ftxdf), round(len(new_ftxdf)/len(ftxdf)*100,2), np.sum(new_ftxdf['FTXP']) - len(new_ftxdf)),
('FT2', ft2t, ft2a, len(new_ft2df), round(len(new_ft2df)/len(ft2df)*100,2), np.sum(new_ft2df['FT2P']) - len(new_ft2df)),
('FT1X', ft1xt, ft1xa, len(new_ft1xdf), round(len(new_ft1xdf)/len(dc1xdf)*100,2), np.sum(new_ft1xdf['FT1XP']) - len(new_ft1xdf)),
('FT12', ft12t, ft12a, len(new_ft12df), round(len(new_ft12df)/len(dc12df)*100,2), np.sum(new_ft12df['FT12P']) - len(new_ft12df)),
('FTX2', ftx2t, ftx2a, len(new_ftx2df), round(len(new_ftx2df)/len(dcx2df)*100,2), np.sum(new_ftx2df['FTX2P']) - len(new_ftx2df)),
('1.5O', over15t, over15a, len(new_over15), round(len(new_over15)/len(over15df)*100,2), np.sum(new_over15['1.5OP'])-len(new_over15)),
('1.5U', under15t, under15a, len(new_under15), round(len(new_under15)/len(under15df)*100,2), np.sum(new_under15['1.5UP'])-len(new_under15)),
('2.5O', over25t, over25a, len(new_over25), round(len(new_over25)/len(over25df)*100,2), np.sum(new_over25['2.5OP'])-len(new_over25)),
('2.5U', under25t, under25a, len(new_under25), round(len(new_under25)/len(under25df)*100,2), np.sum(new_under25['2.5UP'])-len(new_under25)),
('3.5O', over35t, over35a, len(new_over35), round(len(new_over35)/len(over35df)*100,2), np.sum(new_over35['3.5OP'])-len(new_over35)),
('3.5U', under35t, under35a, len(new_under35), round(len(new_under35)/len(under35df)*100,2), np.sum(new_under35['3.5UP'])-len(new_under35)),
('4.5O', over45t, over45a, len(new_over45), round(len(new_over45)/len(over45df)*100,2), np.sum(new_over45['4.5OP'])-len(new_over45)),
('4.5U', under45t, under45a, len(new_under45), round(len(new_under45)/len(under45df)*100,2), np.sum(new_under45['4.5UP'])-len(new_under45)),
('BTTS', bttst, bttsa, len(new_btts), round(len(new_btts)/len(bttsdf)*100,2), np.sum(new_btts['BTTSP'])-len(new_btts)),
('OTTS', ottst, ottsa, len(new_otts), round(len(new_otts)/len(ottsdf)*100,2), np.sum(new_otts['OTTSP'])-len(new_otts)),
('HT1', ht1t, ht1a, len(new_ht1df), round(len(new_ht1df)/len(ht1df)*100,2), np.sum(new_ht1df['HT1P']) - len(new_ht1df)),
('HTX', htxt, htxa, len(new_htxdf), round(len(new_htxdf)/len(htxdf)*100,2), np.sum(new_htxdf['HTXP']) - len(new_htxdf)),
('HT2', ht2t, ht2a, len(new_ht2df), round(len(new_ht2df)/len(ht2df)*100,2), np.sum(new_ht2df['HT2P']) - len(new_ht2df)),
('HT1X', ht1xt, ht1xa, len(new_ht1xdf), round(len(new_ht1xdf)/len(ht1xdf)*100,2), np.sum(new_ht1xdf['HT1XP']) - len(new_ht1xdf)),
('HT12', ht12t, ht12a, len(new_ht12df), round(len(new_ht12df)/len(ht12df)*100,2), np.sum(new_ht12df['HT12P']) - len(new_ht12df)),
('HTX2', htx2t, htx2a, len(new_htx2df), round(len(new_htx2df)/len(htx2df)*100,2), np.sum(new_htx2df['HTX2P']) - len(new_htx2df)),
('HT0.5O', htover05t, htover05a, len(new_htover05), round(len(new_htover05)/len(htover05df)*100,2), np.sum(new_htover05['HT0.5OP'])-len(new_htover05)),
('HT0.5U', htunder05t, htunder05a, len(new_htunder05), round(len(new_htunder05)/len(htunder05df)*100,2), np.sum(new_htunder05['HT0.5UP'])-len(new_htunder05)),
('HT1.5O', htover15t, htover15a, len(new_htover15), round(len(new_htover15)/len(htover15df)*100,2), np.sum(new_htover15['HT1.5OP'])-len(new_htover15)),
('HT1.5U', htunder15t, htunder15a, len(new_htunder15), round(len(new_htunder15)/len(htunder15df)*100,2), np.sum(new_htunder15['HT1.5UP'])-len(new_htunder15))
]

# Create a DataFrame from the results
results_df = pd.DataFrame(results, columns=['Prediction', 'Threshold', 'J-Stat', 'Games', 'Games%', 'Profit'])
results_df['ROI'] = round(results_df['Profit'] / results_df['Games'] * 100, 2)
print('Number of matches: ', len(final_df_unique))
results_df

Number of matches:  3894


Unnamed: 0,Prediction,Threshold,J-Stat,Games,Games%,Profit,ROI
0,FT1,59.23,0.18,876,39.6,13.5,1.54
1,FTX,51.02,0.05,42,16.6,6.95,16.55
2,FT2,48.26,0.1,766,53.53,-61.54,-8.03
3,FT1X,78.47,0.14,1233,72.32,-25.01,-2.03
4,FT12,75.59,0.07,647,56.16,-22.11,-3.42
5,FTX2,80.19,0.11,546,52.65,-29.18,-5.34
6,1.5O,76.26,0.07,1697,47.32,-95.38,-5.62
7,1.5U,30.21,-1.0,45,14.61,1.81,4.02
8,2.5O,64.8,0.06,766,41.77,-37.06,-4.84
9,2.5U,7.67,-1.0,33,1.6,-2.22,-6.73


# Testing Best / Most Profitable Model Predictions

In [10]:
# Select columns from predictions table
predictions = ['FT1_x', 'FTX_x', 'FT2_x', 'DC1X_x', 'DC12_x', 'DCX2_x', 
               '1.5O_x', '2.5O_x', '3.5U_x', '4.5U_x', 'BTTS_x', 
               'HT1_x', 'HTX_x', 'HT2_x', 'HTDC1X', 'HTDC12', 'HTDCX2', 
               'HT0.5O_x', 'HT1.5U_x']

# Select columns from betting odds table
results = ['FT1', 'FTX', 'FT2', 'FT1X', 'FT12', 'FTX2', 
           '1.5O', '2.5O', '3.5U', '4.5U', 'BTTS',
            'HT1', 'HTX', 'HT2', 'HT1X', 'HT12', 'HTX2', 
            'HT0.5O', 'df2_HT1.5U']

# Select columns ending with 'P' (profit columns)
profits = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '2.5OP', '3.5UP', '4.5UP', 'BTTSP',
            'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT1.5UP']

bet, percentage, profit = [], [], []

for i in range(len(final_df_unique)):
    my_list = []
    for j in predictions:
        my_list.append(final_df_unique[j].iloc[i])
    percentage.append(max(my_list))
    max_index = my_list.index(max(my_list))
    bet.append(results[max_index])
    profit_column = profits[max_index]
    profit.append(final_df_unique[profit_column].iloc[i])

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['League_x'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bet,
    'Percentage': percentage,
    'Profit': profit
})


print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs),2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  3894
Correct Predictions: 83.82126348228043
Profit: -152.93 ROI: -3.93%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit
3931,Switzerland,Yverdon,Luzern,4.5U,85.46,1.13
3932,Turkey,Rizespor,Alanyaspor,4.5U,90.44,1.17
3933,Turkey,Kasimpasa,Galatasaray,1.5O,93.07,1.11
3934,Turkey,Fenerbahce,Antalyaspor,FT1X,95.93,1.07
3935,Mexico2,Atlas,A. San Luis,HT1X,89.95,0.0


In [11]:
final_df_unique['OTTS_x'] = 100 - final_df_unique['BTTS_x']
final_df_unique['1.5U_x'] = 100 - final_df_unique['1.5O_x']
final_df_unique['2.5U_x'] = 100 - final_df_unique['2.5O_x']
final_df_unique['3.5O_x'] = 100 - final_df_unique['3.5U_x']
final_df_unique['4.5O_x'] = 100 - final_df_unique['4.5U_x']
final_df_unique['HT0.5U_x'] = 100 - final_df_unique['HT0.5O_x']
final_df_unique['HT1.5O_x'] = 100 - final_df_unique['HT1.5U_x'] 

# Select columns starting with 'df1_'
predictions = ['FT1_x', 'FTX_x', 'FT2_x', 'DC1X_x', 'DC12_x', 'DCX2_x', 
               '1.5O_x', '1.5U_x', '2.5O_x','2.5U_x','3.5O_x', '3.5U_x', 
               '4.5O_x', '4.5U_x', 'BTTS_x', 'OTTS_x',
               'HT1_x', 'HTX_x', 'HT2_x', 'HTDC1X', 'HTDC12', 'HTDCX2', 
               'HT0.5O_x', 'HT0.5U_x', 'HT1.5O_x', 'HT1.5U_x']

# Select columns starting with 'df2_'
odds = ['FT1_y', 'FTX_y', 'FT2_y', 'DC1X_y', 'DC12_y', 'DCX2_y', 
           '1.5O_y', '1.5U', '2.5O_y','2.5U', '3.5O','3.5U_y', 
           '4.5O', '4.5U_y', 'BTTS_y', 'OTTS',
            'HT1_y', 'HTX_y', 'HT2_y', 'HT1X', 'HT12', 'HTX2', 
            'HT0.5O_y', 'HT0.5U', 'HT1.5O', 'HT1.5U_y']

# Select columns ending with 'P'
profit = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '1.5UP', '2.5OP', '2.5UP', '3.5OP', '3.5UP', '4.5OP', '4.5UP', 
           'BTTSP', 'OTTSP', 'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT0.5UP', 'HT1.5OP','HT1.5UP']

bets, percentages, profits, difference = [], [], [], [] 
for i in range(len(final_df_unique)):
    my_list = []
    valid_indices = []  # To keep track of indices where profit is not negative
    for j in range(len(predictions)):
        pred_column = predictions[j]
        odds_column = odds[j]
        profit_column = profit[j]  # Corresponding profit column
        
        # Calculate the value
        my_value = (100 / final_df_unique[pred_column].iloc[i]) - final_df_unique[odds_column].iloc[i]
        
        # Only add to the list if the corresponding profit is non-negative
        if final_df_unique[profit_column].iloc[i] >= 0:
            my_list.append(my_value)
            valid_indices.append(j)
        else:
            my_list.append(float('-inf'))  # Set to negative infinity to ignore in max()
    
    if valid_indices:  # Ensure there is at least one valid index
        max_index = my_list.index(max(my_list))
        rec_bet = profit[max_index]
        percent_bet = predictions[max_index]
        bets.append(rec_bet)
        percentages.append(final_df_unique[percent_bet].iloc[i])
        rec_profit = profit[max_index]
        profits.append(final_df_unique[rec_profit].iloc[i])
        difference.append(round(max(my_list), 2))
    else:
        # Handle case where no valid profits are found for this match
        bets.append(None)
        percentages.append(None)
        profits.append(None)
        difference.append(None)

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['League_x'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bets,
    'Percentage': percentages,
    'Profit': profits,
    'Difference': difference
}).dropna()  # Drop rows with None values

print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs), 2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  3894
Correct Predictions: 67.7452491011813
Profit: -149.59 ROI: -3.84%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit,Difference
3931,Switzerland,Yverdon,Luzern,HT0.5OP,63.9,0.0,0.2
3932,Turkey,Rizespor,Alanyaspor,FT1P,39.58,1.8,0.73
3933,Turkey,Kasimpasa,Galatasaray,4.5UP,58.68,0.0,0.37
3934,Turkey,Fenerbahce,Antalyaspor,4.5UP,60.01,1.22,0.45
3935,Mexico2,Atlas,A. San Luis,FT1P,44.77,2.0,0.23


## Checking the Betting Strategy in Article 

probability(model) / probability(bookies) > r (between 1 and 1.5)

In [47]:
# Step 1: Compute r values and store in new columns
for col1, col2 in zip(predictions, odds):
    final_df_unique[f'{col1}_r'] = (final_df_unique[col1] / 100) / (1 / final_df_unique[col2])

fulltime = final_df_unique[(final_df_unique['FT1_x_r'] > 1.45) & (final_df_unique['FT1P'] >= 0)]
print(sum(fulltime['FT1P']) - len(fulltime))
fulltime.tail()

7.329999999999984


Unnamed: 0,League_x,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y,Date,League_y,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U_y,2.5O,2.5U_y,3.5O_y,3.5U,4.5O_y,4.5U,BTTS,OTTS_y,HT1,HTX,HT2,HT1X_y,HT12_y,HTX2_y,HT0.5O,HT0.5U_y,HT1.5O_y,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,OTTS_x,1.5U_x,2.5U_x,3.5O_x,4.5O_x,HT0.5U_x,HT1.5O_x,FT1_x/x,FT1_x_r,FTX_x_r,FT2_x_r,DC1X_x_r,DC12_x_r,DCX2_x_r,1.5O_x_r,1.5U_x_r,2.5O_x_r,2.5U_x_r,3.5O_x_r,3.5U_x_r,4.5O_x_r,4.5U_x_r,BTTS_x_r,OTTS_x_r,HT1_x_r,HTX_x_r,HT2_x_r,HTDC1X_r,HTDC12_r,HTDCX2_r,HT0.5O_x_r,HT0.5U_x_r,HT1.5O_x_r,HT1.5U_x_r
3489,UFCL,Heidenheim,Copenhagen,68.77,20.43,10.77,2-0,89.2,79.54,31.2,75.59,50.62,71.4,86.28,42.91,34.16,57.99,7.84,0-0,92.15,42.0,65.83,46.77,85.42,86.83,48.87,60.16,14.58,66.87,96.9,2.5,3.6,2.63,1.44,1.29,1.5,3.0,2.2,3.4,1.27,1.58,1.33,1.67,2.1,1.29,3.5,1.93,1.93,3.25,1.33,6.5,1.11,1.4,2.75,3.0,1.36,02/20/2025,UFCL,1 - 2,(0-1),1.0,2.0,3.0,0.0,1.0,1.0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0.0,-1.0,-1.0,0.0,-1.0,-1.0,1.29,-1.0,1.93,-1.0,-1.0,1.33,-1.0,1.11,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0,1.36,57.09,24.41,49.38,28.6,13.72,53.23,14.58,34.81,1.71925,0.73548,0.283251,1.28448,1.026066,0.468,0.975111,0.85435,0.976966,0.953034,0.9295,0.94962,0.8918,0.957708,0.716597,1.19889,1.0248,1.27578,0.26656,1.170305,0.6636,0.875539,0.65478,1.463825,0.4374,1.161712
3494,UFCL,Shamrock,Molde,88.86,5.92,2.61,4-0,94.78,91.47,8.53,93.14,84.12,27.83,45.69,51.07,69.31,7.12,0.73,3-0,76.43,70.04,7.85,73.63,13.01,95.81,51.79,89.26,17.19,21.72,93.33,5.0,3.8,1.7,2.1,1.25,1.17,5.0,2.2,2.38,1.53,1.62,1.14,1.8,1.91,1.29,3.5,1.93,1.93,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,02/20/2025,UFCL,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0.0,-1.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,-1.0,1.4,-1.0,0.0,-1.0,48.93,6.86,15.88,72.17,54.31,26.37,86.99,74.75,4.443,0.22496,0.04437,1.99038,1.143375,0.099801,1.201506,0.2401,1.623516,0.306484,2.345525,0.370139,3.53015,0.507159,0.91926,0.934563,3.4655,0.15664,0.017374,1.169379,1.134648,0.08949,1.03082,0.725175,2.392225,0.18214
3646,England2,Watford,Luton Town,63.54,22.04,14.4,2-0,85.58,77.94,36.44,78.41,54.24,68.05,83.96,50.18,28.72,44.04,27.2,0-0,72.76,55.92,71.24,65.52,72.57,86.46,57.23,59.42,20.91,67.62,94.5,2.38,3.4,3.0,1.36,1.3,1.53,3.1,2.05,3.75,1.25,1.73,1.36,1.83,1.83,1.36,3.2,2.1,1.73,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2025-02-23 00:00:00,England2,2 - 0,(2-0),2.0,0.0,2.0,2.0,0.0,2.0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,1,0,2.38,-1.0,-1.0,1.36,-1.0,-1.0,1.36,-1.0,0.0,-1.0,-1.0,1.25,-1.0,1.08,0.0,-1.0,-1.0,0.0,-1.0,1.25,-1.0,-1.0,1.44,-1.0,-1.0,0.0,49.82,21.59,45.76,31.95,16.04,34.48,27.43,34.46,1.512252,0.74936,0.432,1.163888,1.01322,0.557532,1.066376,0.69088,1.13904,0.791648,1.278,0.850625,1.2832,0.906768,0.918294,0.911706,0.89032,0.90282,1.02,0.9095,0.967416,0.968864,0.943488,0.906824,0.891475,0.965181
3778,Belgium,Gent,Club Brugge,37.2,29.76,33.03,1-1,66.96,70.23,62.79,81.97,56.58,65.82,82.34,62.08,26.6,43.92,29.44,0-0,70.52,56.04,73.36,64.42,75.05,78.18,76.03,44.97,41.8,80.31,82.65,4.1,4.0,1.73,2.05,1.22,1.22,4.5,2.3,2.38,1.53,1.57,1.17,1.7,2.05,1.22,4.0,1.7,2.1,2.75,1.4,5.5,1.14,1.33,3.25,2.63,1.44,2025-03-01 00:00:00,Belgium,1 - 1,(0-1),1.0,1.0,2.0,0.0,1.0,1.0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,0.0,-1.0,-1.0,-1.0,0.0,-1.0,1.22,-1.0,0.0,-1.0,-1.0,1.4,-1.0,1.14,1.7,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,1.17,1.33,-1.0,-1.0,1.44,37.92,18.03,43.42,34.18,17.66,35.58,24.95,21.05,1.5252,1.1904,0.571419,1.37268,0.856806,0.766038,1.000034,0.7212,0.96186,0.91182,0.93995,0.92148,0.9713,0.938676,1.05536,0.77736,1.197,1.01016,0.700672,1.078956,0.879828,0.858312,0.856786,1.15635,0.656185,1.08072
3871,Argentina,I. Rivadavia,Lanus,59.53,32.74,7.72,1-0,92.27,67.25,40.46,57.09,27.4,88.85,96.23,28.17,71.01,23.99,3.56,1-0,95.0,74.57,27.55,77.35,56.08,75.57,33.49,41.14,6.36,83.12,99.16,2.88,3.0,2.7,1.44,1.36,1.4,3.75,1.91,3.6,1.29,1.8,1.25,2.1,1.67,1.53,2.38,2.7,1.44,5.5,1.14,13.0,1.04,1.62,2.2,4.0,1.22,2025-03-02 00:00:00,Argentina,1 - 1,(0-0),1.0,1.0,2.0,0.0,0.0,0.0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0.0,-1.0,-1.0,1.44,-1.0,-1.0,1.53,-1.0,-1.0,1.44,-1.0,1.14,-1.0,1.04,-1.0,0.0,0.0,-1.0,-1.0,1.29,-1.0,-1.0,0.0,-1.0,-1.0,1.22,71.83,42.91,72.6,11.15,3.77,22.65,43.92,16.31,1.714464,0.9822,0.20844,1.328688,0.9146,0.56644,0.873477,1.021258,0.7398,1.04544,0.61325,1.01289,0.4901,1.000792,0.59157,1.199561,2.662875,0.458209,0.12816,1.2255,1.34226,0.344375,1.25307,0.4983,1.7568,0.684176


# Combining Different Bets

In [12]:
# Define the list of base columns and multiplier columns
base_columns = ['FT1_x', 'FTX_x', 'FT2_x']  
multiplier_columns = ['1.5O_x', '2.5O_x', 'df1_2.5U', '3.5U_x', '4.5U_x']

# Nested loop: Iterate over each base column and multiplier column
for base_column in base_columns:
    for multiplier in multiplier_columns:
        # Create a new column name dynamically
        new_column = f"{base_column}/{multiplier.split('_')[1]}"
        
        # Perform the calculation and assign it to the new column
        final_df_unique[new_column] = round(final_df_unique[base_column] * final_df_unique[multiplier] / 100, 2)

final_df_unique.tail()

KeyError: 'df1_2.5U'