# Reading Model Predictions and Bet365 Odds

In [25]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import warnings
from rapidfuzz import process
from datetime import datetime

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"
uefa = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})
uefa['FT'] = uefa['FTHG'].astype(str) + ' - ' + uefa['FTAG'].astype(str)
uefa['HT'] = '(' + uefa['HTHG'].astype(str) + '-' + uefa['HTAG'].astype(str) + ')'
uefa['FTTG'] = uefa['FTHG'] + uefa['FTAG']
uefa['HTTG'] = uefa['HTHG'] + uefa['HTAG']

predictions = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/_predictions.xlsx')
bet365_odds = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/final_odds.xlsx')

print(f"Games found: {len(predictions)} in predictions and {len(bet365_odds)} in odds dataset.")
bet365_odds.tail()

Games found: 4767 in predictions and 4990 in odds dataset.


Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
4985,Atl. Madrid,Ath Bilbao,2.0,3.5,3.7,1.29,1.29,1.8,2.75,2.05,4.33,1.2,1.73,1.4,1.95,1.8,1.4,3.0,2.2,1.67,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33
4986,Almeria,Malaga,1.44,4.75,6.0,1.13,1.18,2.63,1.95,2.4,6.5,1.1,1.53,1.8,1.83,1.83,1.22,4.0,1.7,2.1,2.75,1.4,5.0,1.17,1.33,3.25,2.5,1.5
4987,PSG,Lille,1.4,4.75,7.5,1.1,1.18,2.75,1.83,2.63,6.0,1.11,1.44,1.83,1.67,2.1,1.14,5.5,1.5,2.63,2.2,1.67,4.0,1.25,1.25,3.75,2.1,1.67
4988,Vitoria Guimaraes,Casa Pia,1.75,3.6,4.75,1.18,1.29,2.0,2.4,2.1,5.5,1.13,1.67,1.53,2.05,1.7,1.36,3.0,2.15,1.67,4.0,1.22,9.0,1.07,1.44,2.63,3.4,1.3
4989,River Plate,Estudiantes L.P.,1.67,3.6,5.75,1.14,1.29,2.15,2.3,2.1,5.5,1.13,1.67,1.57,2.0,1.75,1.33,3.25,2.08,1.73,3.75,1.25,8.0,1.08,1.44,2.63,3.25,1.33


# Merging 2 DataFrames for similarity of values

In [26]:
# Use predictions key values as the canonical list
home_keys = predictions['Home'].unique().tolist()
away_keys = predictions['Away'].unique().tolist()

def get_canonical(val, canonical_list, threshold=85):
    """
    For a given value from df2, find the best matching canonical value from df1
    using a fuzzy matching score. If no match meets the threshold, return the original value.
    """
    match = process.extractOne(val, canonical_list, score_cutoff=threshold)
    if match:
        return match[0]
    return val

# Replace values in bet365_odds Home and Away columns using the canonical mapping from predictions
bet365_odds['Home'] = bet365_odds['Home'].apply(lambda x: get_canonical(x, home_keys))
bet365_odds['Away'] = bet365_odds['Away'].apply(lambda x: get_canonical(x, away_keys))

# Merge 2 dataframes on Home and Away columns
merged_df = pd.merge(predictions, bet365_odds, on=['Home', 'Away'], how='inner')
merged_df = merged_df.drop_duplicates(subset=['League','Home', 'Away'], keep='last')
merged_df.tail()

Unnamed: 0,League,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y
4206,Switzerland,FC Basel,Sion,73.17,15.86,10.79,2-0,89.03,83.96,26.65,87.68,69.81,51.22,70.37,57.34,48.63,35.03,15.82,0-0,83.66,64.45,50.85,77.89,52.49,92.84,61.36,74.24,24.67,50.33,92.64,1.65,4.2,4.5,1.18,1.22,2.2,2.25,2.38,5.0,1.17,1.53,1.57,1.73,2.0,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.5,1.5
4207,Switzerland,Winterthur,St. Gallen,18.77,23.11,58.11,1-1,41.88,76.88,81.22,78.27,54.4,67.91,83.86,53.26,16.45,47.14,36.36,0-0,63.59,52.81,83.5,62.24,73.2,62.32,84.74,25.55,56.07,92.39,70.88,3.8,3.8,1.85,1.91,1.25,1.25,4.0,2.4,2.4,1.53,1.53,1.22,1.53,2.38,1.17,5.0,1.57,2.35,2.38,1.53,4.33,1.2,1.29,3.5,2.38,1.53
4209,Switzerland,Lausanne Sport,Servette,52.28,22.99,24.71,1-1,75.27,76.99,47.7,83.23,61.92,60.33,78.12,61.85,33.03,43.02,23.85,0-0,76.05,56.88,66.87,69.61,64.62,85.41,71.92,57.35,36.26,69.66,86.37,2.25,3.4,3.1,1.36,1.3,1.62,2.88,2.25,3.5,1.29,1.57,1.4,1.62,2.2,1.22,4.0,1.75,2.05,2.75,1.4,5.5,1.14,1.36,3.0,2.63,1.44
4210,Turkey,Adana Demirspor,Bodrumspor,30.09,37.42,32.5,0-0,67.51,62.59,69.92,56.19,27.09,89.04,96.32,37.64,23.38,56.72,19.89,0-0,80.1,43.27,76.61,48.88,85.79,58.55,60.34,22.05,23.67,94.03,93.3,4.5,4.0,1.7,2.1,1.22,1.18,4.75,2.3,2.3,1.57,1.57,1.17,1.73,2.0,1.22,4.0,1.73,2.08,2.75,1.4,5.5,1.14,1.33,3.25,2.63,1.44
4211,Turkey,Besiktas,Kayserispor,72.93,19.35,7.66,2-0,92.28,80.59,27.01,80.64,56.52,65.82,82.34,44.67,62.27,27.4,9.12,1-0,89.67,71.39,36.52,80.2,50.73,89.84,48.18,66.7,14.12,59.78,97.02,1.48,4.5,5.5,1.14,1.18,2.63,2.0,2.5,5.5,1.13,1.5,1.73,1.73,2.0,1.17,5.0,1.57,2.35,2.38,1.53,4.0,1.22,1.29,3.5,2.38,1.53


# Scraping SoccerStats For Match Results

In [27]:
final =  pd.DataFrame()
liqa = ''
unique_leagues = predictions['League'].unique().tolist()

# Convert to lowercase and exclude UEFA competitions
uefa_list = ['unl', 'uel', 'ucl', 'ufcl']
list_2024 = ['norway_2024', 'sweden_2024', 'usa_2024']
unique_leagues = list(set([league.lower() for league in unique_leagues if league.lower() not in uefa_list]))

# Append list_2024 to unique_leagues
unique_leagues.extend(list_2024)

for i in unique_leagues:
    URL = "https://www.soccerstats.com/results.asp?league=" + i + "&pmtype=bydate"
    page = requests.get(URL)
    liqa = i
    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="btable")
    sth = results.find_all("tr", class_="odd")
    sth


    date, league, home, away, ft, ht = [], [], [], [], [],[]
    for i in sth:
        date.append(i.find_all("td", align = 'right')[0].get_text(strip=True))
        league.append(liqa.capitalize())
        home.append(i.find_all("td", align = 'right')[1].get_text(strip=True))
        away.append(i.find("td", align = "left").get_text(strip = True))
        ft.append(i.find_all("td", align = 'center')[0].get_text(strip = True))
        try:
            ht.append(i.find_all("td", align = 'center')[2].get_text(strip = True))
        except IndexError as e:
            ht.append('NA')#print("Last output before error occurred:", i.find_all("td", align = 'center'))

    data = {'Date': date, 'League': league,'Home': home, 'Away': away, 'FT': ft, 'HT': ht}

# Create a DataFrame from the dictionary
    df = pd.DataFrame(data)

# Replace empty strings with NaN
    #next_df = df[(df['Date'] == formatted_date) & (df['HT'] == '')]
    df.replace('', pd.NA, inplace=True)

# Drop rows with NaN values
    df_cleaned = df.dropna()

#For Half-Time Results
    hthg, htag = [], []
    for i in df_cleaned['HT']:
        if i == 'NA':
            hthg.append('NA')
            htag.append('NA')
        elif i == '+' or i == '-':
            hthg.append('NA')
            htag.append('NA')
        else:
            try:
                hthg.append(int(i[1]))
                htag.append(int(i[3]))
            except IndexError as e:
                print("Last output before error occurred:", i)



#For Full-Time Results
    hg, ag, tg = [], [], []
    for i in df_cleaned['FT']:
        if len(i) < 5 or ':' in i:
            hg.append('NA')
            ag.append('NA')
            tg.append('NA')
        else:
            try:
                hghg = int(i.split(' - ')[0])
                hg.append(hghg)
                agag = int(i.split(' - ')[1])
                ag.append(agag)
                tg.append(hghg + agag)
            except:
                print(hghg + agag)

    
    df_cleaned['FTHG'], df_cleaned['FTAG'], df_cleaned['FTTG'] = hg, ag, tg
    df_cleaned['HTHG'], df_cleaned['HTAG'] = hthg, htag
    df_cleaned['HTTG'] = df_cleaned['HTHG'] + df_cleaned['HTAG']
    
    final = pd.concat([final, df_cleaned], ignore_index=True)
    
final = final[final['HT'] != 'NA']

# Get the date for today
today = datetime.now()

# Append the correct year based on if the date already occurred in 2025
def assign_year(date_str):
    if pd.isna(date_str):  # Handle NaN values
        return None
    
    # Check if the same day and month already occurred in 2025
    date_in_2025 = datetime.strptime(date_str + ' 2025', "%d %b %Y")

    # If this day and month already occurred in 2025, append 2025, otherwise 2024
    if date_in_2025 <= today:
        return f"{date_str} 2025"
    else:
        return f"{date_str} 2024"

# Example: Fix the 'date' column by removing the weekday and extra punctuation
final['Date'] = final['Date'].str.extract(r'(\d{1,2} \w{3})')  # Extract day and month part
final['Date'] = final['Date'].apply(assign_year)

# Convert to datetime format
final['Date'] = pd.to_datetime(final['Date'], format='%d %b %Y', errors='coerce')

# Filter rows before September 17th, 2024
final_filtered = final[final['Date'] >= pd.Timestamp('2024-09-17')]

# Remove "_YYYY" (4-digit year) at the end of usa, norway and sweden but keep other numbers
final_filtered['League'] = final_filtered['League'].str.replace(r'_\d{4}$', '', regex=True)

# Align columns of uefa to match final_filtered
uefa = uefa[final_filtered.columns]

# Concatenate
final_filtered = pd.concat([uefa, final_filtered], ignore_index=True)
combined = pd.concat([final_filtered.head(), final_filtered.tail()])
combined

Unnamed: 0,Date,League,Home,Away,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
0,9/17/2024,UCL,Juventus,PSV,3 - 1,(2-0),3.0,1.0,4.0,2.0,0.0,2.0
1,9/17/2024,UCL,Young Boys,Aston Villa,0 - 3,(0-2),0.0,3.0,3.0,0.0,2.0,2.0
2,9/17/2024,UCL,Bayern,Dinamo Zagreb,9 - 2,(3-0),9.0,2.0,11.0,3.0,0.0,3.0
3,9/17/2024,UCL,Milan,Liverpool,1 - 3,(1-2),1.0,3.0,4.0,1.0,2.0,3.0
4,9/17/2024,UCL,Real Madrid,Stuttgart,3 - 1,(0-0),3.0,1.0,4.0,0.0,0.0,0.0
7249,2024-10-20 00:00:00,Usa,Houston Dynamo,LA Galaxy,2 - 1,(1-0),2.0,1.0,3.0,1.0,0.0,1.0
7250,2024-10-20 00:00:00,Usa,Los Angeles FC,SJ Earthquakes,3 - 1,(0-1),3.0,1.0,4.0,0.0,1.0,1.0
7251,2024-10-20 00:00:00,Usa,Minnesota Utd,St. Louis City,4 - 1,(1-0),4.0,1.0,5.0,1.0,0.0,1.0
7252,2024-10-20 00:00:00,Usa,Real Salt Lake,Vancouver,2 - 1,(0-0),2.0,1.0,3.0,0.0,0.0,0.0
7253,2024-10-20 00:00:00,Usa,Seattle,Portland,1 - 1,(1-0),1.0,1.0,2.0,1.0,0.0,1.0


# Merging with Predictions + Odds Dataframes

In [28]:
# Merge on multiple columns
final_df = pd.merge(merged_df, final_filtered, on=['Home', 'Away'], how='inner')

# Drop duplicates based on the 'Home' and 'Away' columns
final_df_unique = final_df[~final_df.duplicated(subset=['League_x', 'Home', 'Away'], keep='last')]
final_df_unique = final_df_unique.dropna()

# Reset the index afterward
final_df_unique.reset_index(drop=True, inplace=True)

print('Number of games matched: ', len(final_df_unique))
final_df_unique.tail()

Number of games matched:  3784


Unnamed: 0,League_x,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y,Date,League_y,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
3779,Mexico2,Mazatlan,Cruz Azul,31.62,30.57,37.81,0-1,62.19,69.43,68.38,53.96,27.98,88.5,96.07,35.11,12.61,56.83,30.53,0-0,69.44,43.14,87.36,53.69,75.99,57.78,62.34,21.38,25.56,94.32,92.4,4.5,3.75,1.75,2.0,1.25,1.18,5.0,2.2,2.38,1.53,1.62,1.14,1.8,1.95,1.25,3.75,1.9,1.95,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,2025-03-01 00:00:00,Mexico2,1 - 1,(0-0),1.0,1.0,2.0,0.0,0.0,0.0
3780,Scotland,Dundee FC,St. Johnstone,55.69,19.42,24.85,2-1,75.11,80.54,44.27,87.1,70.01,51.12,70.28,66.9,60.19,27.96,10.61,1-0,88.15,70.8,38.57,81.22,48.25,88.87,75.77,64.49,41.45,62.27,82.85,2.2,3.3,3.3,1.33,1.33,1.67,2.88,2.1,3.75,1.25,1.67,1.4,1.75,2.0,1.3,3.5,2.0,1.85,3.4,1.33,6.5,1.11,1.4,2.75,3.0,1.36,2024-10-26 00:00:00,Scotland,1 - 2,(1-0),1.0,2.0,3.0,1.0,0.0,1.0
3781,Scotland,Ross County,Kilmarnock,43.15,24.93,31.91,1-1,68.08,75.06,56.84,72.01,47.45,74.21,88.14,51.26,24.93,53.81,21.25,0-0,78.74,46.18,75.06,53.05,82.53,75.48,68.84,41.01,32.51,83.21,88.67,3.2,3.1,2.35,1.57,1.36,1.36,4.0,2.0,3.1,1.36,1.73,1.22,1.95,1.8,1.4,3.0,2.3,1.62,4.33,1.22,9.0,1.07,1.5,2.5,3.5,1.29,2024-10-26 00:00:00,Scotland,2 - 1,(0-1),2.0,1.0,3.0,0.0,1.0,1.0
3782,Switzerland,FC Basel,Sion,73.17,15.86,10.79,2-0,89.03,83.96,26.65,87.68,69.81,51.22,70.37,57.34,48.63,35.03,15.82,0-0,83.66,64.45,50.85,77.89,52.49,92.84,61.36,74.24,24.67,50.33,92.64,1.65,4.2,4.5,1.18,1.22,2.2,2.25,2.38,5.0,1.17,1.53,1.57,1.73,2.0,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.5,1.5,2025-01-26 00:00:00,Switzerland,4 - 1,(1-1),4.0,1.0,5.0,1.0,1.0,2.0
3783,Switzerland,Lausanne Sport,Servette,52.28,22.99,24.71,1-1,75.27,76.99,47.7,83.23,61.92,60.33,78.12,61.85,33.03,43.02,23.85,0-0,76.05,56.88,66.87,69.61,64.62,85.41,71.92,57.35,36.26,69.66,86.37,2.25,3.4,3.1,1.36,1.3,1.62,2.88,2.25,3.5,1.29,1.57,1.4,1.62,2.2,1.22,4.0,1.75,2.05,2.75,1.4,5.5,1.14,1.36,3.0,2.63,1.44,2024-11-03 00:00:00,Switzerland,1 - 0,(1-0),1.0,0.0,1.0,1.0,0.0,1.0


# Creating Results Columns

In [29]:
import numpy as np

# Add the new columns based on the condition
final_df_unique['FT1'] = np.where(final_df_unique['FTHG'] > final_df_unique['FTAG'], 1, 0)
final_df_unique['FTX'] = np.where(final_df_unique['FTHG'] == final_df_unique['FTAG'], 1, 0)
final_df_unique['FT2'] = np.where(final_df_unique['FTHG'] < final_df_unique['FTAG'], 1, 0)

final_df_unique['FT1X'] = np.where(final_df_unique['FTHG'] >= final_df_unique['FTAG'], 1, 0)
final_df_unique['FT12'] = np.where(final_df_unique['FTX'] == 0, 1, 0)
final_df_unique['FTX2'] = np.where(final_df_unique['FTHG'] <= final_df_unique['FTAG'], 1, 0)

final_df_unique['1.5O'] = np.where(final_df_unique['FTTG'] > 1.5, 1, 0)
final_df_unique['1.5U_y'] = np.where(final_df_unique['FTTG'] < 1.5, 1, 0)
final_df_unique['2.5O'] = np.where(final_df_unique['FTTG'] > 2.5, 1, 0)
final_df_unique['2.5U_y'] = np.where(final_df_unique['FTTG'] < 2.5, 1, 0)
final_df_unique['3.5O_y'] = np.where(final_df_unique['FTTG'] > 3.5, 1, 0)
final_df_unique['3.5U'] = np.where(final_df_unique['FTTG'] < 3.5, 1, 0)
final_df_unique['4.5O_y'] = np.where(final_df_unique['FTTG'] > 4.5, 1, 0)
final_df_unique['4.5U'] = np.where(final_df_unique['FTTG'] < 4.5, 1, 0)

final_df_unique['BTTS'] = np.where((final_df_unique['FTHG'] != 0) & (final_df_unique['FTAG'] != 0), 1, 0)
final_df_unique['OTTS_y'] = np.where(final_df_unique['BTTS'] == 0, 1, 0)

final_df_unique['HT1'] = np.where(final_df_unique['HTHG'] > final_df_unique['HTAG'], 1, 0)
final_df_unique['HTX'] = np.where(final_df_unique['HTHG'] == final_df_unique['HTAG'], 1, 0)
final_df_unique['HT2'] = np.where(final_df_unique['HTHG'] < final_df_unique['HTAG'], 1, 0)

final_df_unique['HT1X_y'] = np.where(final_df_unique['HTHG'] >= final_df_unique['HTAG'], 1, 0)
final_df_unique['HT12_y'] = np.where(final_df_unique['HTX'] == 0, 1, 0)
final_df_unique['HTX2_y'] = np.where(final_df_unique['HTHG'] <= final_df_unique['HTAG'], 1, 0)

final_df_unique['HT0.5O'] = np.where(final_df_unique['HTTG'] > 0.5, 1, 0)
final_df_unique['HT0.5U_y'] = np.where(final_df_unique['HTTG'] < 0.5, 1, 0)
final_df_unique['HT1.5O_y'] = np.where(final_df_unique['HTTG'] > 1.5, 1, 0)
final_df_unique['HT1.5U'] = np.where(final_df_unique['HTTG'] < 1.5, 1, 0)

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  3784


Unnamed: 0,League_x,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y,Date,League_y,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U_y,2.5O,2.5U_y,3.5O_y,3.5U,4.5O_y,4.5U,BTTS,OTTS_y,HT1,HTX,HT2,HT1X_y,HT12_y,HTX2_y,HT0.5O,HT0.5U_y,HT1.5O_y,HT1.5U
3779,Mexico2,Mazatlan,Cruz Azul,31.62,30.57,37.81,0-1,62.19,69.43,68.38,53.96,27.98,88.5,96.07,35.11,12.61,56.83,30.53,0-0,69.44,43.14,87.36,53.69,75.99,57.78,62.34,21.38,25.56,94.32,92.4,4.5,3.75,1.75,2.0,1.25,1.18,5.0,2.2,2.38,1.53,1.62,1.14,1.8,1.95,1.25,3.75,1.9,1.95,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,2025-03-01 00:00:00,Mexico2,1 - 1,(0-0),1.0,1.0,2.0,0.0,0.0,0.0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1
3780,Scotland,Dundee FC,St. Johnstone,55.69,19.42,24.85,2-1,75.11,80.54,44.27,87.1,70.01,51.12,70.28,66.9,60.19,27.96,10.61,1-0,88.15,70.8,38.57,81.22,48.25,88.87,75.77,64.49,41.45,62.27,82.85,2.2,3.3,3.3,1.33,1.33,1.67,2.88,2.1,3.75,1.25,1.67,1.4,1.75,2.0,1.3,3.5,2.0,1.85,3.4,1.33,6.5,1.11,1.4,2.75,3.0,1.36,2024-10-26 00:00:00,Scotland,1 - 2,(1-0),1.0,2.0,3.0,1.0,0.0,1.0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1
3781,Scotland,Ross County,Kilmarnock,43.15,24.93,31.91,1-1,68.08,75.06,56.84,72.01,47.45,74.21,88.14,51.26,24.93,53.81,21.25,0-0,78.74,46.18,75.06,53.05,82.53,75.48,68.84,41.01,32.51,83.21,88.67,3.2,3.1,2.35,1.57,1.36,1.36,4.0,2.0,3.1,1.36,1.73,1.22,1.95,1.8,1.4,3.0,2.3,1.62,4.33,1.22,9.0,1.07,1.5,2.5,3.5,1.29,2024-10-26 00:00:00,Scotland,2 - 1,(0-1),2.0,1.0,3.0,0.0,1.0,1.0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1
3782,Switzerland,FC Basel,Sion,73.17,15.86,10.79,2-0,89.03,83.96,26.65,87.68,69.81,51.22,70.37,57.34,48.63,35.03,15.82,0-0,83.66,64.45,50.85,77.89,52.49,92.84,61.36,74.24,24.67,50.33,92.64,1.65,4.2,4.5,1.18,1.22,2.2,2.25,2.38,5.0,1.17,1.53,1.57,1.73,2.0,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.5,1.5,2025-01-26 00:00:00,Switzerland,4 - 1,(1-1),4.0,1.0,5.0,1.0,1.0,2.0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0
3783,Switzerland,Lausanne Sport,Servette,52.28,22.99,24.71,1-1,75.27,76.99,47.7,83.23,61.92,60.33,78.12,61.85,33.03,43.02,23.85,0-0,76.05,56.88,66.87,69.61,64.62,85.41,71.92,57.35,36.26,69.66,86.37,2.25,3.4,3.1,1.36,1.3,1.62,2.88,2.25,3.5,1.29,1.57,1.4,1.62,2.2,1.22,4.0,1.75,2.05,2.75,1.4,5.5,1.14,1.36,3.0,2.63,1.44,2024-11-03 00:00:00,Switzerland,1 - 0,(1-0),1.0,0.0,1.0,1.0,0.0,1.0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1


# Creating Profit Columns for Initial Model Predictions

In [30]:
ft1p, ftxp, ft2p, ft1xp, ft12p, ftx2p = [], [], [], [], [], []
over15, under15, over25, under25, over35, under35, over45, under45 = [], [], [], [], [], [], [], []
btts, otts, ht1p, htxp, ht2p, ht1xp, ht12p, htx2p = [], [], [], [], [], [], [], []
htover05, htunder05, htover15, htunder15 = [], [], [], []

#if prediction == result -> coefficient, elif prediction != result -> 0, else -> -1
for i in range(len(final_df_unique)):
    ft_list = [final_df_unique['FT1_x'].iloc[i], final_df_unique['FTX_x'].iloc[i], final_df_unique['FT2_x'].iloc[i]]
    if (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] == 1):
        ft1p.append(final_df_unique['FT1_y'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] != 1):
        ft1p.append(0)
    else:
        ft1p.append(-1)
    
    if (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] == 1):
        ftxp.append(final_df_unique['FTX_y'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] != 1):
        ftxp.append(0)
    else:
        ftxp.append(-1)
    
    if (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] == 1):
        ft2p.append(final_df_unique['FT2_y'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] != 1):
        ft2p.append(0)
    else:
        ft2p.append(-1)

    dc_list = [final_df_unique['DC1X_x'].iloc[i], final_df_unique['DC12_x'].iloc[i], final_df_unique['DCX2_x'].iloc[i]]
    if (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] == 1):
        ft1xp.append(final_df_unique['DC1X_y'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] != 1):
        ft1xp.append(0)
    else:
        ft1xp.append(-1)
    
    if (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] == 1):
        ft12p.append(final_df_unique['DC12_y'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] != 1):
        ft12p.append(0)
    else:
        ft12p.append(-1)
    
    if (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] == 1):
        ftx2p.append(final_df_unique['DCX2_y'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] != 1):
        ftx2p.append(0)
    else:
        ftx2p.append(-1)
    
    if (final_df_unique['1.5O_x'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        over15.append(final_df_unique['1.5O_y'].iloc[i])
    elif (final_df_unique['1.5O_x'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        over15.append(0)
    else:
        over15.append(-1)
    
    if (final_df_unique['1.5O_x'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        under15.append(final_df_unique['1.5U'].iloc[i])
    elif (final_df_unique['1.5O_x'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        under15.append(0)
    else:
        under15.append(-1)
    
    if (final_df_unique['2.5O_x'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        over25.append(final_df_unique['2.5O_y'].iloc[i])
    elif (final_df_unique['2.5O_x'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        over25.append(0)
    else:
        over25.append(-1)
    
    if (final_df_unique['2.5O_x'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        under25.append(final_df_unique['2.5U'].iloc[i])
    elif (final_df_unique['2.5O_x'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        under25.append(0)
    else:
        under25.append(-1)
    
    if (final_df_unique['3.5U_x'].iloc[i] <= 50) and (final_df_unique['3.5U'].iloc[i] != 1):
        over35.append(final_df_unique['3.5O'].iloc[i])
    elif (final_df_unique['3.5U_x'].iloc[i] <= 50) and (final_df_unique['3.5U'].iloc[i] == 1):
        over35.append(0)
    else:
        over35.append(-1)
    
    if (final_df_unique['3.5U_x'].iloc[i] > 50) and (final_df_unique['3.5U'].iloc[i] == 1):
        under35.append(final_df_unique['3.5U_y'].iloc[i])
    elif (final_df_unique['3.5U_x'].iloc[i] > 50) and (final_df_unique['3.5U'].iloc[i] != 1):
        under35.append(0)
    else:
        under35.append(-1)
    
    if (final_df_unique['4.5U_x'].iloc[i] <= 50) and (final_df_unique['4.5U'].iloc[i] != 1):
        over45.append(final_df_unique['4.5O'].iloc[i])
    elif (final_df_unique['4.5U_x'].iloc[i] <= 50) and (final_df_unique['4.5U'].iloc[i] == 1):
        over45.append(0)
    else:
        over45.append(-1)
    
    if (final_df_unique['4.5U_x'].iloc[i] > 50) and (final_df_unique['4.5U'].iloc[i] == 1):
        under45.append(final_df_unique['4.5U_y'].iloc[i])
    elif (final_df_unique['4.5U_x'].iloc[i] > 50) and (final_df_unique['4.5U'].iloc[i] != 1):
        under45.append(0)
    else:
        under45.append(-1)
    
    if (final_df_unique['BTTS_x'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        btts.append(final_df_unique['BTTS_y'].iloc[i])
    elif (final_df_unique['BTTS_x'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        btts.append(0)
    else:
        btts.append(-1)
    
    if (final_df_unique['BTTS_x'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        otts.append(final_df_unique['OTTS'].iloc[i])
    elif (final_df_unique['BTTS_x'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        otts.append(0)
    else:
        otts.append(-1)
    
    ht_list = [final_df_unique['HT1_x'].iloc[i], final_df_unique['HTX_x'].iloc[i], final_df_unique['HT2_x'].iloc[i]]
    if (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] == 1):
        ht1p.append(final_df_unique['HT1_y'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] != 1):
        ht1p.append(0)
    else:
        ht1p.append(-1)
    
    if (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] == 1):
        htxp.append(final_df_unique['HTX_y'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] != 1):
        htxp.append(0)
    else:
        htxp.append(-1)
    
    if (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] == 1):
        ht2p.append(final_df_unique['HT2_y'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] != 1):
        ht2p.append(0)
    else:
        ht2p.append(-1)
    
    htdc_list = [final_df_unique['HTDC1X'].iloc[i], final_df_unique['HTDC12'].iloc[i], final_df_unique['HTDCX2'].iloc[i]]
    if (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X_y'].iloc[i] == 1):
        ht1xp.append(final_df_unique['HT1X'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X_y'].iloc[i] != 1):
        ht1xp.append(0)
    else:
        ht1xp.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12_y'].iloc[i] == 1):
        ht12p.append(final_df_unique['HT12'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12_y'].iloc[i] != 1):
        ht12p.append(0)
    else:
        ht12p.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2_y'].iloc[i] == 1):
        htx2p.append(final_df_unique['HTX2'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2_y'].iloc[i] != 1):
        htx2p.append(0)
    else:
        htx2p.append(-1)
    
    if (final_df_unique['HT0.5O_x'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htover05.append(final_df_unique['HT0.5O_y'].iloc[i])
    elif (final_df_unique['HT0.5O_x'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htover05.append(0)
    else:
        htover05.append(-1)
    
    if (final_df_unique['HT0.5O_x'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htunder05.append(final_df_unique['HT0.5U'].iloc[i])
    elif (final_df_unique['HT0.5O_x'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htunder05.append(0)
    else:
        htunder05.append(-1)
    
    if (final_df_unique['HT1.5U_x'].iloc[i] < 50) and (final_df_unique['HT1.5U'].iloc[i] != 1):
        htover15.append(final_df_unique['HT1.5O'].iloc[i])
    elif (final_df_unique['HT1.5U_x'].iloc[i] < 50) and (final_df_unique['HT1.5U'].iloc[i] == 1):
        htover15.append(0)
    else:
        htover15.append(-1)
    
    if (final_df_unique['HT1.5U_x'].iloc[i] >= 50) and (final_df_unique['HT1.5U'].iloc[i] == 1):
        htunder15.append(final_df_unique['HT1.5U_y'].iloc[i])
    elif (final_df_unique['HT1.5U_x'].iloc[i] >= 50) and (final_df_unique['HT1.5U'].iloc[i] != 1):
        htunder15.append(0)
    else:
        htunder15.append(-1)

final_df_unique['FT1P'], final_df_unique['FTXP'], final_df_unique['FT2P'] = ft1p, ftxp, ft2p
final_df_unique['FT1XP'], final_df_unique['FT12P'], final_df_unique['FTX2P'] = ft1xp, ft12p, ftx2p
final_df_unique['1.5OP'], final_df_unique['1.5UP'], final_df_unique['2.5OP'], final_df_unique['2.5UP'] = over15, under15, over25, under25
final_df_unique['3.5OP'], final_df_unique['3.5UP'], final_df_unique['4.5OP'], final_df_unique['4.5UP'] = over35, under35, over45, under45
final_df_unique['BTTSP'], final_df_unique['OTTSP'] = btts, otts
final_df_unique['HT1P'], final_df_unique['HTXP'], final_df_unique['HT2P'] = ht1p, htxp, ht2p
final_df_unique['HT1XP'], final_df_unique['HT12P'], final_df_unique['HTX2P'] = ht1xp, ht12p, htx2p
final_df_unique['HT0.5OP'], final_df_unique['HT0.5UP'] = htover05, htunder05
final_df_unique['HT1.5OP'], final_df_unique['HT1.5UP'] = htover15, htunder15

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  3784


Unnamed: 0,League_x,Home,Away,FT1_x,FTX_x,FT2_x,FTR,DC1X_x,DC12_x,DCX2_x,1.5O_x,2.5O_x,3.5U_x,4.5U_x,BTTS_x,HT1_x,HTX_x,HT2_x,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O_x,HT1.5U_x,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,FT1_y,FTX_y,FT2_y,DC1X_y,DC12_y,DCX2_y,HT1_y,HTX_y,HT2_y,HT1X,HT12,HTX2,BTTS_y,OTTS,1.5O_y,1.5U,2.5O_y,2.5U,3.5O,3.5U_y,4.5O,4.5U_y,HT0.5O_y,HT0.5U,HT1.5O,HT1.5U_y,Date,League_y,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U_y,2.5O,2.5U_y,3.5O_y,3.5U,4.5O_y,4.5U,BTTS,OTTS_y,HT1,HTX,HT2,HT1X_y,HT12_y,HTX2_y,HT0.5O,HT0.5U_y,HT1.5O_y,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP
3779,Mexico2,Mazatlan,Cruz Azul,31.62,30.57,37.81,0-1,62.19,69.43,68.38,53.96,27.98,88.5,96.07,35.11,12.61,56.83,30.53,0-0,69.44,43.14,87.36,53.69,75.99,57.78,62.34,21.38,25.56,94.32,92.4,4.5,3.75,1.75,2.0,1.25,1.18,5.0,2.2,2.38,1.53,1.62,1.14,1.8,1.95,1.25,3.75,1.9,1.95,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,2025-03-01 00:00:00,Mexico2,1 - 1,(0-0),1.0,1.0,2.0,0.0,0.0,0.0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.25,-1.0,-1.0,1.95,-1.0,1.33,-1.0,1.11,-1.0,0.0,-1.0,2.2,-1.0,-1.0,-1.0,1.14,0.0,-1.0,-1.0,1.4
3780,Scotland,Dundee FC,St. Johnstone,55.69,19.42,24.85,2-1,75.11,80.54,44.27,87.1,70.01,51.12,70.28,66.9,60.19,27.96,10.61,1-0,88.15,70.8,38.57,81.22,48.25,88.87,75.77,64.49,41.45,62.27,82.85,2.2,3.3,3.3,1.33,1.33,1.67,2.88,2.1,3.75,1.25,1.67,1.4,1.75,2.0,1.3,3.5,2.0,1.85,3.4,1.33,6.5,1.11,1.4,2.75,3.0,1.36,2024-10-26 00:00:00,Scotland,1 - 2,(1-0),1.0,2.0,3.0,1.0,0.0,1.0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,0.0,-1.0,-1.0,-1.0,1.33,-1.0,1.3,-1.0,2.0,-1.0,-1.0,1.33,-1.0,1.11,1.75,-1.0,2.88,-1.0,-1.0,1.25,-1.0,-1.0,1.4,-1.0,0.0,-1.0
3781,Scotland,Ross County,Kilmarnock,43.15,24.93,31.91,1-1,68.08,75.06,56.84,72.01,47.45,74.21,88.14,51.26,24.93,53.81,21.25,0-0,78.74,46.18,75.06,53.05,82.53,75.48,68.84,41.01,32.51,83.21,88.67,3.2,3.1,2.35,1.57,1.36,1.36,4.0,2.0,3.1,1.36,1.73,1.22,1.95,1.8,1.4,3.0,2.3,1.62,4.33,1.22,9.0,1.07,1.5,2.5,3.5,1.29,2024-10-26 00:00:00,Scotland,2 - 1,(0-1),2.0,1.0,3.0,0.0,1.0,1.0,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,3.2,-1.0,-1.0,-1.0,1.36,-1.0,1.4,-1.0,-1.0,0.0,-1.0,1.22,-1.0,1.07,1.95,-1.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,1.5,-1.0,-1.0,1.29
3782,Switzerland,FC Basel,Sion,73.17,15.86,10.79,2-0,89.03,83.96,26.65,87.68,69.81,51.22,70.37,57.34,48.63,35.03,15.82,0-0,83.66,64.45,50.85,77.89,52.49,92.84,61.36,74.24,24.67,50.33,92.64,1.65,4.2,4.5,1.18,1.22,2.2,2.25,2.38,5.0,1.17,1.53,1.57,1.73,2.0,1.22,4.0,1.7,2.1,2.63,1.44,5.0,1.17,1.33,3.25,2.5,1.5,2025-01-26 00:00:00,Switzerland,4 - 1,(1-1),4.0,1.0,5.0,1.0,1.0,2.0,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1.65,-1.0,-1.0,1.18,-1.0,-1.0,1.22,-1.0,1.7,-1.0,-1.0,0.0,-1.0,0.0,1.73,-1.0,0.0,-1.0,-1.0,1.17,-1.0,-1.0,1.33,-1.0,-1.0,0.0
3783,Switzerland,Lausanne Sport,Servette,52.28,22.99,24.71,1-1,75.27,76.99,47.7,83.23,61.92,60.33,78.12,61.85,33.03,43.02,23.85,0-0,76.05,56.88,66.87,69.61,64.62,85.41,71.92,57.35,36.26,69.66,86.37,2.25,3.4,3.1,1.36,1.3,1.62,2.88,2.25,3.5,1.29,1.57,1.4,1.62,2.2,1.22,4.0,1.75,2.05,2.75,1.4,5.5,1.14,1.36,3.0,2.63,1.44,2024-11-03 00:00:00,Switzerland,1 - 0,(1-0),1.0,0.0,1.0,1.0,0.0,1.0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,2.25,-1.0,-1.0,-1.0,1.3,-1.0,0.0,-1.0,0.0,-1.0,-1.0,1.4,-1.0,1.14,0.0,-1.0,-1.0,0.0,-1.0,1.29,-1.0,-1.0,1.36,-1.0,-1.0,1.44


# Checking For ROI of Profit Columns

In [31]:
# Select columns that end with 'P'
columns = [col for col in final_df_unique.columns if col.endswith('P')]

# Initialize lists to store results and games
results, games_list = [],  []

for col in columns:
    my_df = final_df_unique[final_df_unique[col] >= 0]
    numerator = np.sum(my_df[col]) - len(my_df)
    result = round(numerator / len(my_df) * 100, 2)
    
    # Append results and games to respective lists
    results.append(result)
    games_list.append(len(my_df))

# Convert results to a DataFrame with an additional column for Games
results_df = pd.DataFrame({
    'Column': columns,
    'ROI': results,
    'Games': games_list
})
results_df

Unnamed: 0,Column,ROI,Games
0,FT1P,-4.64,2138
1,FTXP,-5.56,262
2,FT2P,-11.1,1384
3,FT1XP,-3.32,1664
4,FT12P,-5.75,1105
5,FTX2P,-6.06,1015
6,1.5OP,-5.89,3481
7,1.5UP,0.32,303
8,2.5OP,-5.77,1787
9,2.5UP,-5.18,1997


# ROI of Profit Columns According To Leagues

In [33]:
# Step 1: Filter leagues with at least 10 games
league_counts = final_df_unique['League_x'].value_counts()
leagues_with_10_games = league_counts[league_counts >= 10].index
filtered_df = final_df_unique[final_df_unique['League_x'].isin(leagues_with_10_games)]

# Group by 'League' and calculate results for each group
grouped_results = {}
for league, group in filtered_df.groupby('League_x'):
    group_results = {}
    for col in columns:
        my_df = group[group[col] >= 0]
        numerator = np.sum(my_df[col]) - len(my_df)
        group_results[col] = round(numerator / len(my_df) * 100, 2)
    # Add the number of games for this league
    group_results['Games'] = round(len(group),2)
    grouped_results[league] = group_results

# Convert grouped results to a DataFrame for better visualization
grouped_results_df = pd.DataFrame(grouped_results).T

# Define a function to apply conditional formatting
def highlight_positive(val):
    # Highlight background to red if the value is positive
    color = 'background-color: red' if isinstance(val, (int, float)) and val > 0 else ''
    return color

# Apply the function to the DataFrame
styled_df = (
    grouped_results_df.style
    .applymap(highlight_positive)
    .format("{:.2f}")  # Format only numeric columns, excluding 'Games'
)

# Save the styled DataFrame to Excel
styled_df.to_excel("ROI_leagues.xlsx", index=True)

# Display the styled DataFrame
styled_df

Unnamed: 0,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,Games
Australia,-39.62,,-15.1,19.25,-3.33,30.6,-9.11,,2.62,76.2,-41.29,-18.73,,4.89,-21.62,-22.0,-19.4,13.33,45.43,6.88,,-6.1,-22.22,,5.75,-10.79,18.0
Austria,-40.77,-22.5,-33.62,-7.13,6.53,7.0,-0.2,300.0,22.17,11.06,16.5,8.62,-100.0,-3.04,-17.0,-20.95,-50.09,-2.93,-23.57,3.4,,-9.68,-0.82,62.78,16.64,6.86,47.0
Belgium,11.1,5.38,-41.17,8.12,-27.0,-7.8,-6.8,-35.0,-12.45,-1.04,-31.42,6.24,-100.0,0.84,-19.0,-5.85,-6.11,-21.88,-20.23,3.36,-100.0,-11.45,-4.46,23.29,-77.37,-2.23,99.0
Brazil,-24.6,-100.0,-19.82,-3.09,5.46,-10.22,-0.64,175.0,-5.52,-12.92,,-4.05,,-0.03,9.0,-22.83,-2.62,2.58,,-10.99,,4.86,0.29,41.29,-100.0,-10.0,106.0
Denmark,10.74,75.0,-29.24,11.88,-29.56,5.13,-2.14,,-8.9,4.08,-12.75,-16.41,-100.0,-9.17,23.04,20.21,43.41,29.71,-30.25,2.36,-24.25,19.13,-11.77,0.0,41.93,20.52,42.0
England,-3.94,-26.67,-16.41,-9.13,17.71,-2.57,-0.8,-100.0,-8.63,-15.32,-60.33,-9.95,-100.0,0.5,-2.04,-13.63,-9.21,-8.97,38.76,-7.27,34.0,-18.69,6.69,-2.78,4.39,-0.9,119.0
England2,-7.57,13.46,-23.76,2.18,-9.45,2.41,-3.91,19.04,-9.43,6.33,-46.55,-0.19,,1.3,-3.01,-4.9,9.61,-10.45,-67.5,-2.05,,-15.27,-3.51,12.48,39.87,-5.39,225.0
England3,4.95,49.85,-10.01,-6.76,-3.94,-8.56,-5.33,9.0,0.62,2.13,-27.0,0.51,-46.43,-1.68,4.44,6.05,-29.28,-10.56,6.72,-2.98,15.0,-11.83,-6.88,-9.19,5.71,-4.48,219.0
England4,-5.19,61.67,-14.01,0.92,-2.06,7.71,-12.64,-28.18,-10.36,-2.03,19.55,2.53,116.67,-0.28,-20.17,-12.59,-10.6,-0.55,-51.58,-8.88,-16.0,-4.65,-15.27,-37.43,-21.85,1.06,198.0
England5,-12.41,-100.0,-19.63,-6.51,-13.07,-6.1,-5.15,-100.0,-10.94,-17.95,-66.69,-9.01,-100.0,0.58,-7.23,-27.13,-10.84,-13.47,-5.82,-8.83,-56.14,-6.05,-4.03,-24.91,-16.13,-5.24,167.0


# Creating Optimum Threshold for Each Prediction Column

In [35]:
# Assuming `df` is your DataFrame and it contains the columns for percentages and correctness
def calculate_threshold(percentages, predictions):
    # Ensure inputs are pandas Series
    percentages = pd.Series(percentages)
    predictions = pd.Series(predictions)
    
    thresholds = percentages.unique()
    best_threshold = 0
    best_j_stat = -np.inf  # Start with negative infinity for comparison
    
    for threshold in thresholds:
        # Predict 1s based on the threshold
        predicted_1s = (percentages >= threshold).astype(int)
        
        # Calculate true positives, true negatives, false positives, false negatives
        true_positives = ((predicted_1s == 1) & (predictions == 1)).sum()
        true_negatives = ((predicted_1s == 0) & (predictions == 0)).sum()
        false_positives = ((predicted_1s == 1) & (predictions == 0)).sum()
        false_negatives = ((predicted_1s == 0) & (predictions == 1)).sum()
        
        # Calculate Sensitivity (Recall) and Specificity
        sensitivity = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
        specificity = true_negatives / (true_negatives + false_positives) if (true_negatives + false_positives) > 0 else 0
        
        # Calculate Youden's J Statistic
        j_stat = sensitivity + specificity - 1
        
        # Update best threshold if J statistic improves
        if j_stat > best_j_stat:
            best_j_stat = j_stat
            best_threshold = threshold
    
    return best_threshold, round(best_j_stat, 2)

# Select only numeric columns
numeric_columns = final_df_unique.select_dtypes(include=[np.number])

# Remove rows where any numeric value is greater than 100
final_df_unique = final_df_unique[(numeric_columns <= 100).all(axis=1)]

#Selecting dataframes with model predictions
ft1df = final_df_unique[final_df_unique['FT1_x'] >= final_df_unique[['FTX_x', 'FT2_x']].max(axis=1)]
ftxdf = final_df_unique[final_df_unique['FTX_x'] >= final_df_unique[['FT1_x', 'FT2_x']].max(axis=1)]
ft2df = final_df_unique[final_df_unique['FT2_x'] >= final_df_unique[['FTX_x', 'FT1_x']].max(axis=1)]
dc1xdf = final_df_unique[final_df_unique['DC1X_x'] >= final_df_unique[['DC12_x', 'DCX2_x']].max(axis=1)]
dc12df = final_df_unique[final_df_unique['DC12_x'] >= final_df_unique[['DC1X_x', 'DCX2_x']].max(axis=1)]
dcx2df = final_df_unique[final_df_unique['DCX2_x'] >= final_df_unique[['DC1X_x', 'DC12_x']].max(axis=1)]
over15df, under15df = final_df_unique[final_df_unique['1.5O_x'] >= 50], final_df_unique[final_df_unique['1.5O_x'] < 50]
over25df, under25df = final_df_unique[final_df_unique['2.5O_x'] >= 50], final_df_unique[final_df_unique['2.5O_x'] < 50]
over35df, under35df = final_df_unique[final_df_unique['3.5U_x'] < 50], final_df_unique[final_df_unique['3.5U_x'] >= 50]
over45df, under45df = final_df_unique[final_df_unique['4.5U_x'] < 50], final_df_unique[final_df_unique['4.5U_x'] >= 50]
bttsdf, ottsdf = final_df_unique[final_df_unique['BTTS_x'] >= 50], final_df_unique[final_df_unique['BTTS_x'] < 50]
ht1df = final_df_unique[final_df_unique['HT1_x'] >= final_df_unique[['HTX_x', 'HT2_x']].max(axis=1)]
htxdf = final_df_unique[final_df_unique['HTX_x'] >= final_df_unique[['HT1_x', 'HT2_x']].max(axis=1)]
ht2df = final_df_unique[final_df_unique['HT2_x'] >= final_df_unique[['HT1_x', 'HTX_x']].max(axis=1)]
ht1xdf = final_df_unique[final_df_unique['HTDC1X'] >= final_df_unique[['HTDC12', 'HTDCX2']].max(axis=1)]
ht12df = final_df_unique[final_df_unique['HTDC12'] >= final_df_unique[['HTDC1X', 'HTDCX2']].max(axis=1)]
htx2df = final_df_unique[final_df_unique['HTDCX2'] >= final_df_unique[['HTDC1X', 'HTDC12']].max(axis=1)]
htover05df, htunder05df = final_df_unique[final_df_unique['HT0.5O_x'] >= 50], final_df_unique[final_df_unique['HT0.5O_x'] < 50]
htover15df, htunder15df = final_df_unique[final_df_unique['HT1.5U_x'] < 50], final_df_unique[final_df_unique['HT1.5U_x'] >= 50]

ft1t, ft1a = calculate_threshold(ft1df['FT1_x'], ft1df['FT1'])
ftxt, ftxa = calculate_threshold(ftxdf['FTX_x'], ftxdf['FTX'])
ft2t, ft2a = calculate_threshold(ft2df['FT2_x'], ft2df['FT2'])
ft1xt, ft1xa = calculate_threshold(dc1xdf['DC1X_x'], dc1xdf['FT1X'])
ft12t, ft12a = calculate_threshold(dc12df['DC12_x'], dc12df['FT12'])
ftx2t, ftx2a = calculate_threshold(dcx2df['DCX2_x'], dcx2df['FTX2'])
over15t, over15a = calculate_threshold(over15df['1.5O_x'], over15df['1.5O'])
under15t, under15a = calculate_threshold(under15df['1.5O_x'], under15df['1.5U'])
over25t, over25a = calculate_threshold(over25df['2.5O_x'], over25df['2.5O'])
under25t, under25a = calculate_threshold(under25df['2.5O_x'], under25df['2.5U'])
over35t, over35a = calculate_threshold(over35df['3.5U_x'], over35df['3.5O'])
under35t, under35a = calculate_threshold(under35df['3.5U_x'], under35df['3.5U'])
over45t, over45a = calculate_threshold(over45df['4.5U_x'], over45df['4.5O'])
under45t, under45a = calculate_threshold(under45df['4.5U_x'], under45df['4.5U'])
bttst, bttsa = calculate_threshold(bttsdf['BTTS_x'], bttsdf['BTTS'])
ottst, ottsa = calculate_threshold(ottsdf['BTTS_x'], ottsdf['OTTS'])
ht1t, ht1a = calculate_threshold(ht1df['HT1_x'], ht1df['HT1'])
htxt, htxa = calculate_threshold(htxdf['HTX_x'], htxdf['HTX'])
ht2t, ht2a = calculate_threshold(ht2df['HT2_x'], ht2df['HT2'])
ht1xt, ht1xa = calculate_threshold(ht1xdf['HTDC1X'], ht1xdf['HT1X'])
ht12t, ht12a = calculate_threshold(ht12df['HTDC12'], ht12df['HT12'])
htx2t, htx2a = calculate_threshold(htx2df['HTDCX2'], htx2df['HTX2'])
htover05t, htover05a = calculate_threshold(htover05df['HT0.5O_x'], htover05df['HT0.5O'])
htunder05t, htunder05a = calculate_threshold(htunder05df['HT0.5O_x'], htunder05df['HT0.5U'])
htover15t, htover15a = calculate_threshold(htover15df['HT1.5U_x'], htover15df['HT1.5O'])
htunder15t, htunder15a = calculate_threshold(htunder15df['HT1.5U_x'], htunder15df['HT1.5U'])

new_ft1df, new_ftxdf, new_ft2df = ft1df[ft1df['FT1_x'] >= ft1t],ftxdf[ftxdf['FTX_x'] >= ftxt],ft2df[ft2df['FT2_x'] >= ft2t]
new_ft1xdf, new_ft12df, new_ftx2df = dc1xdf[dc1xdf['DC1X_x'] >= ft1xt],dc12df[dc12df['DC12_x'] >= ft12t],dcx2df[dcx2df['DCX2_x'] >= ftx2t] 
new_over15, new_under15 = over15df[over15df['1.5O_x'] >= over15t], under15df[under15df['1.5O_x'] <= under15t]
new_over25, new_under25 = over25df[over25df['2.5O_x'] >= over25t], under25df[under25df['2.5O_x'] <= under25t]
new_over35, new_under35 = over35df[over35df['3.5U_x'] <= over35t], under35df[under35df['3.5U_x'] >= under35t]
new_over45, new_under45 = over45df[over45df['4.5U_x'] <= over45t], under45df[under45df['4.5U_x'] >= under45t]
new_btts, new_otts = bttsdf[bttsdf['BTTS_x'] >= bttst], ottsdf[ottsdf['BTTS_x'] <= ottst]
new_ht1df, new_htxdf, new_ht2df = ht1df[ht1df['HT1_x'] >= ht1t],htxdf[htxdf['HTX_x'] >= htxt],ht2df[ht2df['HT2_x'] >= ht2t]
new_ht1xdf, new_ht12df, new_htx2df = ht1xdf[ht1xdf['HTDC1X'] >= ht1xt],ht12df[ht12df['HTDC12'] >= ht12t],htx2df[htx2df['HTDCX2'] >= htx2t]
new_htover05, new_htunder05 = htover05df[htover05df['HT0.5O_x'] >= htover05t], htunder05df[htunder05df['HT0.5O_x'] <= htunder05t]
new_htover15, new_htunder15 = htover15df[htover15df['HT1.5U_x'] <= htover15t], htunder15df[htunder15df['HT1.5U_x'] >= htunder15t]

# Store the results in a list
results = [
('FT1', ft1t, ft1a, len(new_ft1df), round(len(new_ft1df)/len(ft1df)*100,2), np.sum(new_ft1df['FT1P']) - len(new_ft1df)),
('FTX', ftxt, ftxa, len(new_ftxdf), round(len(new_ftxdf)/len(ftxdf)*100,2), np.sum(new_ftxdf['FTXP']) - len(new_ftxdf)),
('FT2', ft2t, ft2a, len(new_ft2df), round(len(new_ft2df)/len(ft2df)*100,2), np.sum(new_ft2df['FT2P']) - len(new_ft2df)),
('FT1X', ft1xt, ft1xa, len(new_ft1xdf), round(len(new_ft1xdf)/len(dc1xdf)*100,2), np.sum(new_ft1xdf['FT1XP']) - len(new_ft1xdf)),
('FT12', ft12t, ft12a, len(new_ft12df), round(len(new_ft12df)/len(dc12df)*100,2), np.sum(new_ft12df['FT12P']) - len(new_ft12df)),
('FTX2', ftx2t, ftx2a, len(new_ftx2df), round(len(new_ftx2df)/len(dcx2df)*100,2), np.sum(new_ftx2df['FTX2P']) - len(new_ftx2df)),
('1.5O', over15t, over15a, len(new_over15), round(len(new_over15)/len(over15df)*100,2), np.sum(new_over15['1.5OP'])-len(new_over15)),
('1.5U', under15t, under15a, len(new_under15), round(len(new_under15)/len(under15df)*100,2), np.sum(new_under15['1.5UP'])-len(new_under15)),
('2.5O', over25t, over25a, len(new_over25), round(len(new_over25)/len(over25df)*100,2), np.sum(new_over25['2.5OP'])-len(new_over25)),
('2.5U', under25t, under25a, len(new_under25), round(len(new_under25)/len(under25df)*100,2), np.sum(new_under25['2.5UP'])-len(new_under25)),
('3.5O', over35t, over35a, len(new_over35), round(len(new_over35)/len(over35df)*100,2), np.sum(new_over35['3.5OP'])-len(new_over35)),
('3.5U', under35t, under35a, len(new_under35), round(len(new_under35)/len(under35df)*100,2), np.sum(new_under35['3.5UP'])-len(new_under35)),
('4.5O', over45t, over45a, len(new_over45), round(len(new_over45)/len(over45df)*100,2), np.sum(new_over45['4.5OP'])-len(new_over45)),
('4.5U', under45t, under45a, len(new_under45), round(len(new_under45)/len(under45df)*100,2), np.sum(new_under45['4.5UP'])-len(new_under45)),
('BTTS', bttst, bttsa, len(new_btts), round(len(new_btts)/len(bttsdf)*100,2), np.sum(new_btts['BTTSP'])-len(new_btts)),
('OTTS', ottst, ottsa, len(new_otts), round(len(new_otts)/len(ottsdf)*100,2), np.sum(new_otts['OTTSP'])-len(new_otts)),
('HT1', ht1t, ht1a, len(new_ht1df), round(len(new_ht1df)/len(ht1df)*100,2), np.sum(new_ht1df['HT1P']) - len(new_ht1df)),
('HTX', htxt, htxa, len(new_htxdf), round(len(new_htxdf)/len(htxdf)*100,2), np.sum(new_htxdf['HTXP']) - len(new_htxdf)),
('HT2', ht2t, ht2a, len(new_ht2df), round(len(new_ht2df)/len(ht2df)*100,2), np.sum(new_ht2df['HT2P']) - len(new_ht2df)),
('HT1X', ht1xt, ht1xa, len(new_ht1xdf), round(len(new_ht1xdf)/len(ht1xdf)*100,2), np.sum(new_ht1xdf['HT1XP']) - len(new_ht1xdf)),
('HT12', ht12t, ht12a, len(new_ht12df), round(len(new_ht12df)/len(ht12df)*100,2), np.sum(new_ht12df['HT12P']) - len(new_ht12df)),
('HTX2', htx2t, htx2a, len(new_htx2df), round(len(new_htx2df)/len(htx2df)*100,2), np.sum(new_htx2df['HTX2P']) - len(new_htx2df)),
('HT0.5O', htover05t, htover05a, len(new_htover05), round(len(new_htover05)/len(htover05df)*100,2), np.sum(new_htover05['HT0.5OP'])-len(new_htover05)),
('HT0.5U', htunder05t, htunder05a, len(new_htunder05), round(len(new_htunder05)/len(htunder05df)*100,2), np.sum(new_htunder05['HT0.5UP'])-len(new_htunder05)),
('HT1.5O', htover15t, htover15a, len(new_htover15), round(len(new_htover15)/len(htover15df)*100,2), np.sum(new_htover15['HT1.5OP'])-len(new_htover15)),
('HT1.5U', htunder15t, htunder15a, len(new_htunder15), round(len(new_htunder15)/len(htunder15df)*100,2), np.sum(new_htunder15['HT1.5UP'])-len(new_htunder15))
]

# Create a DataFrame from the results
results_df = pd.DataFrame(results, columns=['Prediction', 'Threshold', 'J-Stat', 'Games', 'Games%', 'Profit'])
results_df['ROI'] = round(results_df['Profit'] / results_df['Games'] * 100, 2)
print('Number of matches: ', len(final_df_unique))
results_df

Number of matches:  3742


Unnamed: 0,Prediction,Threshold,J-Stat,Games,Games%,Profit,ROI
0,FT1,59.23,0.17,849,40.07,11.29,1.33
1,FTX,51.02,0.06,41,16.67,7.95,19.39
2,FT2,49.87,0.1,682,49.46,-60.7,-8.9
3,FT1X,79.03,0.15,1147,70.15,-24.5,-2.14
4,FT12,75.59,0.07,632,57.25,-21.27,-3.37
5,FTX2,80.19,0.11,531,52.94,-28.49,-5.37
6,1.5O,76.26,0.07,1634,47.47,-90.12,-5.52
7,1.5U,30.21,-1.0,43,14.33,1.56,3.63
8,2.5O,64.74,0.06,751,42.43,-34.47,-4.59
9,2.5U,7.67,-1.0,32,1.62,-2.62,-8.19


# Testing Best / Most Profitable Model Predictions

In [37]:
# Select columns from predictions table
predictions = ['FT1_x', 'FTX_x', 'FT2_x', 'DC1X_x', 'DC12_x', 'DCX2_x', 
               '1.5O_x', '2.5O_x', '3.5U_x', '4.5U_x', 'BTTS_x', 
               'HT1_x', 'HTX_x', 'HT2_x', 'HTDC1X', 'HTDC12', 'HTDCX2', 
               'HT0.5O_x', 'HT1.5U_x']

# Select columns from betting odds table
results = ['FT1', 'FTX', 'FT2', 'FT1X', 'FT12', 'FTX2', 
           '1.5O', '2.5O', '3.5U', '4.5U', 'BTTS',
            'HT1', 'HTX', 'HT2', 'HT1X', 'HT12', 'HTX2', 
            'HT0.5O', 'df2_HT1.5U']

# Select columns ending with 'P' (profit columns)
profits = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '2.5OP', '3.5UP', '4.5UP', 'BTTSP',
            'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT1.5UP']

bet, percentage, profit = [], [], []

for i in range(len(final_df_unique)):
    my_list = []
    for j in predictions:
        my_list.append(final_df_unique[j].iloc[i])
    percentage.append(max(my_list))
    max_index = my_list.index(max(my_list))
    bet.append(results[max_index])
    profit_column = profits[max_index]
    profit.append(final_df_unique[profit_column].iloc[i])

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['League_x'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bet,
    'Percentage': percentage,
    'Profit': profit
})


print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs),2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  3742
Correct Predictions: 83.61838588989845
Profit: -153.79 ROI: -4.11%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit
3779,Mexico2,Mazatlan,Cruz Azul,4.5U,96.07,1.11
3780,Scotland,Dundee FC,St. Johnstone,HT1X,88.15,1.25
3781,Scotland,Ross County,Kilmarnock,4.5U,88.14,1.07
3782,Switzerland,FC Basel,Sion,FT1X,89.03,1.18
3783,Switzerland,Lausanne Sport,Servette,1.5O,83.23,0.0


In [40]:
final_df_unique['OTTS_x'] = 100 - final_df_unique['BTTS_x']
final_df_unique['1.5U_x'] = 100 - final_df_unique['1.5O_x']
final_df_unique['2.5U_x'] = 100 - final_df_unique['2.5O_x']
final_df_unique['3.5O_x'] = 100 - final_df_unique['3.5U_x']
final_df_unique['4.5O_x'] = 100 - final_df_unique['4.5U_x']
final_df_unique['HT0.5U_x'] = 100 - final_df_unique['HT0.5O_x']
final_df_unique['HT1.5O_x'] = 100 - final_df_unique['HT1.5U_x'] 

# Select columns starting with 'df1_'
predictions = ['FT1_x', 'FTX_x', 'FT2_x', 'DC1X_x', 'DC12_x', 'DCX2_x', 
               '1.5O_x', '1.5U_x', '2.5O_x','2.5U_x','3.5O_x', '3.5U_x', 
               '4.5O_x', '4.5U_x', 'BTTS_x', 'OTTS_x',
               'HT1_x', 'HTX_x', 'HT2_x', 'HTDC1X', 'HTDC12', 'HTDCX2', 
               'HT0.5O_x', 'HT0.5U_x', 'HT1.5O_x', 'HT1.5U_x']

# Select columns starting with 'df2_'
odds = ['FT1_y', 'FTX_y', 'FT2_y', 'DC1X_y', 'DC12_y', 'DCX2_y', 
           '1.5O_y', '1.5U', '2.5O_y','2.5U', '3.5O','3.5U_y', 
           '4.5O', '4.5U_y', 'BTTS_y', 'OTTS',
            'HT1_y', 'HTX_y', 'HT2_y', 'HT1X', 'HT12', 'HTX2', 
            'HT0.5O_y', 'HT0.5U', 'HT1.5O', 'HT1.5U_y']

# Select columns ending with 'P'
profit = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '1.5UP', '2.5OP', '2.5UP', '3.5OP', '3.5UP', '4.5OP', '4.5UP', 
           'BTTSP', 'OTTSP', 'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT0.5UP', 'HT1.5OP','HT1.5UP']

bets, percentages, profits, difference = [], [], [], [] 
for i in range(len(final_df_unique)):
    my_list = []
    valid_indices = []  # To keep track of indices where profit is not negative
    for j in range(len(predictions)):
        pred_column = predictions[j]
        odds_column = odds[j]
        profit_column = profit[j]  # Corresponding profit column
        
        # Calculate the value
        my_value = (100 / final_df_unique[pred_column].iloc[i]) - final_df_unique[odds_column].iloc[i]
        
        # Only add to the list if the corresponding profit is non-negative
        if final_df_unique[profit_column].iloc[i] >= 0:
            my_list.append(my_value)
            valid_indices.append(j)
        else:
            my_list.append(float('-inf'))  # Set to negative infinity to ignore in max()
    
    if valid_indices:  # Ensure there is at least one valid index
        max_index = my_list.index(max(my_list))
        rec_bet = profit[max_index]
        percent_bet = predictions[max_index]
        bets.append(rec_bet)
        percentages.append(final_df_unique[percent_bet].iloc[i])
        rec_profit = profit[max_index]
        profits.append(final_df_unique[rec_profit].iloc[i])
        difference.append(round(max(my_list), 2))
    else:
        # Handle case where no valid profits are found for this match
        bets.append(None)
        percentages.append(None)
        profits.append(None)
        difference.append(None)

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['League_x'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bets,
    'Percentage': percentages,
    'Profit': profits,
    'Difference': difference
}).dropna()  # Drop rows with None values

print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs), 2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  3742
Correct Predictions: 67.79796900053448
Profit: -146.46 ROI: -3.91%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit,Difference
3779,Mexico2,Mazatlan,Cruz Azul,FT2P,37.81,0.0,0.89
3780,Scotland,Dundee FC,St. Johnstone,3.5UP,51.12,1.33,0.63
3781,Scotland,Ross County,Kilmarnock,HT0.5OP,53.05,1.5,0.39
3782,Switzerland,FC Basel,Sion,3.5UP,51.22,0.0,0.51
3783,Switzerland,Lausanne Sport,Servette,3.5UP,60.33,1.4,0.26


# Combining Different Bets

In [None]:
# Define the list of base columns and multiplier columns
base_columns = ['FT1_x', 'FTX_x', 'FT2_x']  
multiplier_columns = ['1.5O_x', '2.5O_x', 'df1_2.5U', '3.5U_x', '4.5U_x']

# Nested loop: Iterate over each base column and multiplier column
for base_column in base_columns:
    for multiplier in multiplier_columns:
        # Create a new column name dynamically
        new_column = f"{base_column}/{multiplier.split('_')[1]}"
        
        # Perform the calculation and assign it to the new column
        final_df_unique[new_column] = round(final_df_unique[base_column] * final_df_unique[multiplier] / 100, 2)

final_df_unique.tail()

Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,df1_OTTS,df1_1.5U,df1_2.5U,df1_3.5O,df1_4.5O,df1_HT0.5U,df1_HT1.5O,df1_FT1/1.5O,df1_FT1/2.5O,df1_FT1/2.5U,df1_FT1/3.5U,df1_FT1/4.5U,df1_FTX/1.5O,df1_FTX/2.5O,df1_FTX/2.5U,df1_FTX/3.5U,df1_FTX/4.5U,df1_FT2/1.5O,df1_FT2/2.5O,df1_FT2/2.5U,df1_FT2/3.5U,df1_FT2/4.5U
2489,UEL,PAOK,FCSB,47.21,27.96,24.82,1-0,75.17,72.03,52.78,65.28,38.53,81.34,92.42,43.5,20.1,57.22,22.65,0-0,77.32,42.75,79.87,58.04,67.84,73.83,58.9,38.75,22.36,84.76,93.89,PAOK,FCSB,1.67,4.0,4.75,1.18,1.22,2.15,2.3,2.2,5.5,1.13,1.62,1.57,1.95,1.8,1.3,3.5,2.01,1.89,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,10/3/2024,UEL,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0.0,-1.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,-1.0,1.89,-1.0,1.3,-1.0,1.11,-1.0,1.8,-1.0,0.0,-1.0,-1.0,-1.0,1.57,1.4,-1.0,-1.0,1.36,56.5,34.72,61.47,18.66,7.58,41.96,32.16,30.82,18.19,29.02,38.4,43.63,18.25,10.77,17.19,22.74,25.84,16.2,9.56,15.26,20.19,22.94
2490,Scotland2,Falkirk,Ayr Utd,1.21,1.89,66.58,1-7,3.1,67.79,68.47,69.52,68.92,2.5,6.33,54.44,0.15,0.46,11.99,0-4,0.61,12.14,12.45,12.57,0.24,54.5,69.6,31.37,69.08,55.93,2.46,Falkirk,Ayr,1.85,3.6,4.0,1.22,1.29,1.83,2.4,2.2,4.33,1.2,1.62,1.53,1.73,2.0,1.25,3.75,1.85,2.0,3.0,1.36,5.5,1.13,1.36,3.0,2.75,1.4,2024-10-05 00:00:00,Scotland2,2 - 0,(1-0),2.0,0.0,2.0,1.0,0.0,1.0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,-1.0,-1.0,0.0,-1.0,-1.0,0.0,1.25,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,0.0,0.0,-1.0,45.56,30.48,31.08,97.5,93.67,87.43,99.76,0.84,0.83,0.38,0.03,0.08,1.31,1.3,0.59,0.05,0.12,46.29,45.89,20.69,1.66,4.21
2491,Scotland2,Queens Park,Hamilton,0.69,1.15,56.34,1-8,1.84,57.03,57.49,58.1,57.79,1.38,3.78,45.44,0.07,0.26,8.59,0-4,0.33,8.66,8.85,8.9,0.15,45.47,58.15,26.13,57.89,46.75,1.29,Queen's Park,Hamilton,2.15,3.3,3.4,1.3,1.33,1.67,2.63,2.25,3.5,1.29,1.57,1.44,1.57,2.25,1.2,4.33,1.73,2.08,2.63,1.44,4.5,1.17,1.33,3.25,2.63,1.44,2024-10-05 00:00:00,Scotland2,1 - 0,(0-0),1.0,0.0,1.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,2.25,-1.0,-1.0,0.0,-1.0,-1.0,1.44,-1.0,3.25,0.0,-1.0,54.56,41.9,42.21,98.62,96.22,91.1,99.85,0.4,0.4,0.29,0.01,0.03,0.67,0.66,0.49,0.02,0.04,32.73,32.56,23.78,0.78,2.13
2492,Scotland3,Inverness,Stenhousemuir,0.64,1.27,69.37,1-7,1.91,70.01,70.64,71.04,70.15,3.6,8.67,47.88,0.11,0.42,14.77,0-4,0.53,14.88,15.19,15.25,0.38,47.94,71.21,21.88,70.63,63.95,2.7,Inverness,Stenhousemuir,1.95,3.5,3.25,1.25,1.22,1.7,2.6,2.1,4.0,1.22,1.67,1.5,1.83,1.83,1.3,3.4,2.0,1.85,3.4,1.3,6.5,1.1,1.4,2.75,3.0,1.36,2024-09-28 00:00:00,Scotland3,0 - 0,(0-0),0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,-1.0,1.7,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,1.83,-1.0,-1.0,0.0,-1.0,-1.0,1.5,-1.0,2.75,0.0,-1.0,52.12,28.96,29.85,96.4,91.33,84.75,99.62,0.45,0.45,0.19,0.02,0.06,0.9,0.89,0.38,0.05,0.11,49.28,48.66,20.71,2.5,6.01
2493,Scotland3,Dumbarton,Montrose,0.37,0.74,55.74,1-8,1.11,56.11,56.48,56.76,56.4,1.61,4.31,40.02,0.05,0.21,9.83,0-4,0.26,9.88,10.04,10.07,0.2,40.04,56.82,19.55,56.59,49.77,1.2,Dumbarton,Montrose,2.3,3.3,2.6,1.4,1.25,1.5,3.0,2.1,3.25,1.33,1.67,1.36,1.73,2.0,1.29,3.5,1.9,1.95,3.25,1.33,5.5,1.13,1.4,2.75,2.75,1.4,2024-09-21 00:00:00,Scotland3,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,-1.0,-1.0,2.6,-1.0,-1.0,1.5,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,2.0,-1.0,-1.0,3.25,-1.0,-1.0,1.36,-1.0,0.0,0.0,-1.0,59.98,43.24,43.6,98.39,95.69,89.93,99.8,0.21,0.21,0.16,0.01,0.02,0.42,0.42,0.32,0.01,0.03,31.64,31.44,24.3,0.9,2.4
