# Reading Model Predictions and Bet365 Odds

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import warnings

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"
uefa = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})
uefa['FT'] = uefa['FTHG'].astype(str) + ' - ' + uefa['FTAG'].astype(str)
uefa['HT'] = '(' + uefa['HTHG'].astype(str) + '-' + uefa['HTAG'].astype(str) + ')'
uefa['FTTG'] = uefa['FTHG'] + uefa['FTAG']
uefa['HTTG'] = uefa['HTHG'] + uefa['HTAG']

predictions = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/_predictions.xlsx')
bet365_odds = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/final_odds.xlsx')
bet365_odds.tail()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
3137,Ajax,Lazio,2.25,3.6,3.0,1.36,1.29,1.62,2.88,2.3,3.4,1.3,1.57,1.4,1.57,2.25,1.2,4.5,1.67,2.2,2.63,1.5,4.5,1.2,1.33,3.25,2.5,1.5
3138,Maccabi Tel Aviv,RFS,1.55,4.2,6.0,1.13,1.22,2.38,2.1,2.3,6.0,1.11,1.57,1.67,1.91,1.91,1.25,4.0,1.84,2.06,3.0,1.4,5.5,1.14,1.36,3.0,2.63,1.44
3139,Real Sociedad,Dyn. Kyiv,1.27,6.0,11.0,1.05,1.13,3.75,1.73,2.5,10.0,1.03,1.5,2.0,2.38,1.53,1.25,4.0,1.8,2.0,3.0,1.4,5.5,1.14,1.33,3.25,2.63,1.44
3140,Elfsborg,Qarabag,2.55,3.5,2.63,1.44,1.29,1.5,3.0,2.38,3.1,1.36,1.53,1.36,1.5,2.5,1.17,5.0,1.57,2.38,2.38,1.57,4.33,1.22,1.29,3.5,2.38,1.53
3141,Bodo/Glimt,Besiktas,1.75,4.0,4.33,1.2,1.22,2.0,2.3,2.5,4.0,1.22,1.5,1.57,1.5,2.5,1.14,5.5,1.5,2.63,2.2,1.67,4.0,1.25,1.25,3.75,2.2,1.62


# Merging 2 DataFrames for similarity of values

In [2]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Function for fuzzy matching on both columns
def fuzzy_merge_on_two_columns(df1, df2, key1_home, key1_away, key2_home, key2_away, threshold=80):
    """
    Merge two DataFrames based on fuzzy matching of both Home and Away columns.
    - df1, df2: DataFrames to merge
    - key1_home, key1_away: column names for 'Home' and 'Away' in df1
    - key2_home, key2_away: column names for 'Home' and 'Away' in df2
    - threshold: minimum similarity score for a match
    """
    matches = []
    
    for i, row1 in df1.iterrows():
        home_team1, away_team1 = row1[key1_home], row1[key1_away]
        
        # Find the best match in df2 for both Home and Away teams
        best_match = None
        best_score = 0
        
        for j, row2 in df2.iterrows():
            home_team2, away_team2 = row2[key2_home], row2[key2_away]
            
            # Calculate similarity for both Home and Away columns
            home_score = fuzz.ratio(home_team1, home_team2)
            away_score = fuzz.ratio(away_team1, away_team2)
            
            # Average similarity score for the pair
            avg_score = (home_score + away_score) / 2
            
            # Check if this is the best match
            if avg_score >= threshold and avg_score > best_score:
                best_match = j
                best_score = avg_score
        
        # If a match was found above the threshold, save the indices
        if best_match is not None:
            matches.append((i, best_match))

    # Create matched DataFrames based on indices
    matched_df1 = df1.loc[[i for i, _ in matches]].reset_index(drop=True)
    matched_df2 = df2.loc[[j for _, j in matches]].reset_index(drop=True)
    
    # Concatenate the matched data side by side
    return pd.concat([matched_df1, matched_df2], axis=1, keys=["df1", "df2"])

# Use the function to merge
merged_df = fuzzy_merge_on_two_columns(predictions, bet365_odds, 'Home', 'Away', 'Home', 'Away', threshold=80)
merged_df.tail()

Unnamed: 0_level_0,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2
Unnamed: 0_level_1,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
2620,UFCL,Gent,TSC,63.83,23.11,13.05,1-0,86.94,76.88,36.16,66.82,40.41,79.92,91.62,37.07,39.27,59.29,1.42,0-0,98.56,40.69,60.71,42.23,88.62,81.59,45.55,50.44,12.45,75.92,97.6,Gent,TSC,1.33,5.0,8.5,1.08,1.17,3.25,1.8,2.75,7.0,1.1,1.4,1.91,1.73,2.0,1.13,6.0,1.44,2.7,2.1,1.67,3.5,1.29,1.25,3.75,2.1,1.67
2621,UFCL,Mlada Boleslav,Jagiellonia,19.51,25.47,55.01,0-1,44.98,74.52,80.48,67.67,41.41,79.15,91.17,43.08,0.0,80.52,19.48,0-0,80.52,19.48,100.0,19.48,97.97,54.98,78.56,19.06,45.55,95.28,79.87,Mlada Boleslav,Jagiellonia,2.15,3.5,3.2,1.36,1.29,1.67,2.75,2.3,3.5,1.29,1.57,1.4,1.53,2.38,1.18,4.5,1.65,2.2,2.5,1.5,4.5,1.18,1.3,3.4,2.5,1.5
2622,UFCL,Omonia,Rapid,27.87,28.02,44.11,0-1,55.89,71.98,72.13,65.55,38.99,81.0,92.23,44.55,0.0,58.49,41.49,0-0,58.49,41.49,99.98,41.49,89.86,61.71,72.39,24.95,36.86,92.69,86.01,Omonia,SK Rapid,2.9,3.4,2.35,1.57,1.3,1.4,3.5,2.2,3.0,1.36,1.62,1.29,1.67,2.1,1.25,3.75,1.85,2.0,3.0,1.36,6.0,1.13,1.36,3.0,2.75,1.4
2623,UFCL,Pafos,Celje,78.18,4.93,4.03,5-1,83.11,82.21,8.96,86.65,85.06,6.06,13.48,71.09,25.21,8.72,6.3,4-2,33.93,31.51,15.02,39.75,0.15,86.83,71.35,85.0,44.38,7.38,65.8,Pafos,Celje,1.53,4.33,5.75,1.14,1.2,2.38,2.05,2.5,5.5,1.14,1.5,1.73,1.67,2.1,1.17,5.0,1.57,2.35,2.38,1.53,4.33,1.2,1.29,3.5,2.38,1.53
2624,UFCL,TNS,Panathinaikos,28.39,52.29,19.32,0-0,80.68,47.71,71.61,18.84,4.66,99.11,99.86,10.4,2.56,22.66,66.37,0-2,25.22,68.93,89.03,81.76,19.56,36.87,28.39,7.83,4.48,98.85,99.52,TNS,Panathinaikos,9.5,4.75,1.33,3.25,1.18,1.07,8.0,2.5,1.8,1.91,1.5,1.06,2.0,1.73,1.2,4.33,1.67,2.15,2.63,1.44,5.0,1.17,1.3,3.4,2.5,1.5


# Scraping SoccerStats For Match Results

In [4]:
final =  pd.DataFrame()
liqa = ''
unique_leagues = predictions['League'].unique().tolist()

# Convert to lowercase and exclude UEFA competitions
uefa_list = ['unl', 'uel', 'ucl', 'ufcl']
unique_leagues = [league.lower() for league in unique_leagues if league.lower() not in uefa_list]

for i in unique_leagues:
    URL = "https://www.soccerstats.com/results.asp?league=" + i + "&pmtype=bydate"
    page = requests.get(URL)
    liqa = i
    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="btable")
    sth = results.find_all("tr", class_="odd")
    sth


    date, league, home, away, ft, ht = [], [], [], [], [],[]
    for i in sth:
        date.append(i.find_all("td", align = 'right')[0].get_text(strip=True))
        league.append(liqa.capitalize())
        home.append(i.find_all("td", align = 'right')[1].get_text(strip=True))
        away.append(i.find("td", align = "left").get_text(strip = True))
        ft.append(i.find_all("td", align = 'center')[0].get_text(strip = True))
        try:
            ht.append(i.find_all("td", align = 'center')[2].get_text(strip = True))
        except IndexError as e:
            ht.append('NA')#print("Last output before error occurred:", i.find_all("td", align = 'center'))

    data = {'Date': date, 'League': league,'Home': home, 'Away': away, 'FT': ft, 'HT': ht}

# Create a DataFrame from the dictionary
    df = pd.DataFrame(data)

# Replace empty strings with NaN
    #next_df = df[(df['Date'] == formatted_date) & (df['HT'] == '')]
    df.replace('', pd.NA, inplace=True)

# Drop rows with NaN values
    df_cleaned = df.dropna()

#For Half-Time Results
    hthg, htag = [], []
    for i in df_cleaned['HT']:
        if i == 'NA':
            hthg.append('NA')
            htag.append('NA')
        elif i == '+' or i == '-':
            hthg.append('NA')
            htag.append('NA')
        else:
            try:
                hthg.append(int(i[1]))
                htag.append(int(i[3]))
            except IndexError as e:
                print("Last output before error occurred:", i)



#For Full-Time Results
    hg, ag, tg = [], [], []
    for i in df_cleaned['FT']:
        if len(i) < 5 or ':' in i:
            hg.append('NA')
            ag.append('NA')
            tg.append('NA')
        else:
            try:
                hghg = int(i.split(' - ')[0])
                hg.append(hghg)
                agag = int(i.split(' - ')[1])
                ag.append(agag)
                tg.append(hghg + agag)
            except:
                print(hghg + agag)

    
    df_cleaned['FTHG'], df_cleaned['FTAG'], df_cleaned['FTTG'] = hg, ag, tg
    df_cleaned['HTHG'], df_cleaned['HTAG'] = hthg, htag
    df_cleaned['HTTG'] = df_cleaned['HTHG'] + df_cleaned['HTAG']
    
    final = pd.concat([final, df_cleaned], ignore_index=True)
    
final = final[final['HT'] != 'NA']

# Example: Fix the 'date' column by removing the weekday and extra punctuation
final['Date'] = final['Date'].str.extract(r'(\d{1,2} \w{3})')  # Extract day and month part
final['Date'] = final['Date'] + ' 2024'  # Append the year

# Convert to datetime format
final['Date'] = pd.to_datetime(final['Date'], format='%d %b %Y', errors='coerce')

# Filter rows before September 6th, 2024
final_filtered = final[final['Date'] >= pd.Timestamp('2024-09-17')]

# Align columns of uefa to match final_filtered
uefa = uefa[final_filtered.columns]

# Concatenate
final_filtered = pd.concat([uefa, final_filtered], ignore_index=True)

combined = pd.concat([final_filtered.head(), final_filtered.tail()])

combined

Unnamed: 0,Date,League,Home,Away,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
0,9/17/2024,UCL,Juventus,PSV,3 - 1,(2-0),3,1,4,2,0,2
1,9/17/2024,UCL,Young Boys,Aston Villa,0 - 3,(0-2),0,3,3,0,2,2
2,9/17/2024,UCL,Bayern,Dinamo Zagreb,9 - 2,(3-0),9,2,11,3,0,3
3,9/17/2024,UCL,Milan,Liverpool,1 - 3,(1-2),1,3,4,1,2,3
4,9/17/2024,UCL,Real Madrid,Stuttgart,3 - 1,(0-0),3,1,4,0,0,0
6630,2024-12-08 00:00:00,Portugal2,Pacos Ferreira,Portimonense,0 - 1,(0-0),0,1,1,0,0,0
6631,2024-12-08 00:00:00,Portugal2,Oliveirense,Alverca,1 - 4,(1-3),1,4,5,1,3,4
6632,2024-12-08 00:00:00,Portugal2,Mafra,Maritimo,2 - 3,(1-1),2,3,5,1,1,2
6633,2024-12-08 00:00:00,Portugal2,Chaves,FC Porto B,2 - 1,(2-0),2,1,3,2,0,2
6634,2024-12-08 00:00:00,Portugal2,Vizela,Tondela,1 - 1,(0-0),1,1,2,0,0,0


# Merging with Predictions + Odds Dataframes

In [7]:
# Flatten multi-level columns
merged_df.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in merged_df.columns]

# Rename two columns
merged_df = merged_df.rename(columns={'df1_Home': 'Home', 'df1_Away': 'Away'})

# Merge on multiple columns
final_df = pd.merge(merged_df, final_filtered, on=['Home', 'Away'], how='inner')

# Drop duplicates based on the 'Home' and 'Away' columns
final_df_unique = final_df.drop_duplicates(subset=['Home', 'Away'])
final_df_unique = final_df_unique.dropna()

# Reset the index afterward
final_df_unique.reset_index(drop=True, inplace=True)

print('Number of games matched: ', len(final_df_unique))
final_df_unique.tail()

Number of games matched:  2532


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
2527,England2,Sheffield Wed,Blackburn,29.67,31.01,39.33,1-1,60.68,69.0,70.34,70.2,42.3,78.45,90.76,49.6,16.34,55.86,27.78,0-0,72.2,44.12,83.64,49.74,85.11,66.27,72.34,29.61,36.8,90.31,86.05,Sheffield Wed,Blackburn,2.2,3.5,3.1,1.36,1.29,1.67,2.88,2.1,3.75,1.25,1.67,1.4,1.8,1.91,1.3,3.5,2.0,1.85,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,2024-12-10 00:00:00,England2,0 - 1,(0-0),0,1,1,0,0,0
2528,England2,Sunderland,Bristol City,63.67,24.74,11.58,1-0,88.41,75.25,36.32,71.32,44.09,77.01,89.89,41.02,45.29,50.12,4.54,0-0,95.41,49.83,54.66,52.51,82.95,83.35,47.67,53.51,13.78,73.24,97.18,Sunderland,Bristol City,1.9,3.4,4.1,1.22,1.3,1.91,2.6,2.05,4.75,1.17,1.73,1.5,1.91,1.8,1.36,3.2,2.1,1.73,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-12-10 00:00:00,England2,1 - 1,(0-0),1,1,2,0,0,0
2529,England2,Leeds Utd,Middlesbrough,61.27,24.53,14.19,2-0,85.8,75.46,38.72,75.94,49.95,72.01,86.69,48.01,42.9,32.71,23.93,1-0,75.61,66.83,56.64,81.39,50.12,84.57,55.2,55.74,19.23,71.19,95.2,Leeds,Middlesbrough,1.83,3.7,4.1,1.22,1.29,1.91,2.4,2.25,4.5,1.17,1.57,1.53,1.73,2.0,1.25,4.0,1.85,2.0,3.2,1.36,5.5,1.14,1.36,3.0,2.63,1.44,2024-12-10 00:00:00,England2,3 - 1,(1-0),3,1,4,1,0,1
2530,England5,Solihull Moors,Aldershot Town,60.0,18.76,21.1,2-1,78.76,81.1,39.86,91.56,77.17,41.83,61.35,72.13,41.75,32.27,25.69,1-0,74.02,67.44,57.96,79.63,57.0,92.19,77.81,72.5,44.5,52.65,80.51,Solihull Moors,Aldershot,1.85,3.7,3.9,1.25,1.29,1.91,2.4,2.38,4.0,1.22,1.53,1.53,1.57,2.25,1.18,4.5,1.62,2.25,2.5,1.5,4.5,1.18,1.3,3.4,2.5,1.5,2024-12-10 00:00:00,England5,2 - 1,(1-0),2,1,3,1,0,1
2531,England5,Wealdstone,Altrincham,29.18,28.48,42.34,1-1,57.66,71.52,70.82,69.89,43.21,77.73,90.33,49.01,28.44,39.82,31.68,0-0,68.26,60.12,71.5,69.93,69.53,65.6,73.81,28.89,38.72,90.69,84.79,Wealdstone,Altrincham,2.8,3.4,2.38,1.53,1.3,1.4,3.5,2.2,3.1,1.36,1.62,1.29,1.7,2.05,1.29,3.5,1.9,1.9,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,2024-12-10 00:00:00,England5,3 - 3,(2-2),3,3,6,2,2,4


# Creating Results Columns

In [8]:
import numpy as np

# Add the new columns based on the condition
final_df_unique['FT1'] = np.where(final_df_unique['FTHG'] > final_df_unique['FTAG'], 1, 0)
final_df_unique['FTX'] = np.where(final_df_unique['FTHG'] == final_df_unique['FTAG'], 1, 0)
final_df_unique['FT2'] = np.where(final_df_unique['FTHG'] < final_df_unique['FTAG'], 1, 0)

final_df_unique['FT1X'] = np.where(final_df_unique['FTHG'] >= final_df_unique['FTAG'], 1, 0)
final_df_unique['FT12'] = np.where(final_df_unique['FTX'] == 0, 1, 0)
final_df_unique['FTX2'] = np.where(final_df_unique['FTHG'] <= final_df_unique['FTAG'], 1, 0)

final_df_unique['1.5O'] = np.where(final_df_unique['FTTG'] > 1.5, 1, 0)
final_df_unique['1.5U'] = np.where(final_df_unique['FTTG'] < 1.5, 1, 0)
final_df_unique['2.5O'] = np.where(final_df_unique['FTTG'] > 2.5, 1, 0)
final_df_unique['2.5U'] = np.where(final_df_unique['FTTG'] < 2.5, 1, 0)
final_df_unique['3.5O'] = np.where(final_df_unique['FTTG'] > 3.5, 1, 0)
final_df_unique['3.5U'] = np.where(final_df_unique['FTTG'] < 3.5, 1, 0)
final_df_unique['4.5O'] = np.where(final_df_unique['FTTG'] > 4.5, 1, 0)
final_df_unique['4.5U'] = np.where(final_df_unique['FTTG'] < 4.5, 1, 0)

final_df_unique['BTTS'] = np.where((final_df_unique['FTHG'] != 0) & (final_df_unique['FTAG'] != 0), 1, 0)
final_df_unique['OTTS'] = np.where(final_df_unique['BTTS'] == 0, 1, 0)

final_df_unique['HT1'] = np.where(final_df_unique['HTHG'] > final_df_unique['HTAG'], 1, 0)
final_df_unique['HTX'] = np.where(final_df_unique['HTHG'] == final_df_unique['HTAG'], 1, 0)
final_df_unique['HT2'] = np.where(final_df_unique['HTHG'] < final_df_unique['HTAG'], 1, 0)

final_df_unique['HT1X'] = np.where(final_df_unique['HTHG'] >= final_df_unique['HTAG'], 1, 0)
final_df_unique['HT12'] = np.where(final_df_unique['HTX'] == 0, 1, 0)
final_df_unique['HTX2'] = np.where(final_df_unique['HTHG'] <= final_df_unique['HTAG'], 1, 0)

final_df_unique['HT0.5O'] = np.where(final_df_unique['HTTG'] > 0.5, 1, 0)
final_df_unique['HT0.5U'] = np.where(final_df_unique['HTTG'] < 0.5, 1, 0)
final_df_unique['HT1.5O'] = np.where(final_df_unique['HTTG'] > 1.5, 1, 0)
final_df_unique['HT1.5U'] = np.where(final_df_unique['HTTG'] < 1.5, 1, 0)

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  2532


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U
2527,England2,Sheffield Wed,Blackburn,29.67,31.01,39.33,1-1,60.68,69.0,70.34,70.2,42.3,78.45,90.76,49.6,16.34,55.86,27.78,0-0,72.2,44.12,83.64,49.74,85.11,66.27,72.34,29.61,36.8,90.31,86.05,Sheffield Wed,Blackburn,2.2,3.5,3.1,1.36,1.29,1.67,2.88,2.1,3.75,1.25,1.67,1.4,1.8,1.91,1.3,3.5,2.0,1.85,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,2024-12-10 00:00:00,England2,0 - 1,(0-0),0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1
2528,England2,Sunderland,Bristol City,63.67,24.74,11.58,1-0,88.41,75.25,36.32,71.32,44.09,77.01,89.89,41.02,45.29,50.12,4.54,0-0,95.41,49.83,54.66,52.51,82.95,83.35,47.67,53.51,13.78,73.24,97.18,Sunderland,Bristol City,1.9,3.4,4.1,1.22,1.3,1.91,2.6,2.05,4.75,1.17,1.73,1.5,1.91,1.8,1.36,3.2,2.1,1.73,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-12-10 00:00:00,England2,1 - 1,(0-0),1,1,2,0,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1
2529,England2,Leeds Utd,Middlesbrough,61.27,24.53,14.19,2-0,85.8,75.46,38.72,75.94,49.95,72.01,86.69,48.01,42.9,32.71,23.93,1-0,75.61,66.83,56.64,81.39,50.12,84.57,55.2,55.74,19.23,71.19,95.2,Leeds,Middlesbrough,1.83,3.7,4.1,1.22,1.29,1.91,2.4,2.25,4.5,1.17,1.57,1.53,1.73,2.0,1.25,4.0,1.85,2.0,3.2,1.36,5.5,1.14,1.36,3.0,2.63,1.44,2024-12-10 00:00:00,England2,3 - 1,(1-0),3,1,4,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1
2530,England5,Solihull Moors,Aldershot Town,60.0,18.76,21.1,2-1,78.76,81.1,39.86,91.56,77.17,41.83,61.35,72.13,41.75,32.27,25.69,1-0,74.02,67.44,57.96,79.63,57.0,92.19,77.81,72.5,44.5,52.65,80.51,Solihull Moors,Aldershot,1.85,3.7,3.9,1.25,1.29,1.91,2.4,2.38,4.0,1.22,1.53,1.53,1.57,2.25,1.18,4.5,1.62,2.25,2.5,1.5,4.5,1.18,1.3,3.4,2.5,1.5,2024-12-10 00:00:00,England5,2 - 1,(1-0),2,1,3,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1
2531,England5,Wealdstone,Altrincham,29.18,28.48,42.34,1-1,57.66,71.52,70.82,69.89,43.21,77.73,90.33,49.01,28.44,39.82,31.68,0-0,68.26,60.12,71.5,69.93,69.53,65.6,73.81,28.89,38.72,90.69,84.79,Wealdstone,Altrincham,2.8,3.4,2.38,1.53,1.3,1.4,3.5,2.2,3.1,1.36,1.62,1.29,1.7,2.05,1.29,3.5,1.9,1.9,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,2024-12-10 00:00:00,England5,3 - 3,(2-2),3,3,6,2,2,4,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0


# Creating Profit Columns for Initial Model Predictions

In [9]:
ft1p, ftxp, ft2p, ft1xp, ft12p, ftx2p = [], [], [], [], [], []
over15, under15, over25, under25, over35, under35, over45, under45 = [], [], [], [], [], [], [], []
btts, otts, ht1p, htxp, ht2p, ht1xp, ht12p, htx2p = [], [], [], [], [], [], [], []
htover05, htunder05, htover15, htunder15 = [], [], [], []

#if prediction == result -> coefficient, elif prediction != result -> 0, else -> -1
for i in range(len(final_df_unique)):
    ft_list = [final_df_unique['df1_FT1'].iloc[i], final_df_unique['df1_FTX'].iloc[i], final_df_unique['df1_FT2'].iloc[i]]
    if (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] == 1):
        ft1p.append(final_df_unique['df2_FT1'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] != 1):
        ft1p.append(0)
    else:
        ft1p.append(-1)
    
    if (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] == 1):
        ftxp.append(final_df_unique['df2_FTX'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] != 1):
        ftxp.append(0)
    else:
        ftxp.append(-1)
    
    if (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] == 1):
        ft2p.append(final_df_unique['df2_FT2'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] != 1):
        ft2p.append(0)
    else:
        ft2p.append(-1)

    dc_list = [final_df_unique['df1_DC1X'].iloc[i], final_df_unique['df1_DC12'].iloc[i], final_df_unique['df1_DCX2'].iloc[i]]
    if (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] == 1):
        ft1xp.append(final_df_unique['df2_DC1X'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] != 1):
        ft1xp.append(0)
    else:
        ft1xp.append(-1)
    
    if (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] == 1):
        ft12p.append(final_df_unique['df2_DC12'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] != 1):
        ft12p.append(0)
    else:
        ft12p.append(-1)
    
    if (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] == 1):
        ftx2p.append(final_df_unique['df2_DCX2'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] != 1):
        ftx2p.append(0)
    else:
        ftx2p.append(-1)
    
    if (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        over15.append(final_df_unique['df2_1.5O'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        over15.append(0)
    else:
        over15.append(-1)
    
    if (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        under15.append(final_df_unique['df2_1.5U'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        under15.append(0)
    else:
        under15.append(-1)
    
    if (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        over25.append(final_df_unique['df2_2.5O'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        over25.append(0)
    else:
        over25.append(-1)
    
    if (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        under25.append(final_df_unique['df2_2.5U'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        under25.append(0)
    else:
        under25.append(-1)
    
    if (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        over35.append(final_df_unique['df2_3.5O'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        over35.append(0)
    else:
        over35.append(-1)
    
    if (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        under35.append(final_df_unique['df2_3.5U'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        under35.append(0)
    else:
        under35.append(-1)
    
    if (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        over45.append(final_df_unique['df2_4.5O'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        over45.append(0)
    else:
        over45.append(-1)
    
    if (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        under45.append(final_df_unique['df2_4.5U'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        under45.append(0)
    else:
        under45.append(-1)
    
    if (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        btts.append(final_df_unique['df2_BTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        btts.append(0)
    else:
        btts.append(-1)
    
    if (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        otts.append(final_df_unique['df2_OTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        otts.append(0)
    else:
        otts.append(-1)
    
    ht_list = [final_df_unique['df1_HT1'].iloc[i], final_df_unique['df1_HTX'].iloc[i], final_df_unique['df1_HT2'].iloc[i]]
    if (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] == 1):
        ht1p.append(final_df_unique['df2_HT1'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] != 1):
        ht1p.append(0)
    else:
        ht1p.append(-1)
    
    if (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] == 1):
        htxp.append(final_df_unique['df2_HTX'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] != 1):
        htxp.append(0)
    else:
        htxp.append(-1)
    
    if (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] == 1):
        ht2p.append(final_df_unique['df2_HT2'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] != 1):
        ht2p.append(0)
    else:
        ht2p.append(-1)
    
    htdc_list = [final_df_unique['df1_HTDC1X'].iloc[i], final_df_unique['df1_HTDC12'].iloc[i], final_df_unique['df1_HTDCX2'].iloc[i]]
    if (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X'].iloc[i] == 1):
        ht1xp.append(final_df_unique['df2_HT1X'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X'].iloc[i] != 1):
        ht1xp.append(0)
    else:
        ht1xp.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12'].iloc[i] == 1):
        ht12p.append(final_df_unique['df2_HT12'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12'].iloc[i] != 1):
        ht12p.append(0)
    else:
        ht12p.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2'].iloc[i] == 1):
        htx2p.append(final_df_unique['df2_HTX2'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2'].iloc[i] != 1):
        htx2p.append(0)
    else:
        htx2p.append(-1)
    
    if (final_df_unique['df1_HT0.5O'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htover05.append(final_df_unique['df2_HT0.5O'].iloc[i])
    elif (final_df_unique['df1_HT0.5O'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htover05.append(0)
    else:
        htover05.append(-1)
    
    if (final_df_unique['df1_HT0.5O'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htunder05.append(final_df_unique['df2_HT0.5U'].iloc[i])
    elif (final_df_unique['df1_HT0.5O'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htunder05.append(0)
    else:
        htunder05.append(-1)
    
    if (final_df_unique['df1_HT1.5U'].iloc[i] < 50) and (final_df_unique['HT1.5O'].iloc[i] == 1):
        htover15.append(final_df_unique['df2_HT1.5O'].iloc[i])
    elif (final_df_unique['df1_HT1.5U'].iloc[i] < 50) and (final_df_unique['HT1.5O'].iloc[i] != 1):
        htover15.append(0)
    else:
        htover15.append(-1)
    
    if (final_df_unique['df1_HT1.5U'].iloc[i] >= 50) and (final_df_unique['HT1.5O'].iloc[i] != 1):
        htunder15.append(final_df_unique['df2_HT1.5U'].iloc[i])
    elif (final_df_unique['df1_HT1.5U'].iloc[i] >= 50) and (final_df_unique['HT1.5O'].iloc[i] == 1):
        htunder15.append(0)
    else:
        htunder15.append(-1)

final_df_unique['FT1P'], final_df_unique['FTXP'], final_df_unique['FT2P'] = ft1p, ftxp, ft2p
final_df_unique['FT1XP'], final_df_unique['FT12P'], final_df_unique['FTX2P'] = ft1xp, ft12p, ftx2p
final_df_unique['1.5OP'], final_df_unique['1.5UP'], final_df_unique['2.5OP'], final_df_unique['2.5UP'] = over15, under15, over25, under25
final_df_unique['3.5OP'], final_df_unique['3.5UP'], final_df_unique['4.5OP'], final_df_unique['4.5UP'] = over35, under35, over45, under45
final_df_unique['BTTSP'], final_df_unique['OTTSP'] = btts, otts
final_df_unique['HT1P'], final_df_unique['HTXP'], final_df_unique['HT2P'] = ht1p, htxp, ht2p
final_df_unique['HT1XP'], final_df_unique['HT12P'], final_df_unique['HTX2P'] = ht1xp, ht12p, htx2p
final_df_unique['HT0.5OP'], final_df_unique['HT0.5UP'] = htover05, htunder05
final_df_unique['HT1.5OP'], final_df_unique['HT1.5UP'] = htover15, htunder15

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  2532


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP
2527,England2,Sheffield Wed,Blackburn,29.67,31.01,39.33,1-1,60.68,69.0,70.34,70.2,42.3,78.45,90.76,49.6,16.34,55.86,27.78,0-0,72.2,44.12,83.64,49.74,85.11,66.27,72.34,29.61,36.8,90.31,86.05,Sheffield Wed,Blackburn,2.2,3.5,3.1,1.36,1.29,1.67,2.88,2.1,3.75,1.25,1.67,1.4,1.8,1.91,1.3,3.5,2.0,1.85,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,2024-12-10 00:00:00,England2,0 - 1,(0-0),0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,3.1,-1.0,-1.0,1.67,0.0,-1.0,-1.0,1.85,-1.0,1.3,-1.0,1.11,-1.0,1.91,-1.0,2.1,-1.0,-1.0,-1.0,1.4,-1.0,2.75,-1.0,1.36
2528,England2,Sunderland,Bristol City,63.67,24.74,11.58,1-0,88.41,75.25,36.32,71.32,44.09,77.01,89.89,41.02,45.29,50.12,4.54,0-0,95.41,49.83,54.66,52.51,82.95,83.35,47.67,53.51,13.78,73.24,97.18,Sunderland,Bristol City,1.9,3.4,4.1,1.22,1.3,1.91,2.6,2.05,4.75,1.17,1.73,1.5,1.91,1.8,1.36,3.2,2.1,1.73,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-12-10 00:00:00,England2,1 - 1,(0-0),1,1,2,0,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0.0,-1.0,-1.0,1.22,-1.0,-1.0,1.36,-1.0,-1.0,1.73,-1.0,1.25,-1.0,1.08,-1.0,0.0,-1.0,2.05,-1.0,1.17,-1.0,-1.0,0.0,-1.0,-1.0,1.33
2529,England2,Leeds Utd,Middlesbrough,61.27,24.53,14.19,2-0,85.8,75.46,38.72,75.94,49.95,72.01,86.69,48.01,42.9,32.71,23.93,1-0,75.61,66.83,56.64,81.39,50.12,84.57,55.2,55.74,19.23,71.19,95.2,Leeds,Middlesbrough,1.83,3.7,4.1,1.22,1.29,1.91,2.4,2.25,4.5,1.17,1.57,1.53,1.73,2.0,1.25,4.0,1.85,2.0,3.2,1.36,5.5,1.14,1.36,3.0,2.63,1.44,2024-12-10 00:00:00,England2,3 - 1,(1-0),3,1,4,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1.83,-1.0,-1.0,1.22,-1.0,-1.0,1.25,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.14,-1.0,0.0,2.4,-1.0,-1.0,1.17,-1.0,-1.0,1.36,-1.0,-1.0,1.44
2530,England5,Solihull Moors,Aldershot Town,60.0,18.76,21.1,2-1,78.76,81.1,39.86,91.56,77.17,41.83,61.35,72.13,41.75,32.27,25.69,1-0,74.02,67.44,57.96,79.63,57.0,92.19,77.81,72.5,44.5,52.65,80.51,Solihull Moors,Aldershot,1.85,3.7,3.9,1.25,1.29,1.91,2.4,2.38,4.0,1.22,1.53,1.53,1.57,2.25,1.18,4.5,1.62,2.25,2.5,1.5,4.5,1.18,1.3,3.4,2.5,1.5,2024-12-10 00:00:00,England5,2 - 1,(1-0),2,1,3,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1.85,-1.0,-1.0,-1.0,1.29,-1.0,1.18,-1.0,1.62,-1.0,0.0,-1.0,-1.0,1.18,1.57,-1.0,2.4,-1.0,-1.0,1.22,-1.0,-1.0,1.3,-1.0,-1.0,1.5
2531,England5,Wealdstone,Altrincham,29.18,28.48,42.34,1-1,57.66,71.52,70.82,69.89,43.21,77.73,90.33,49.01,28.44,39.82,31.68,0-0,68.26,60.12,71.5,69.93,69.53,65.6,73.81,28.89,38.72,90.69,84.79,Wealdstone,Altrincham,2.8,3.4,2.38,1.53,1.3,1.4,3.5,2.2,3.1,1.36,1.62,1.29,1.7,2.05,1.29,3.5,1.9,1.9,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,2024-12-10 00:00:00,England5,3 - 3,(2-2),3,3,6,2,2,4,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.29,-1.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,2.2,-1.0,-1.0,-1.0,1.29,1.4,-1.0,-1.0,0.0


# Checking For ROI of Profit Columns

In [10]:
# Select columns that end with 'P'
columns = [col for col in final_df_unique.columns if col.endswith('P')]

# Initialize lists to store results and games
results, games_list = [],  []

for col in columns:
    my_df = final_df_unique[final_df_unique[col] >= 0]
    numerator = np.sum(my_df[col]) - len(my_df)
    result = round(numerator / len(my_df) * 100, 2)
    
    # Append results and games to respective lists
    results.append(result)
    games_list.append(len(my_df))

# Convert results to a DataFrame with an additional column for Games
results_df = pd.DataFrame({
    'Column': columns,
    'ROI': results,
    'Games': games_list
})
results_df

Unnamed: 0,Column,ROI,Games
0,FT1P,-5.61,1450
1,FTXP,-3.0,230
2,FT2P,-15.01,852
3,FT1XP,-3.08,1174
4,FT12P,-5.99,707
5,FTX2P,-8.16,651
6,1.5OP,-5.66,2284
7,1.5UP,-3.81,248
8,2.5OP,-4.98,1184
9,2.5UP,-5.01,1348


# ROI of Profit Columns According To Leagues

In [11]:
# Step 1: Filter leagues with at least 10 games
league_counts = final_df_unique['League'].value_counts()
leagues_with_10_games = league_counts[league_counts >= 10].index
filtered_df = final_df_unique[final_df_unique['League'].isin(leagues_with_10_games)]

# Group by 'League' and calculate results for each group
grouped_results = {}
for league, group in filtered_df.groupby('League'):
    group_results = {}
    for col in columns:
        my_df = group[group[col] >= 0]
        numerator = np.sum(my_df[col]) - len(my_df)
        group_results[col] = round(numerator / len(my_df) * 100, 2)
    # Add the number of games for this league
    group_results['Games'] = round(len(group),2)
    grouped_results[league] = group_results

# Convert grouped results to a DataFrame for better visualization
grouped_results_df = pd.DataFrame(grouped_results).T

# Define a function to apply conditional formatting
def highlight_positive(val):
    # Highlight background to red if the value is positive
    color = 'background-color: red' if isinstance(val, (int, float)) and val > 0 else ''
    return color

# Apply the function to the DataFrame
styled_df = (
    grouped_results_df.style
    .applymap(highlight_positive)
    .format("{:.2f}")  # Format only numeric columns, excluding 'Games'
)

# Save the styled DataFrame to Excel
styled_df.to_excel("ROI_leagues.xlsx", index=True)

# Display the styled DataFrame
styled_df

Unnamed: 0,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,Games
Argentina,-15.51,21.12,-70.79,-10.1,2.0,-17.12,-12.16,10.88,-22.67,1.55,,1.85,,-1.25,-41.5,-9.42,-23.82,17.5,-100.0,-3.48,,-1.56,-1.59,34.96,-8.33,-8.98,130.0
Austria,-28.26,-100.0,6.8,-10.37,13.41,-32.8,-1.78,300.0,15.77,-17.77,-2.92,12.8,-100.0,2.22,-15.93,-7.0,-59.64,0.25,64.17,1.1,,7.05,-4.38,21.5,2.6,17.32,52.0
Belgium,2.61,18.89,-53.95,8.45,-7.64,-23.94,0.91,-59.38,-4.12,-10.19,-33.0,2.72,-100.0,-1.19,-25.26,1.65,12.0,-5.93,8.5,-3.53,-70.0,-4.71,-2.23,-21.55,-74.0,-12.77,77.0
Brazil,-22.69,-100.0,-17.67,-4.39,2.31,-5.7,-4.65,175.0,-3.96,-12.17,,-4.46,,-1.31,0.31,-20.74,-17.08,4.38,,-6.77,,6.48,-1.29,41.29,-100.0,-9.69,105.0
Denmark,7.81,75.0,-57.17,10.53,-22.62,-11.25,-1.24,,-9.97,-2.0,0.44,-10.0,33.33,-1.7,7.15,-1.62,45.28,-5.0,-64.44,11.96,-24.25,-0.19,0.52,50.0,23.79,11.19,46.0
England,-1.63,-8.33,-35.97,3.41,24.29,-7.11,-4.43,-100.0,-5.17,-1.79,-23.78,-4.57,-100.0,-2.73,9.31,-17.32,-29.5,14.58,24.25,-4.4,27.14,-15.79,-0.83,-14.29,3.38,3.11,76.0
England2,-9.68,10.0,-37.19,5.18,-2.12,-2.97,-13.96,29.63,-19.54,7.95,-83.0,-1.05,-100.0,2.61,-21.34,0.0,10.46,-13.07,-77.27,-7.95,,-13.63,-9.78,15.94,15.67,1.3,121.0
England3,20.36,-2.0,-12.77,-10.78,-2.42,-9.97,-5.49,-10.88,8.19,10.46,-41.29,-3.57,-58.33,-5.44,0.65,6.52,-13.53,-27.02,-0.03,-6.22,9.0,-9.56,-2.83,-13.46,-7.56,-5.1,127.0
England4,-37.42,118.33,-3.04,-13.24,-9.3,-1.08,-11.86,0.47,-12.1,-1.64,9.58,1.82,116.67,2.72,-18.22,-14.07,-25.7,-6.7,-58.67,-16.22,-26.8,-21.17,-12.39,-12.4,-13.17,-2.43,112.0
England5,-10.94,-100.0,-24.56,-1.73,-11.89,-15.0,-2.34,-59.38,-6.02,-28.84,-60.54,-12.68,-100.0,-2.9,-13.5,-28.37,2.8,-19.7,-1.07,-7.25,-33.0,-9.36,-3.88,-56.69,-39.0,-8.41,116.0


# Creating Optimum Threshold for Each Prediction Column

In [12]:
# Assuming `df` is your DataFrame and it contains the columns for percentages and correctness
def calculate_threshold(percentages, predictions):
    # Ensure inputs are pandas Series
    percentages = pd.Series(percentages)
    predictions = pd.Series(predictions)
    
    thresholds = percentages.unique()
    best_threshold = 0
    best_j_stat = -np.inf  # Start with negative infinity for comparison
    
    for threshold in thresholds:
        # Predict 1s based on the threshold
        predicted_1s = (percentages >= threshold).astype(int)
        
        # Calculate true positives, true negatives, false positives, false negatives
        true_positives = ((predicted_1s == 1) & (predictions == 1)).sum()
        true_negatives = ((predicted_1s == 0) & (predictions == 0)).sum()
        false_positives = ((predicted_1s == 1) & (predictions == 0)).sum()
        false_negatives = ((predicted_1s == 0) & (predictions == 1)).sum()
        
        # Calculate Sensitivity (Recall) and Specificity
        sensitivity = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
        specificity = true_negatives / (true_negatives + false_positives) if (true_negatives + false_positives) > 0 else 0
        
        # Calculate Youden's J Statistic
        j_stat = sensitivity + specificity - 1
        
        # Update best threshold if J statistic improves
        if j_stat > best_j_stat:
            best_j_stat = j_stat
            best_threshold = threshold
    
    return best_threshold, round(best_j_stat, 2)

# Select only numeric columns
numeric_columns = final_df_unique.select_dtypes(include=[np.number])

# Remove rows where any numeric value is greater than 100
final_df_unique = final_df_unique[(numeric_columns <= 100).all(axis=1)]

#Selecting dataframes with model predictions
ft1df = final_df_unique[final_df_unique['df1_FT1'] >= final_df_unique[['df1_FTX', 'df1_FT2']].max(axis=1)]
ftxdf = final_df_unique[final_df_unique['df1_FTX'] >= final_df_unique[['df1_FT1', 'df1_FT2']].max(axis=1)]
ft2df = final_df_unique[final_df_unique['df1_FT2'] >= final_df_unique[['df1_FTX', 'df1_FT1']].max(axis=1)]
dc1xdf = final_df_unique[final_df_unique['df1_DC1X'] >= final_df_unique[['df1_DC12', 'df1_DCX2']].max(axis=1)]
dc12df = final_df_unique[final_df_unique['df1_DC12'] >= final_df_unique[['df1_DC1X', 'df1_DCX2']].max(axis=1)]
dcx2df = final_df_unique[final_df_unique['df1_DCX2'] >= final_df_unique[['df1_DC1X', 'df1_DC12']].max(axis=1)]
over15df, under15df = final_df_unique[final_df_unique['df1_1.5O'] >= 50], final_df_unique[final_df_unique['df1_1.5O'] < 50]
over25df, under25df = final_df_unique[final_df_unique['df1_2.5O'] >= 50], final_df_unique[final_df_unique['df1_2.5O'] < 50]
over35df, under35df = final_df_unique[final_df_unique['df1_3.5U'] < 50], final_df_unique[final_df_unique['df1_3.5U'] >= 50]
over45df, under45df = final_df_unique[final_df_unique['df1_4.5U'] < 50], final_df_unique[final_df_unique['df1_4.5U'] >= 50]
bttsdf, ottsdf = final_df_unique[final_df_unique['df1_BTTS'] >= 50], final_df_unique[final_df_unique['df1_BTTS'] < 50]
ht1df = final_df_unique[final_df_unique['df1_HT1'] >= final_df_unique[['df1_HTX', 'df1_HT2']].max(axis=1)]
htxdf = final_df_unique[final_df_unique['df1_HTX'] >= final_df_unique[['df1_HT1', 'df1_HT2']].max(axis=1)]
ht2df = final_df_unique[final_df_unique['df1_HT2'] >= final_df_unique[['df1_HT1', 'df1_HTX']].max(axis=1)]
ht1xdf = final_df_unique[final_df_unique['df1_HTDC1X'] >= final_df_unique[['df1_HTDC12', 'df1_HTDCX2']].max(axis=1)]
ht12df = final_df_unique[final_df_unique['df1_HTDC12'] >= final_df_unique[['df1_HTDC1X', 'df1_HTDCX2']].max(axis=1)]
htx2df = final_df_unique[final_df_unique['df1_HTDCX2'] >= final_df_unique[['df1_HTDC1X', 'df1_HTDC12']].max(axis=1)]
htover05df, htunder05df = final_df_unique[final_df_unique['df1_HT0.5O'] >= 50], final_df_unique[final_df_unique['df1_HT0.5O'] < 50]
htover15df, htunder15df = final_df_unique[final_df_unique['df1_HT1.5U'] < 50], final_df_unique[final_df_unique['df1_HT1.5U'] >= 50]

ft1t, ft1a = calculate_threshold(ft1df['df1_FT1'], ft1df['FT1'])
ftxt, ftxa = calculate_threshold(ftxdf['df1_FTX'], ftxdf['FTX'])
ft2t, ft2a = calculate_threshold(ft2df['df1_FT2'], ft2df['FT2'])
ft1xt, ft1xa = calculate_threshold(dc1xdf['df1_DC1X'], dc1xdf['FT1X'])
ft12t, ft12a = calculate_threshold(dc12df['df1_DC12'], dc12df['FT12'])
ftx2t, ftx2a = calculate_threshold(dcx2df['df1_DCX2'], dcx2df['FTX2'])
over15t, over15a = calculate_threshold(over15df['df1_1.5O'], over15df['1.5O'])
under15t, under15a = calculate_threshold(under15df['df1_1.5O'], under15df['1.5U'])
over25t, over25a = calculate_threshold(over25df['df1_2.5O'], over25df['2.5O'])
under25t, under25a = calculate_threshold(under25df['df1_2.5O'], under25df['2.5U'])
over35t, over35a = calculate_threshold(over35df['df1_3.5U'], over35df['3.5O'])
under35t, under35a = calculate_threshold(under35df['df1_3.5U'], under35df['3.5U'])
over45t, over45a = calculate_threshold(over45df['df1_4.5U'], over45df['4.5O'])
under45t, under45a = calculate_threshold(under45df['df1_4.5U'], under45df['4.5U'])
bttst, bttsa = calculate_threshold(bttsdf['df1_BTTS'], bttsdf['BTTS'])
ottst, ottsa = calculate_threshold(ottsdf['df1_BTTS'], ottsdf['OTTS'])
ht1t, ht1a = calculate_threshold(ht1df['df1_HT1'], ht1df['HT1'])
htxt, htxa = calculate_threshold(htxdf['df1_HTX'], htxdf['HTX'])
ht2t, ht2a = calculate_threshold(ht2df['df1_HT2'], ht2df['HT2'])
ht1xt, ht1xa = calculate_threshold(ht1xdf['df1_HTDC1X'], ht1xdf['HT1X'])
ht12t, ht12a = calculate_threshold(ht12df['df1_HTDC12'], ht12df['HT12'])
htx2t, htx2a = calculate_threshold(htx2df['df1_HTDCX2'], htx2df['HTX2'])
htover05t, htover05a = calculate_threshold(htover05df['df1_HT0.5O'], htover05df['HT0.5O'])
htunder05t, htunder05a = calculate_threshold(htunder05df['df1_HT0.5O'], htunder05df['HT0.5U'])
htover15t, htover15a = calculate_threshold(htover15df['df1_HT1.5U'], htover15df['HT1.5O'])
htunder15t, htunder15a = calculate_threshold(htunder15df['df1_HT1.5U'], htunder15df['HT1.5U'])

new_ft1df, new_ftxdf, new_ft2df = ft1df[ft1df['df1_FT1'] >= ft1t],ftxdf[ftxdf['df1_FTX'] >= ftxt],ft2df[ft2df['df1_FT2'] >= ft2t]
new_ft1xdf, new_ft12df, new_ftx2df = dc1xdf[dc1xdf['df1_DC1X'] >= ft1xt],dc12df[dc12df['df1_DC12'] >= ft12t],dcx2df[dcx2df['df1_DCX2'] >= ftx2t] 
new_over15, new_under15 = over15df[over15df['df1_1.5O'] >= over15t], under15df[under15df['df1_1.5O'] <= under15t]
new_over25, new_under25 = over25df[over25df['df1_2.5O'] >= over25t], under25df[under25df['df1_2.5O'] <= under25t]
new_over35, new_under35 = over35df[over35df['df1_3.5U'] <= over35t], under35df[under35df['df1_3.5U'] >= under35t]
new_over45, new_under45 = over45df[over45df['df1_4.5U'] <= over45t], under45df[under45df['df1_4.5U'] >= under45t]
new_btts, new_otts = bttsdf[bttsdf['df1_BTTS'] >= bttst], ottsdf[ottsdf['df1_BTTS'] <= ottst]
new_ht1df, new_htxdf, new_ht2df = ht1df[ht1df['df1_HT1'] >= ht1t],htxdf[htxdf['df1_HTX'] >= htxt],ht2df[ht2df['df1_HT2'] >= ht2t]
new_ht1xdf, new_ht12df, new_htx2df = ht1xdf[ht1xdf['df1_HTDC1X'] >= ht1xt],ht12df[ht12df['df1_HTDC12'] >= ht12t],htx2df[htx2df['df1_HTDCX2'] >= htx2t]
new_htover05, new_htunder05 = htover05df[htover05df['df1_HT0.5O'] >= htover05t], htunder05df[htunder05df['df1_HT0.5O'] <= htunder05t]
new_htover15, new_htunder15 = htover15df[htover15df['df1_HT1.5U'] <= htover15t], htunder15df[htunder15df['df1_HT1.5U'] >= htunder15t]

# Store the results in a list
results = [
('FT1', ft1t, ft1a, len(new_ft1df), round(len(new_ft1df)/len(ft1df)*100,2), np.sum(new_ft1df['FT1P']) - len(new_ft1df)),
('FTX', ftxt, ftxa, len(new_ftxdf), round(len(new_ftxdf)/len(ftxdf)*100,2), np.sum(new_ftxdf['FTXP']) - len(new_ftxdf)),
('FT2', ft2t, ft2a, len(new_ft2df), round(len(new_ft2df)/len(ft2df)*100,2), np.sum(new_ft2df['FT2P']) - len(new_ft2df)),
('FT1X', ft1xt, ft1xa, len(new_ft1xdf), round(len(new_ft1xdf)/len(dc1xdf)*100,2), np.sum(new_ft1xdf['FT1XP']) - len(new_ft1xdf)),
('FT12', ft12t, ft12a, len(new_ft12df), round(len(new_ft12df)/len(dc12df)*100,2), np.sum(new_ft12df['FT12P']) - len(new_ft12df)),
('FTX2', ftx2t, ftx2a, len(new_ftx2df), round(len(new_ftx2df)/len(dcx2df)*100,2), np.sum(new_ftx2df['FTX2P']) - len(new_ftx2df)),
('1.5O', over15t, over15a, len(new_over15), round(len(new_over15)/len(over15df)*100,2), np.sum(new_over15['1.5OP'])-len(new_over15)),
('1.5U', under15t, under15a, len(new_under15), round(len(new_under15)/len(under15df)*100,2), np.sum(new_under15['1.5UP'])-len(new_under15)),
('2.5O', over25t, over25a, len(new_over25), round(len(new_over25)/len(over25df)*100,2), np.sum(new_over25['2.5OP'])-len(new_over25)),
('2.5U', under25t, under25a, len(new_under25), round(len(new_under25)/len(under25df)*100,2), np.sum(new_under25['2.5UP'])-len(new_under25)),
('3.5O', over35t, over35a, len(new_over35), round(len(new_over35)/len(over35df)*100,2), np.sum(new_over35['3.5OP'])-len(new_over35)),
('3.5U', under35t, under35a, len(new_under35), round(len(new_under35)/len(under35df)*100,2), np.sum(new_under35['3.5UP'])-len(new_under35)),
('4.5O', over45t, over45a, len(new_over45), round(len(new_over45)/len(over45df)*100,2), np.sum(new_over45['4.5OP'])-len(new_over45)),
('4.5U', under45t, under45a, len(new_under45), round(len(new_under45)/len(under45df)*100,2), np.sum(new_under45['4.5UP'])-len(new_under45)),
('BTTS', bttst, bttsa, len(new_btts), round(len(new_btts)/len(bttsdf)*100,2), np.sum(new_btts['BTTSP'])-len(new_btts)),
('OTTS', ottst, ottsa, len(new_otts), round(len(new_otts)/len(ottsdf)*100,2), np.sum(new_otts['OTTSP'])-len(new_otts)),
('HT1', ht1t, ht1a, len(new_ht1df), round(len(new_ht1df)/len(ht1df)*100,2), np.sum(new_ht1df['HT1P']) - len(new_ht1df)),
('HTX', htxt, htxa, len(new_htxdf), round(len(new_htxdf)/len(htxdf)*100,2), np.sum(new_htxdf['HTXP']) - len(new_htxdf)),
('HT2', ht2t, ht2a, len(new_ht2df), round(len(new_ht2df)/len(ht2df)*100,2), np.sum(new_ht2df['HT2P']) - len(new_ht2df)),
('HT1X', ht1xt, ht1xa, len(new_ht1xdf), round(len(new_ht1xdf)/len(ht1xdf)*100,2), np.sum(new_ht1xdf['HT1XP']) - len(new_ht1xdf)),
('HT12', ht12t, ht12a, len(new_ht12df), round(len(new_ht12df)/len(ht12df)*100,2), np.sum(new_ht12df['HT12P']) - len(new_ht12df)),
('HTX2', htx2t, htx2a, len(new_htx2df), round(len(new_htx2df)/len(htx2df)*100,2), np.sum(new_htx2df['HTX2P']) - len(new_htx2df)),
('HT0.5O', htover05t, htover05a, len(new_htover05), round(len(new_htover05)/len(htover05df)*100,2), np.sum(new_htover05['HT0.5OP'])-len(new_htover05)),
('HT0.5U', htunder05t, htunder05a, len(new_htunder05), round(len(new_htunder05)/len(htunder05df)*100,2), np.sum(new_htunder05['HT0.5UP'])-len(new_htunder05)),
('HT1.5O', htover15t, htover15a, len(new_htover15), round(len(new_htover15)/len(htover15df)*100,2), np.sum(new_htover15['HT1.5OP'])-len(new_htover15)),
('HT1.5U', htunder15t, htunder15a, len(new_htunder15), round(len(new_htunder15)/len(htunder15df)*100,2), np.sum(new_htunder15['HT1.5UP'])-len(new_htunder15))
]

# Create a DataFrame from the results
results_df = pd.DataFrame(results, columns=['Prediction', 'Threshold', 'J-Stat', 'Games', 'Games%', 'Profit'])
results_df['ROI'] = round(results_df['Profit'] / results_df['Games'] * 100, 2)
print('Number of matches: ', len(final_df_unique))
results_df

Number of matches:  2486


Unnamed: 0,Prediction,Threshold,J-Stat,Games,Games%,Profit,ROI
0,FT1,59.31,0.18,595,41.64,9.21,1.55
1,FTX,37.03,0.12,189,88.73,8.54,4.52
2,FT2,57.25,0.11,270,31.95,-34.35,-12.72
3,FT1X,87.6,0.15,430,37.62,-5.25,-1.22
4,FT12,77.4,0.02,329,46.53,-16.39,-4.98
5,FTX2,80.19,0.1,375,58.87,-25.39,-6.77
6,1.5O,77.58,0.08,1034,46.12,-50.49,-4.88
7,1.5U,35.13,0.03,61,25.0,-4.52,-7.41
8,2.5O,60.87,0.05,691,59.31,-29.38,-4.25
9,2.5U,49.43,0.01,1295,98.03,-73.31,-5.66


# Testing Best / Most Profitable Model Predictions

In [20]:
# Select columns starting with 'df1_'
predictions = ['df1_FT1', 'df1_FTX', 'df1_FT2', 'df1_DC1X', 'df1_DC12', 'df1_DCX2', 
               'df1_1.5O', 'df1_2.5O', 'df1_3.5U', 'df1_4.5U', 'df1_BTTS', 
               'df1_HT1', 'df1_HTX', 'df1_HT2', 'df1_HTDC1X', 'df1_HTDC12', 'df1_HTDCX2', 
               'df1_HT0.5O', 'df1_HT1.5U']

# Select columns starting with 'df2_'
results = ['FT1', 'FTX', 'FT2', 'FT1X', 'FT12', 'FTX2', 
           '1.5O', '2.5O', '3.5U', '4.5U', 'BTTS',
            'HT1', 'HTX', 'HT2', 'HT1X', 'HT12', 'HTX2', 
            'HT0.5O', 'df2_HT1.5U']

# Select columns ending with 'P'
profits = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '2.5OP', '3.5UP', '4.5UP', 'BTTSP',
            'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT1.5UP']

bet, percentage, profit = [], [], []

for i in range(len(final_df_unique)):
    my_list = []
    for j in predictions:
        my_list.append(final_df_unique[j].iloc[i])
    percentage.append(max(my_list))
    max_index = my_list.index(max(my_list))
    bet.append(results[max_index])
    profit_column = profits[max_index]
    profit.append(final_df_unique[profit_column].iloc[i])

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['df1_League'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bet,
    'Percentage': percentage,
    'Profit': profit
})


print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs),2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  2486
Correct Predictions: 83.5880933226066
Profit: -85.08 ROI: -3.42%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit
2527,England2,Sheffield Wed,Blackburn,4.5U,90.76,1.11
2528,England2,Sunderland,Bristol City,HT1X,95.41,1.17
2529,England2,Leeds Utd,Middlesbrough,4.5U,86.69,1.14
2530,England5,Solihull Moors,Aldershot Town,1.5O,91.56,1.18
2531,England5,Wealdstone,Altrincham,4.5U,90.33,0.0


In [27]:
final_df_unique['df1_OTTS'] = 100 - final_df_unique['df1_BTTS']
final_df_unique['df1_1.5U'] = 100 - final_df_unique['df1_1.5O']
final_df_unique['df1_2.5U'] = 100 - final_df_unique['df1_2.5O']
final_df_unique['df1_3.5O'] = 100 - final_df_unique['df1_3.5U']
final_df_unique['df1_4.5O'] = 100 - final_df_unique['df1_4.5U']
final_df_unique['df1_HT0.5U'] = 100 - final_df_unique['df1_HT0.5O']
final_df_unique['df1_HT1.5O'] = 100 - final_df_unique['df1_HT1.5U'] 

# Select columns starting with 'df1_'
predictions = ['df1_FT1', 'df1_FTX', 'df1_FT2', 'df1_DC1X', 'df1_DC12', 'df1_DCX2', 
               'df1_1.5O', 'df1_1.5U', 'df1_2.5O','df1_2.5U','df1_3.5O', 'df1_3.5U', 
               'df1_4.5O', 'df1_4.5U', 'df1_BTTS', 'df1_OTTS',
               'df1_HT1', 'df1_HTX', 'df1_HT2', 'df1_HTDC1X', 'df1_HTDC12', 'df1_HTDCX2', 
               'df1_HT0.5O', 'df1_HT0.5U', 'df1_HT1.5O', 'df1_HT1.5U']

# Select columns starting with 'df2_'
odds = ['df2_FT1', 'df2_FTX', 'df2_FT2', 'df2_DC1X', 'df2_DC12', 'df2_DCX2', 
           'df2_1.5O', 'df2_1.5U', 'df2_2.5O','df2_2.5U', 'df2_3.5O','df2_3.5U', 
           'df2_4.5O', 'df2_4.5U', 'df2_BTTS', 'df2_OTTS',
            'df2_HT1', 'df2_HTX', 'df2_HT2', 'df2_HT1X', 'df2_HT12', 'df2_HTX2', 
            'df2_HT0.5O', 'df2_HT0.5U', 'df2_HT1.5O', 'df2_HT1.5U']

# Select columns ending with 'P'
profit = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '1.5UP', '2.5OP', '2.5UP', '3.5OP', '3.5UP', '4.5OP', '4.5UP', 
           'BTTSP', 'OTTSP', 'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT0.5UP', 'HT1.5OP','HT1.5UP']

bets, percentages, profits, difference = [], [], [], [] 
for i in range(len(final_df_unique)):
    my_list = []
    valid_indices = []  # To keep track of indices where profit is not negative
    for j in range(len(predictions)):
        pred_column = predictions[j]
        odds_column = odds[j]
        profit_column = profit[j]  # Corresponding profit column
        
        # Calculate the value
        my_value = (100 / final_df_unique[pred_column].iloc[i]) - final_df_unique[odds_column].iloc[i]
        
        # Only add to the list if the corresponding profit is non-negative
        if final_df_unique[profit_column].iloc[i] >= 0:
            my_list.append(my_value)
            valid_indices.append(j)
        else:
            my_list.append(float('-inf'))  # Set to negative infinity to ignore in max()
    
    if valid_indices:  # Ensure there is at least one valid index
        max_index = my_list.index(max(my_list))
        rec_bet = profit[max_index]
        percent_bet = predictions[max_index]
        bets.append(rec_bet)
        percentages.append(final_df_unique[percent_bet].iloc[i])
        rec_profit = profit[max_index]
        profits.append(final_df_unique[rec_profit].iloc[i])
        difference.append(round(max(my_list), 2))
    else:
        # Handle case where no valid profits are found for this match
        bets.append(None)
        percentages.append(None)
        profits.append(None)
        difference.append(None)

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['df1_League'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bets,
    'Percentage': percentages,
    'Profit': profits,
    'Difference': difference
}).dropna()  # Drop rows with None values

print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs), 2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  2486
Correct Predictions: 69.79082864038617
Profit: -57.86 ROI: -2.33%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit,Difference
2527,England2,Sheffield Wed,Blackburn,1.5OP,70.2,0.0,0.12
2528,England2,Sunderland,Bristol City,HT0.5OP,52.51,0.0,0.46
2529,England2,Leeds Utd,Middlesbrough,HT1.5UP,50.12,1.44,0.56
2530,England5,Solihull Moors,Aldershot Town,4.5UP,61.35,1.18,0.45
2531,England5,Wealdstone,Altrincham,HTXP,39.82,2.2,0.31


# Combining Different Bets

In [32]:
# Define the list of base columns and multiplier columns
base_columns = ['df1_FT1', 'df1_FTX', 'df1_FT2']  
multiplier_columns = ['df1_1.5O', 'df1_2.5O', 'df1_2.5U', 'df1_3.5U', 'df1_4.5U']

# Nested loop: Iterate over each base column and multiplier column
for base_column in base_columns:
    for multiplier in multiplier_columns:
        # Create a new column name dynamically
        new_column = f"{base_column}/{multiplier.split('_')[1]}"
        
        # Perform the calculation and assign it to the new column
        final_df_unique[new_column] = round(final_df_unique[base_column] * final_df_unique[multiplier] / 100, 2)

final_df_unique.tail()

Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,df1_OTTS,df1_1.5U,df1_2.5U,df1_3.5O,df1_4.5O,df1_HT0.5U,df1_HT1.5O,df1_FT1/1.5O,df1_FT1/2.5O,df1_FT1/2.5U,df1_FT1/3.5U,df1_FT1/4.5U,df1_FTX/1.5O,df1_FTX/2.5O,df1_FTX/2.5U,df1_FTX/3.5U,df1_FTX/4.5U,df1_FT2/1.5O,df1_FT2/2.5O,df1_FT2/2.5U,df1_FT2/3.5U,df1_FT2/4.5U
2527,England2,Sheffield Wed,Blackburn,29.67,31.01,39.33,1-1,60.68,69.0,70.34,70.2,42.3,78.45,90.76,49.6,16.34,55.86,27.78,0-0,72.2,44.12,83.64,49.74,85.11,66.27,72.34,29.61,36.8,90.31,86.05,Sheffield Wed,Blackburn,2.2,3.5,3.1,1.36,1.29,1.67,2.88,2.1,3.75,1.25,1.67,1.4,1.8,1.91,1.3,3.5,2.0,1.85,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,2024-12-10 00:00:00,England2,0 - 1,(0-0),0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,3.1,-1.0,-1.0,1.67,0.0,-1.0,-1.0,1.85,-1.0,1.3,-1.0,1.11,-1.0,1.91,-1.0,2.1,-1.0,-1.0,-1.0,1.4,-1.0,2.75,-1.0,1.36,50.4,29.8,57.7,21.55,9.24,50.26,14.89,20.83,12.55,17.12,23.28,26.93,21.77,13.12,17.89,24.33,28.14,27.61,16.64,22.69,30.85,35.7
2528,England2,Sunderland,Bristol City,63.67,24.74,11.58,1-0,88.41,75.25,36.32,71.32,44.09,77.01,89.89,41.02,45.29,50.12,4.54,0-0,95.41,49.83,54.66,52.51,82.95,83.35,47.67,53.51,13.78,73.24,97.18,Sunderland,Bristol City,1.9,3.4,4.1,1.22,1.3,1.91,2.6,2.05,4.75,1.17,1.73,1.5,1.91,1.8,1.36,3.2,2.1,1.73,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-12-10 00:00:00,England2,1 - 1,(0-0),1,1,2,0,0,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0.0,-1.0,-1.0,1.22,-1.0,-1.0,1.36,-1.0,-1.0,1.73,-1.0,1.25,-1.0,1.08,-1.0,0.0,-1.0,2.05,-1.0,1.17,-1.0,-1.0,0.0,-1.0,-1.0,1.33,58.98,28.68,55.91,22.99,10.11,47.49,17.05,45.41,28.07,35.6,49.03,57.23,17.64,10.91,13.83,19.05,22.24,8.26,5.11,6.47,8.92,10.41
2529,England2,Leeds Utd,Middlesbrough,61.27,24.53,14.19,2-0,85.8,75.46,38.72,75.94,49.95,72.01,86.69,48.01,42.9,32.71,23.93,1-0,75.61,66.83,56.64,81.39,50.12,84.57,55.2,55.74,19.23,71.19,95.2,Leeds,Middlesbrough,1.83,3.7,4.1,1.22,1.29,1.91,2.4,2.25,4.5,1.17,1.57,1.53,1.73,2.0,1.25,4.0,1.85,2.0,3.2,1.36,5.5,1.14,1.36,3.0,2.63,1.44,2024-12-10 00:00:00,England2,3 - 1,(1-0),3,1,4,1,0,1,1,0,0,1,1,0,1,0,1,0,1,0,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1.83,-1.0,-1.0,1.22,-1.0,-1.0,1.25,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.14,-1.0,0.0,2.4,-1.0,-1.0,1.17,-1.0,-1.0,1.36,-1.0,-1.0,1.44,51.99,24.06,50.05,27.99,13.31,18.61,49.88,46.53,30.6,30.67,44.12,53.11,18.63,12.25,12.28,17.66,21.27,10.78,7.09,7.1,10.22,12.3
2530,England5,Solihull Moors,Aldershot Town,60.0,18.76,21.1,2-1,78.76,81.1,39.86,91.56,77.17,41.83,61.35,72.13,41.75,32.27,25.69,1-0,74.02,67.44,57.96,79.63,57.0,92.19,77.81,72.5,44.5,52.65,80.51,Solihull Moors,Aldershot,1.85,3.7,3.9,1.25,1.29,1.91,2.4,2.38,4.0,1.22,1.53,1.53,1.57,2.25,1.18,4.5,1.62,2.25,2.5,1.5,4.5,1.18,1.3,3.4,2.5,1.5,2024-12-10 00:00:00,England5,2 - 1,(1-0),2,1,3,1,0,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,1,1.85,-1.0,-1.0,-1.0,1.29,-1.0,1.18,-1.0,1.62,-1.0,0.0,-1.0,-1.0,1.18,1.57,-1.0,2.4,-1.0,-1.0,1.22,-1.0,-1.0,1.3,-1.0,-1.0,1.5,27.87,8.44,22.83,58.17,38.65,20.37,43.0,54.94,46.3,13.7,25.1,36.81,17.18,14.48,4.28,7.85,11.51,19.32,16.28,4.82,8.83,12.94
2531,England5,Wealdstone,Altrincham,29.18,28.48,42.34,1-1,57.66,71.52,70.82,69.89,43.21,77.73,90.33,49.01,28.44,39.82,31.68,0-0,68.26,60.12,71.5,69.93,69.53,65.6,73.81,28.89,38.72,90.69,84.79,Wealdstone,Altrincham,2.8,3.4,2.38,1.53,1.3,1.4,3.5,2.2,3.1,1.36,1.62,1.29,1.7,2.05,1.29,3.5,1.9,1.9,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,2024-12-10 00:00:00,England5,3 - 3,(2-2),3,3,6,2,2,4,0,1,0,1,0,1,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,-1.0,-1.0,0.0,-1.0,0.0,-1.0,1.29,-1.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,2.2,-1.0,-1.0,-1.0,1.29,1.4,-1.0,-1.0,0.0,50.99,30.11,56.79,22.27,9.67,30.07,30.47,20.39,12.61,16.57,22.68,26.36,19.9,12.31,16.17,22.14,25.73,29.59,18.3,24.04,32.91,38.25
