# Reading Model Predictions and Bet365 Odds

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import warnings

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"
uefa = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})
uefa['FT'] = uefa['FTHG'].astype(str) + ' - ' + uefa['FTAG'].astype(str)
uefa['HT'] = '(' + uefa['HTHG'].astype(str) + '-' + uefa['HTAG'].astype(str) + ')'
uefa['FTTG'] = uefa['FTHG'] + uefa['FTAG']
uefa['HTTG'] = uefa['HTHG'] + uefa['HTAG']

predictions = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/_predictions.xlsx')
bet365_odds = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/final_odds.xlsx')
bet365_odds.tail()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
4805,Livingston,Morton,1.48,4.33,6.0,1.11,1.2,2.63,2.0,2.2,6.5,1.1,1.62,1.73,2.2,1.62,1.3,3.4,2.03,1.83,3.5,1.29,6.5,1.1,1.4,2.75,3.0,1.36
4806,Arbroath,Kelty Hearts,1.91,3.4,3.6,1.22,1.25,1.73,2.5,2.2,4.0,1.22,1.62,1.5,1.67,2.1,1.22,4.0,1.8,2.0,2.75,1.4,5.0,1.14,1.36,3.0,2.63,1.44
4807,Spartans,Forfar Athletic,2.0,3.1,3.4,1.25,1.29,1.67,2.6,2.25,3.6,1.29,1.57,1.5,1.57,2.25,1.2,4.33,1.7,2.1,2.63,1.44,4.5,1.17,1.33,3.25,2.5,1.5
4808,Celtic,Aberdeen,1.17,8.0,12.0,1.05,1.08,5.0,1.53,3.1,9.5,1.04,1.36,2.38,1.91,1.91,1.1,7.0,1.33,3.4,1.8,2.0,2.75,1.44,1.18,4.5,1.83,1.83
4809,Chelsea,Southampton,1.22,7.0,9.5,1.07,1.1,4.0,1.57,3.1,8.0,1.06,1.36,2.25,1.7,2.05,1.08,8.0,1.3,3.5,1.8,2.0,2.75,1.44,1.18,4.5,1.95,1.9


# Merging 2 DataFrames for similarity of values

In [2]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Function for fuzzy matching on both columns
def fuzzy_merge_on_two_columns(df1, df2, key1_home, key1_away, key2_home, key2_away, threshold=80):
    """
    Merge two DataFrames based on fuzzy matching of both Home and Away columns.
    - df1, df2: DataFrames to merge
    - key1_home, key1_away: column names for 'Home' and 'Away' in df1
    - key2_home, key2_away: column names for 'Home' and 'Away' in df2
    - threshold: minimum similarity score for a match
    """
    matches = []
    
    for i, row1 in df1.iterrows():
        home_team1, away_team1 = row1[key1_home], row1[key1_away]
        
        # Find the best match in df2 for both Home and Away teams
        best_match = None
        best_score = 0
        
        for j, row2 in df2.iterrows():
            home_team2, away_team2 = row2[key2_home], row2[key2_away]
            
            # Calculate similarity for both Home and Away columns
            home_score = fuzz.ratio(home_team1, home_team2)
            away_score = fuzz.ratio(away_team1, away_team2)
            
            # Average similarity score for the pair
            avg_score = (home_score + away_score) / 2
            
            # Check if this is the best match
            if avg_score >= threshold and avg_score > best_score:
                best_match = j
                best_score = avg_score
        
        # If a match was found above the threshold, save the indices
        if best_match is not None:
            matches.append((i, best_match))

    # Create matched DataFrames based on indices
    matched_df1 = df1.loc[[i for i, _ in matches]].reset_index(drop=True)
    matched_df2 = df2.loc[[j for _, j in matches]].reset_index(drop=True)
    
    # Concatenate the matched data side by side
    return pd.concat([matched_df1, matched_df2], axis=1, keys=["df1", "df2"])

# Use the function to merge
merged_df = fuzzy_merge_on_two_columns(predictions, bet365_odds, 'Home', 'Away', 'Home', 'Away', threshold=80)
merged_df.tail()

Unnamed: 0_level_0,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2
Unnamed: 0_level_1,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
3955,Saudiarabia,Al Raed,Al Shabab,24.89,16.07,59.02,0-1,40.96,83.91,75.09,81.29,63.58,58.53,76.67,58.97,27.06,40.49,32.37,0-0,67.55,59.43,72.86,70.17,67.77,70.58,87.14,34.61,60.79,87.39,66.23,Al Raed,Al Shabab,4.0,3.8,1.73,2.0,1.25,1.22,4.33,2.25,2.3,1.57,1.57,1.2,1.73,2.0,1.22,4.0,1.73,2.08,2.75,1.4,5.0,1.14,1.33,3.25,2.63,1.44
3956,Saudiarabia,Al Hilal,Al Kholood,84.81,6.97,6.5,3-1,91.78,91.31,13.47,93.09,84.98,27.87,45.74,61.81,69.09,19.52,7.92,1-0,88.61,77.01,27.44,85.62,37.15,96.15,64.0,87.96,27.9,26.08,89.39,Al Hilal,Al Kholood,1.13,8.5,11.0,1.02,1.04,4.5,1.44,3.2,9.5,1.04,1.36,2.63,1.83,1.83,1.07,7.5,1.29,3.5,1.67,2.1,2.5,1.5,1.17,4.5,1.83,1.98
3957,Saudiarabia,Al Wahda,Al Nassr,8.88,8.76,81.6,1-3,17.64,90.48,90.36,91.28,80.37,36.48,55.71,60.24,8.84,29.98,60.42,0-1,38.82,69.26,90.4,76.64,57.73,63.67,95.72,27.17,83.91,90.8,35.08,Al Wehda,Al Nassr,5.25,5.25,1.42,2.75,1.14,1.14,5.5,2.5,1.83,1.83,1.5,1.13,1.73,2.0,1.17,4.5,1.53,2.38,2.25,1.57,4.0,1.22,1.25,3.75,2.2,1.62
3958,Saudiarabia,Al Ahli Jeddah,Al Quadisiya,39.73,24.58,35.68,1-0,64.31,75.41,60.26,52.88,29.52,87.54,95.63,33.83,21.98,51.6,26.41,0-0,73.58,48.39,78.01,55.1,81.89,62.69,59.75,25.9,23.12,92.23,93.55,Al Ahli SC,Al Qadisiya,2.2,3.5,2.9,1.4,1.29,1.62,2.75,2.25,3.4,1.3,1.57,1.4,1.62,2.2,1.22,4.0,1.7,2.1,2.63,1.44,4.5,1.17,1.33,3.25,2.63,1.44
3959,Scotland,Celtic,Aberdeen,89.03,7.71,2.73,3-0,96.74,91.76,10.44,86.7,69.05,51.69,70.8,33.0,70.87,24.06,3.3,1-0,94.93,74.17,27.36,78.47,51.86,95.24,34.58,81.88,6.86,38.73,98.53,Celtic,Aberdeen,1.18,7.5,12.0,1.05,1.1,4.5,1.53,3.1,9.5,1.04,1.36,2.38,1.91,1.91,1.11,6.5,1.36,3.2,1.91,1.91,3.2,1.36,1.2,4.33,2.03,1.83


# Scraping SoccerStats For Match Results

In [3]:
final =  pd.DataFrame()
liqa = ''
unique_leagues = predictions['League'].unique().tolist()

# Convert to lowercase and exclude UEFA competitions
uefa_list = ['unl', 'uel', 'ucl', 'ufcl']
unique_leagues = [league.lower() for league in unique_leagues if league.lower() not in uefa_list]

for i in unique_leagues:
    URL = "https://www.soccerstats.com/results.asp?league=" + i + "&pmtype=bydate"
    page = requests.get(URL)
    liqa = i
    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="btable")
    sth = results.find_all("tr", class_="odd")
    sth


    date, league, home, away, ft, ht = [], [], [], [], [],[]
    for i in sth:
        date.append(i.find_all("td", align = 'right')[0].get_text(strip=True))
        league.append(liqa.capitalize())
        home.append(i.find_all("td", align = 'right')[1].get_text(strip=True))
        away.append(i.find("td", align = "left").get_text(strip = True))
        ft.append(i.find_all("td", align = 'center')[0].get_text(strip = True))
        try:
            ht.append(i.find_all("td", align = 'center')[2].get_text(strip = True))
        except IndexError as e:
            ht.append('NA')#print("Last output before error occurred:", i.find_all("td", align = 'center'))

    data = {'Date': date, 'League': league,'Home': home, 'Away': away, 'FT': ft, 'HT': ht}

# Create a DataFrame from the dictionary
    df = pd.DataFrame(data)

# Replace empty strings with NaN
    #next_df = df[(df['Date'] == formatted_date) & (df['HT'] == '')]
    df.replace('', pd.NA, inplace=True)

# Drop rows with NaN values
    df_cleaned = df.dropna()

#For Half-Time Results
    hthg, htag = [], []
    for i in df_cleaned['HT']:
        if i == 'NA':
            hthg.append('NA')
            htag.append('NA')
        elif i == '+' or i == '-':
            hthg.append('NA')
            htag.append('NA')
        else:
            try:
                hthg.append(int(i[1]))
                htag.append(int(i[3]))
            except IndexError as e:
                print("Last output before error occurred:", i)



#For Full-Time Results
    hg, ag, tg = [], [], []
    for i in df_cleaned['FT']:
        if len(i) < 5 or ':' in i:
            hg.append('NA')
            ag.append('NA')
            tg.append('NA')
        else:
            try:
                hghg = int(i.split(' - ')[0])
                hg.append(hghg)
                agag = int(i.split(' - ')[1])
                ag.append(agag)
                tg.append(hghg + agag)
            except:
                print(hghg + agag)

    
    df_cleaned['FTHG'], df_cleaned['FTAG'], df_cleaned['FTTG'] = hg, ag, tg
    df_cleaned['HTHG'], df_cleaned['HTAG'] = hthg, htag
    df_cleaned['HTTG'] = df_cleaned['HTHG'] + df_cleaned['HTAG']
    
    final = pd.concat([final, df_cleaned], ignore_index=True)
    
final = final[final['HT'] != 'NA']

# Example: Fix the 'date' column by removing the weekday and extra punctuation
final['Date'] = final['Date'].str.extract(r'(\d{1,2} \w{3})')  # Extract day and month part
final['Date'] = final['Date'] + ' 2024'  # Append the year

# Convert to datetime format
final['Date'] = pd.to_datetime(final['Date'], format='%d %b %Y', errors='coerce')

# Filter rows before September 6th, 2024
final_filtered = final[final['Date'] >= pd.Timestamp('2024-09-17')]

# Align columns of uefa to match final_filtered
uefa = uefa[final_filtered.columns]

# Concatenate
final_filtered = pd.concat([uefa, final_filtered], ignore_index=True)

combined = pd.concat([final_filtered.head(), final_filtered.tail()])

combined

Unnamed: 0,Date,League,Home,Away,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
0,9/17/2024,UCL,Juventus,PSV,3 - 1,(2-0),3.0,1.0,4.0,2.0,0.0,2.0
1,9/17/2024,UCL,Young Boys,Aston Villa,0 - 3,(0-2),0.0,3.0,3.0,0.0,2.0,2.0
2,9/17/2024,UCL,Bayern,Dinamo Zagreb,9 - 2,(3-0),9.0,2.0,11.0,3.0,0.0,3.0
3,9/17/2024,UCL,Milan,Liverpool,1 - 3,(1-2),1.0,3.0,4.0,1.0,2.0,3.0
4,9/17/2024,UCL,Real Madrid,Stuttgart,3 - 1,(0-0),3.0,1.0,4.0,0.0,0.0,0.0
7297,2024-12-06 00:00:00,Saudiarabia,Al Orubah,Al Riyadh,0 - 1,(0-0),0.0,1.0,1.0,0.0,0.0,0.0
7298,2024-12-06 00:00:00,Saudiarabia,Al Ittihad,Al Nassr,2 - 1,(0-0),2.0,1.0,3.0,0.0,0.0,0.0
7299,2024-12-07 00:00:00,Saudiarabia,Al Khaleej,Al Ittifaq,1 - 2,(1-1),1.0,2.0,3.0,1.0,1.0,2.0
7300,2024-12-07 00:00:00,Saudiarabia,Al Hilal,Al Raed,3 - 2,(1-1),3.0,2.0,5.0,1.0,1.0,2.0
7301,2024-12-07 00:00:00,Saudiarabia,Al Taawon,Al Ahli Jeddah,2 - 4,(2-1),2.0,4.0,6.0,2.0,1.0,3.0


# Merging with Predictions + Odds Dataframes

In [4]:
# Flatten multi-level columns
merged_df.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in merged_df.columns]

# Rename two columns
merged_df = merged_df.rename(columns={'df1_Home': 'Home', 'df1_Away': 'Away'})

# Merge on multiple columns
final_df = pd.merge(merged_df, final_filtered, on=['Home', 'Away'], how='inner')

# Drop duplicates based on the 'Home' and 'Away' columns
final_df_unique = final_df.drop_duplicates(subset=['Home', 'Away'])
final_df_unique = final_df_unique.dropna()

# Reset the index afterward
final_df_unique.reset_index(drop=True, inplace=True)

print('Number of games matched: ', len(final_df_unique))
final_df_unique.tail()

Number of games matched:  2532


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
2527,UFCL,APOEL,Celje,69.52,15.99,14.2,2-1,85.51,83.72,30.19,92.09,78.46,39.85,59.3,68.68,73.55,22.18,1.67,1-0,95.73,75.22,23.85,79.48,46.74,94.13,72.44,78.03,37.09,44.9,85.53,APOEL,Celje,1.5,4.2,5.75,1.14,1.22,2.38,2.05,2.4,5.5,1.13,1.53,1.73,1.73,2.0,1.2,4.33,1.67,2.15,2.63,1.44,5.0,1.17,1.33,3.25,2.5,1.5,02/20/2025,UFCL,0 - 2,(0-1),0.0,2.0,2.0,0.0,1.0,1.0
2528,UFCL,Jagiellonia,TSC,81.81,12.17,5.7,2-0,93.98,87.51,17.87,87.46,69.53,51.37,70.51,47.24,55.39,37.51,6.62,0-0,92.9,62.01,44.13,70.68,61.4,94.28,49.65,78.5,15.16,44.19,96.41,Jagiellonia,TSC,1.37,5.0,7.0,1.1,1.17,2.75,1.83,2.6,6.5,1.07,1.47,1.8,1.73,2.0,1.14,5.5,1.53,2.4,2.25,1.57,4.0,1.22,1.29,3.5,2.25,1.57,02/20/2025,UFCL,3 - 1,(1-1),3.0,1.0,4.0,1.0,1.0,2.0
2529,UFCL,Panathinaikos,Vikingur,62.24,22.89,14.85,1-0,85.13,77.09,37.74,75.17,49.97,71.99,86.68,47.12,43.66,49.14,7.12,0-0,92.8,50.78,56.26,56.97,77.26,84.64,55.04,55.87,19.1,71.07,95.25,Panathinaikos,Vikingur Reykjavik,1.11,7.5,19.0,1.02,1.1,5.5,1.5,3.0,12.0,1.02,1.36,2.5,2.2,1.62,1.13,6.0,1.41,2.7,2.0,1.73,3.4,1.3,1.22,4.0,2.1,1.67,02/20/2025,UFCL,2 - 0,(0-0),2.0,0.0,2.0,0.0,0.0,0.0
2530,UFCL,Shamrock,Molde,88.86,5.92,2.61,4-0,94.78,91.47,8.53,93.14,84.12,27.83,45.69,51.07,69.31,7.12,0.73,3-0,76.43,70.04,7.85,73.63,13.01,95.81,51.79,89.26,17.19,21.72,93.33,Shamrock Rovers,Molde,5.0,3.8,1.7,2.1,1.25,1.17,5.0,2.2,2.38,1.53,1.62,1.14,1.8,1.91,1.29,3.5,1.93,1.93,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4,02/20/2025,UFCL,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0
2531,Australia,Melbourne V.,Melbourne City,30.92,29.17,39.91,0-1,60.09,70.83,69.08,58.75,32.43,85.65,94.7,39.03,21.58,50.38,28.02,0-0,71.96,49.6,78.4,58.48,77.16,59.98,66.31,23.33,29.65,93.45,90.28,Melbourne Victory,Melbourne City,2.2,3.6,3.1,1.36,1.29,1.67,2.88,2.25,3.6,1.29,1.57,1.4,1.62,2.2,1.25,4.0,1.8,2.0,3.0,1.4,5.5,1.14,1.36,3.0,2.63,1.44,2024-12-21 00:00:00,Australia,1 - 1,(0-1),1.0,1.0,2.0,0.0,1.0,1.0


In [7]:
final_df_unique.to_excel("final.xlsx", index = False)

# Creating Results Columns

In [5]:
import numpy as np

# Add the new columns based on the condition
final_df_unique['FT1'] = np.where(final_df_unique['FTHG'] > final_df_unique['FTAG'], 1, 0)
final_df_unique['FTX'] = np.where(final_df_unique['FTHG'] == final_df_unique['FTAG'], 1, 0)
final_df_unique['FT2'] = np.where(final_df_unique['FTHG'] < final_df_unique['FTAG'], 1, 0)

final_df_unique['FT1X'] = np.where(final_df_unique['FTHG'] >= final_df_unique['FTAG'], 1, 0)
final_df_unique['FT12'] = np.where(final_df_unique['FTX'] == 0, 1, 0)
final_df_unique['FTX2'] = np.where(final_df_unique['FTHG'] <= final_df_unique['FTAG'], 1, 0)

final_df_unique['1.5O'] = np.where(final_df_unique['FTTG'] > 1.5, 1, 0)
final_df_unique['1.5U'] = np.where(final_df_unique['FTTG'] < 1.5, 1, 0)
final_df_unique['2.5O'] = np.where(final_df_unique['FTTG'] > 2.5, 1, 0)
final_df_unique['2.5U'] = np.where(final_df_unique['FTTG'] < 2.5, 1, 0)
final_df_unique['3.5O'] = np.where(final_df_unique['FTTG'] > 3.5, 1, 0)
final_df_unique['3.5U'] = np.where(final_df_unique['FTTG'] < 3.5, 1, 0)
final_df_unique['4.5O'] = np.where(final_df_unique['FTTG'] > 4.5, 1, 0)
final_df_unique['4.5U'] = np.where(final_df_unique['FTTG'] < 4.5, 1, 0)

final_df_unique['BTTS'] = np.where((final_df_unique['FTHG'] != 0) & (final_df_unique['FTAG'] != 0), 1, 0)
final_df_unique['OTTS'] = np.where(final_df_unique['BTTS'] == 0, 1, 0)

final_df_unique['HT1'] = np.where(final_df_unique['HTHG'] > final_df_unique['HTAG'], 1, 0)
final_df_unique['HTX'] = np.where(final_df_unique['HTHG'] == final_df_unique['HTAG'], 1, 0)
final_df_unique['HT2'] = np.where(final_df_unique['HTHG'] < final_df_unique['HTAG'], 1, 0)

final_df_unique['HT1X'] = np.where(final_df_unique['HTHG'] >= final_df_unique['HTAG'], 1, 0)
final_df_unique['HT12'] = np.where(final_df_unique['HTX'] == 0, 1, 0)
final_df_unique['HTX2'] = np.where(final_df_unique['HTHG'] <= final_df_unique['HTAG'], 1, 0)

final_df_unique['HT0.5O'] = np.where(final_df_unique['HTTG'] > 0.5, 1, 0)
final_df_unique['HT0.5U'] = np.where(final_df_unique['HTTG'] < 0.5, 1, 0)
final_df_unique['HT1.5O'] = np.where(final_df_unique['HTTG'] > 1.5, 1, 0)
final_df_unique['HT1.5U'] = np.where(final_df_unique['HTTG'] < 1.5, 1, 0)

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  2494


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U
2489,UEL,PAOK,FCSB,47.21,27.96,24.82,1-0,75.17,72.03,52.78,65.28,38.53,81.34,92.42,43.5,20.1,57.22,22.65,0-0,77.32,42.75,79.87,58.04,67.84,73.83,58.9,38.75,22.36,84.76,93.89,PAOK,FCSB,1.67,4.0,4.75,1.18,1.22,2.15,2.3,2.2,5.5,1.13,1.62,1.57,1.95,1.8,1.3,3.5,2.01,1.89,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,10/3/2024,UEL,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1
2490,Scotland2,Falkirk,Ayr Utd,1.21,1.89,66.58,1-7,3.1,67.79,68.47,69.52,68.92,2.5,6.33,54.44,0.15,0.46,11.99,0-4,0.61,12.14,12.45,12.57,0.24,54.5,69.6,31.37,69.08,55.93,2.46,Falkirk,Ayr,1.85,3.6,4.0,1.22,1.29,1.83,2.4,2.2,4.33,1.2,1.62,1.53,1.73,2.0,1.25,3.75,1.85,2.0,3.0,1.36,5.5,1.13,1.36,3.0,2.75,1.4,2024-10-05 00:00:00,Scotland2,2 - 0,(1-0),2.0,0.0,2.0,1.0,0.0,1.0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1
2491,Scotland2,Queens Park,Hamilton,0.69,1.15,56.34,1-8,1.84,57.03,57.49,58.1,57.79,1.38,3.78,45.44,0.07,0.26,8.59,0-4,0.33,8.66,8.85,8.9,0.15,45.47,58.15,26.13,57.89,46.75,1.29,Queen's Park,Hamilton,2.15,3.3,3.4,1.3,1.33,1.67,2.63,2.25,3.5,1.29,1.57,1.44,1.57,2.25,1.2,4.33,1.73,2.08,2.63,1.44,4.5,1.17,1.33,3.25,2.63,1.44,2024-10-05 00:00:00,Scotland2,1 - 0,(0-0),1.0,0.0,1.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1
2492,Scotland3,Inverness,Stenhousemuir,0.64,1.27,69.37,1-7,1.91,70.01,70.64,71.04,70.15,3.6,8.67,47.88,0.11,0.42,14.77,0-4,0.53,14.88,15.19,15.25,0.38,47.94,71.21,21.88,70.63,63.95,2.7,Inverness,Stenhousemuir,1.95,3.5,3.25,1.25,1.22,1.7,2.6,2.1,4.0,1.22,1.67,1.5,1.83,1.83,1.3,3.4,2.0,1.85,3.4,1.3,6.5,1.1,1.4,2.75,3.0,1.36,2024-09-28 00:00:00,Scotland3,0 - 0,(0-0),0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1
2493,Scotland3,Dumbarton,Montrose,0.37,0.74,55.74,1-8,1.11,56.11,56.48,56.76,56.4,1.61,4.31,40.02,0.05,0.21,9.83,0-4,0.26,9.88,10.04,10.07,0.2,40.04,56.82,19.55,56.59,49.77,1.2,Dumbarton,Montrose,2.3,3.3,2.6,1.4,1.25,1.5,3.0,2.1,3.25,1.33,1.67,1.36,1.73,2.0,1.29,3.5,1.9,1.95,3.25,1.33,5.5,1.13,1.4,2.75,2.75,1.4,2024-09-21 00:00:00,Scotland3,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1


# Creating Profit Columns for Initial Model Predictions

In [6]:
ft1p, ftxp, ft2p, ft1xp, ft12p, ftx2p = [], [], [], [], [], []
over15, under15, over25, under25, over35, under35, over45, under45 = [], [], [], [], [], [], [], []
btts, otts, ht1p, htxp, ht2p, ht1xp, ht12p, htx2p = [], [], [], [], [], [], [], []
htover05, htunder05, htover15, htunder15 = [], [], [], []

#if prediction == result -> coefficient, elif prediction != result -> 0, else -> -1
for i in range(len(final_df_unique)):
    ft_list = [final_df_unique['df1_FT1'].iloc[i], final_df_unique['df1_FTX'].iloc[i], final_df_unique['df1_FT2'].iloc[i]]
    if (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] == 1):
        ft1p.append(final_df_unique['df2_FT1'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] != 1):
        ft1p.append(0)
    else:
        ft1p.append(-1)
    
    if (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] == 1):
        ftxp.append(final_df_unique['df2_FTX'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] != 1):
        ftxp.append(0)
    else:
        ftxp.append(-1)
    
    if (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] == 1):
        ft2p.append(final_df_unique['df2_FT2'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] != 1):
        ft2p.append(0)
    else:
        ft2p.append(-1)

    dc_list = [final_df_unique['df1_DC1X'].iloc[i], final_df_unique['df1_DC12'].iloc[i], final_df_unique['df1_DCX2'].iloc[i]]
    if (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] == 1):
        ft1xp.append(final_df_unique['df2_DC1X'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] != 1):
        ft1xp.append(0)
    else:
        ft1xp.append(-1)
    
    if (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] == 1):
        ft12p.append(final_df_unique['df2_DC12'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] != 1):
        ft12p.append(0)
    else:
        ft12p.append(-1)
    
    if (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] == 1):
        ftx2p.append(final_df_unique['df2_DCX2'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] != 1):
        ftx2p.append(0)
    else:
        ftx2p.append(-1)
    
    if (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        over15.append(final_df_unique['df2_1.5O'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        over15.append(0)
    else:
        over15.append(-1)
    
    if (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        under15.append(final_df_unique['df2_1.5U'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        under15.append(0)
    else:
        under15.append(-1)
    
    if (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        over25.append(final_df_unique['df2_2.5O'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        over25.append(0)
    else:
        over25.append(-1)
    
    if (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        under25.append(final_df_unique['df2_2.5U'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        under25.append(0)
    else:
        under25.append(-1)
    
    if (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        over35.append(final_df_unique['df2_3.5O'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        over35.append(0)
    else:
        over35.append(-1)
    
    if (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        under35.append(final_df_unique['df2_3.5U'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        under35.append(0)
    else:
        under35.append(-1)
    
    if (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        over45.append(final_df_unique['df2_4.5O'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        over45.append(0)
    else:
        over45.append(-1)
    
    if (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        under45.append(final_df_unique['df2_4.5U'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        under45.append(0)
    else:
        under45.append(-1)
    
    if (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        btts.append(final_df_unique['df2_BTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        btts.append(0)
    else:
        btts.append(-1)
    
    if (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        otts.append(final_df_unique['df2_OTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        otts.append(0)
    else:
        otts.append(-1)
    
    ht_list = [final_df_unique['df1_HT1'].iloc[i], final_df_unique['df1_HTX'].iloc[i], final_df_unique['df1_HT2'].iloc[i]]
    if (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] == 1):
        ht1p.append(final_df_unique['df2_HT1'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] != 1):
        ht1p.append(0)
    else:
        ht1p.append(-1)
    
    if (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] == 1):
        htxp.append(final_df_unique['df2_HTX'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] != 1):
        htxp.append(0)
    else:
        htxp.append(-1)
    
    if (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] == 1):
        ht2p.append(final_df_unique['df2_HT2'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] != 1):
        ht2p.append(0)
    else:
        ht2p.append(-1)
    
    htdc_list = [final_df_unique['df1_HTDC1X'].iloc[i], final_df_unique['df1_HTDC12'].iloc[i], final_df_unique['df1_HTDCX2'].iloc[i]]
    if (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X'].iloc[i] == 1):
        ht1xp.append(final_df_unique['df2_HT1X'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X'].iloc[i] != 1):
        ht1xp.append(0)
    else:
        ht1xp.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12'].iloc[i] == 1):
        ht12p.append(final_df_unique['df2_HT12'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12'].iloc[i] != 1):
        ht12p.append(0)
    else:
        ht12p.append(-1)
    
    if (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2'].iloc[i] == 1):
        htx2p.append(final_df_unique['df2_HTX2'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2'].iloc[i] != 1):
        htx2p.append(0)
    else:
        htx2p.append(-1)
    
    if (final_df_unique['df1_HT0.5O'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htover05.append(final_df_unique['df2_HT0.5O'].iloc[i])
    elif (final_df_unique['df1_HT0.5O'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htover05.append(0)
    else:
        htover05.append(-1)
    
    if (final_df_unique['df1_HT0.5O'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htunder05.append(final_df_unique['df2_HT0.5U'].iloc[i])
    elif (final_df_unique['df1_HT0.5O'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htunder05.append(0)
    else:
        htunder05.append(-1)
    
    if (final_df_unique['df1_HT1.5U'].iloc[i] < 50) and (final_df_unique['HT1.5O'].iloc[i] == 1):
        htover15.append(final_df_unique['df2_HT1.5O'].iloc[i])
    elif (final_df_unique['df1_HT1.5U'].iloc[i] < 50) and (final_df_unique['HT1.5O'].iloc[i] != 1):
        htover15.append(0)
    else:
        htover15.append(-1)
    
    if (final_df_unique['df1_HT1.5U'].iloc[i] >= 50) and (final_df_unique['HT1.5O'].iloc[i] != 1):
        htunder15.append(final_df_unique['df2_HT1.5U'].iloc[i])
    elif (final_df_unique['df1_HT1.5U'].iloc[i] >= 50) and (final_df_unique['HT1.5O'].iloc[i] == 1):
        htunder15.append(0)
    else:
        htunder15.append(-1)

final_df_unique['FT1P'], final_df_unique['FTXP'], final_df_unique['FT2P'] = ft1p, ftxp, ft2p
final_df_unique['FT1XP'], final_df_unique['FT12P'], final_df_unique['FTX2P'] = ft1xp, ft12p, ftx2p
final_df_unique['1.5OP'], final_df_unique['1.5UP'], final_df_unique['2.5OP'], final_df_unique['2.5UP'] = over15, under15, over25, under25
final_df_unique['3.5OP'], final_df_unique['3.5UP'], final_df_unique['4.5OP'], final_df_unique['4.5UP'] = over35, under35, over45, under45
final_df_unique['BTTSP'], final_df_unique['OTTSP'] = btts, otts
final_df_unique['HT1P'], final_df_unique['HTXP'], final_df_unique['HT2P'] = ht1p, htxp, ht2p
final_df_unique['HT1XP'], final_df_unique['HT12P'], final_df_unique['HTX2P'] = ht1xp, ht12p, htx2p
final_df_unique['HT0.5OP'], final_df_unique['HT0.5UP'] = htover05, htunder05
final_df_unique['HT1.5OP'], final_df_unique['HT1.5UP'] = htover15, htunder15

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  2494


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP
2489,UEL,PAOK,FCSB,47.21,27.96,24.82,1-0,75.17,72.03,52.78,65.28,38.53,81.34,92.42,43.5,20.1,57.22,22.65,0-0,77.32,42.75,79.87,58.04,67.84,73.83,58.9,38.75,22.36,84.76,93.89,PAOK,FCSB,1.67,4.0,4.75,1.18,1.22,2.15,2.3,2.2,5.5,1.13,1.62,1.57,1.95,1.8,1.3,3.5,2.01,1.89,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,10/3/2024,UEL,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0.0,-1.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,-1.0,1.89,-1.0,1.3,-1.0,1.11,-1.0,1.8,-1.0,0.0,-1.0,-1.0,-1.0,1.57,1.4,-1.0,-1.0,1.36
2490,Scotland2,Falkirk,Ayr Utd,1.21,1.89,66.58,1-7,3.1,67.79,68.47,69.52,68.92,2.5,6.33,54.44,0.15,0.46,11.99,0-4,0.61,12.14,12.45,12.57,0.24,54.5,69.6,31.37,69.08,55.93,2.46,Falkirk,Ayr,1.85,3.6,4.0,1.22,1.29,1.83,2.4,2.2,4.33,1.2,1.62,1.53,1.73,2.0,1.25,3.75,1.85,2.0,3.0,1.36,5.5,1.13,1.36,3.0,2.75,1.4,2024-10-05 00:00:00,Scotland2,2 - 0,(1-0),2.0,0.0,2.0,1.0,0.0,1.0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,-1.0,-1.0,0.0,-1.0,-1.0,0.0,1.25,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,0.0,0.0,-1.0
2491,Scotland2,Queens Park,Hamilton,0.69,1.15,56.34,1-8,1.84,57.03,57.49,58.1,57.79,1.38,3.78,45.44,0.07,0.26,8.59,0-4,0.33,8.66,8.85,8.9,0.15,45.47,58.15,26.13,57.89,46.75,1.29,Queen's Park,Hamilton,2.15,3.3,3.4,1.3,1.33,1.67,2.63,2.25,3.5,1.29,1.57,1.44,1.57,2.25,1.2,4.33,1.73,2.08,2.63,1.44,4.5,1.17,1.33,3.25,2.63,1.44,2024-10-05 00:00:00,Scotland2,1 - 0,(0-0),1.0,0.0,1.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,2.25,-1.0,-1.0,0.0,-1.0,-1.0,1.44,-1.0,3.25,0.0,-1.0
2492,Scotland3,Inverness,Stenhousemuir,0.64,1.27,69.37,1-7,1.91,70.01,70.64,71.04,70.15,3.6,8.67,47.88,0.11,0.42,14.77,0-4,0.53,14.88,15.19,15.25,0.38,47.94,71.21,21.88,70.63,63.95,2.7,Inverness,Stenhousemuir,1.95,3.5,3.25,1.25,1.22,1.7,2.6,2.1,4.0,1.22,1.67,1.5,1.83,1.83,1.3,3.4,2.0,1.85,3.4,1.3,6.5,1.1,1.4,2.75,3.0,1.36,2024-09-28 00:00:00,Scotland3,0 - 0,(0-0),0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,-1.0,1.7,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,1.83,-1.0,-1.0,0.0,-1.0,-1.0,1.5,-1.0,2.75,0.0,-1.0
2493,Scotland3,Dumbarton,Montrose,0.37,0.74,55.74,1-8,1.11,56.11,56.48,56.76,56.4,1.61,4.31,40.02,0.05,0.21,9.83,0-4,0.26,9.88,10.04,10.07,0.2,40.04,56.82,19.55,56.59,49.77,1.2,Dumbarton,Montrose,2.3,3.3,2.6,1.4,1.25,1.5,3.0,2.1,3.25,1.33,1.67,1.36,1.73,2.0,1.29,3.5,1.9,1.95,3.25,1.33,5.5,1.13,1.4,2.75,2.75,1.4,2024-09-21 00:00:00,Scotland3,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,-1.0,-1.0,2.6,-1.0,-1.0,1.5,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,2.0,-1.0,-1.0,3.25,-1.0,-1.0,1.36,-1.0,0.0,0.0,-1.0


# Checking For ROI of Profit Columns

In [7]:
# Select columns that end with 'P'
columns = [col for col in final_df_unique.columns if col.endswith('P')]

# Initialize lists to store results and games
results, games_list = [],  []

for col in columns:
    my_df = final_df_unique[final_df_unique[col] >= 0]
    numerator = np.sum(my_df[col]) - len(my_df)
    result = round(numerator / len(my_df) * 100, 2)
    
    # Append results and games to respective lists
    results.append(result)
    games_list.append(len(my_df))

# Convert results to a DataFrame with an additional column for Games
results_df = pd.DataFrame({
    'Column': columns,
    'ROI': results,
    'Games': games_list
})
results_df

Unnamed: 0,Column,ROI,Games
0,FT1P,-3.09,1378
1,FTXP,-7.21,229
2,FT2P,-17.19,887
3,FT1XP,-2.82,1133
4,FT12P,-6.09,643
5,FTX2P,-9.24,718
6,1.5OP,-5.61,2241
7,1.5UP,1.93,253
8,2.5OP,-6.32,1182
9,2.5UP,-7.11,1312


# ROI of Profit Columns According To Leagues

In [8]:
# Step 1: Filter leagues with at least 10 games
league_counts = final_df_unique['League'].value_counts()
leagues_with_10_games = league_counts[league_counts >= 10].index
filtered_df = final_df_unique[final_df_unique['League'].isin(leagues_with_10_games)]

# Group by 'League' and calculate results for each group
grouped_results = {}
for league, group in filtered_df.groupby('League'):
    group_results = {}
    for col in columns:
        my_df = group[group[col] >= 0]
        numerator = np.sum(my_df[col]) - len(my_df)
        group_results[col] = round(numerator / len(my_df) * 100, 2)
    # Add the number of games for this league
    group_results['Games'] = round(len(group),2)
    grouped_results[league] = group_results

# Convert grouped results to a DataFrame for better visualization
grouped_results_df = pd.DataFrame(grouped_results).T

# Define a function to apply conditional formatting
def highlight_positive(val):
    # Highlight background to red if the value is positive
    color = 'background-color: red' if isinstance(val, (int, float)) and val > 0 else ''
    return color

# Apply the function to the DataFrame
styled_df = (
    grouped_results_df.style
    .applymap(highlight_positive)
    .format("{:.2f}")  # Format only numeric columns, excluding 'Games'
)

# Save the styled DataFrame to Excel
styled_df.to_excel("ROI_leagues.xlsx", index=True)

# Display the styled DataFrame
styled_df

Unnamed: 0,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,Games
Austria,-26.9,-100.0,6.8,-9.71,13.41,-32.8,-1.48,300.0,16.68,-17.77,-2.92,14.12,-100.0,2.75,-18.65,-7.0,-51.2,0.25,64.17,1.23,,7.05,-3.7,21.5,9.94,17.32,53.0
Belgium,1.74,7.0,-45.25,8.07,-7.64,-20.61,1.0,-59.38,-10.66,-12.62,-33.0,3.72,-100.0,0.04,-27.09,-0.67,19.23,-8.34,17.05,-1.85,-70.0,-1.59,-1.1,-21.55,-74.0,-11.97,83.0
Brazil,-22.69,-100.0,-17.67,-4.39,2.31,-5.7,-4.65,175.0,-3.96,-12.17,,-4.46,,-1.31,0.31,-20.74,-17.08,4.38,,-6.77,,6.48,-1.29,41.29,-100.0,-9.69,105.0
Denmark,7.81,75.0,-57.17,10.53,-22.62,-11.25,-1.24,,-9.97,-2.0,0.44,-10.0,33.33,-1.7,7.15,-1.62,45.28,-5.0,-64.44,11.96,-24.25,-0.19,0.52,50.0,23.79,11.19,46.0
England,-10.61,-8.33,-30.44,-1.37,24.37,-6.36,-4.07,-100.0,-1.47,-4.6,-16.73,-8.07,-100.0,-3.3,9.96,-13.34,-23.66,6.39,28.33,-9.76,28.75,-14.53,0.09,-14.29,-1.32,2.16,84.0
England2,-7.18,-5.71,-35.78,3.98,-2.12,1.8,-10.49,36.3,-20.02,8.65,-83.0,-0.15,-100.0,2.42,-16.52,0.69,19.05,-14.98,-77.27,-5.56,,-20.44,-7.19,9.84,6.77,0.85,133.0
England3,12.4,-2.0,-12.96,-8.78,-2.08,-11.56,-4.78,-10.88,9.11,5.09,-33.35,-4.05,-58.33,-5.6,1.1,4.91,-12.45,-24.2,4.13,-6.59,15.0,-10.71,-2.29,-20.12,0.36,-9.39,138.0
England4,-29.59,63.75,-2.44,-9.3,-10.66,-5.2,-14.01,0.47,-17.27,0.15,9.58,3.53,116.67,3.1,-25.49,-12.68,-18.76,-4.06,-42.5,-14.14,-26.8,-17.27,-11.12,-7.24,-17.74,-1.52,119.0
England5,-4.9,-100.0,-29.0,-0.11,-14.31,-14.03,-4.53,-59.38,-8.78,-26.19,-53.6,-12.86,-100.0,-3.38,-12.28,-25.34,3.45,-20.15,-7.67,-6.35,-41.37,-10.83,-4.24,-56.69,-18.0,-7.01,126.0
France,10.82,-34.0,-6.48,-7.82,1.71,-10.36,-0.48,-100.0,-1.14,-0.97,-16.86,-4.53,-38.14,3.14,-22.32,-11.29,-8.93,-37.68,-14.14,-15.82,53.0,-23.61,0.2,-100.0,32.92,0.48,76.0


# Creating Optimum Threshold for Each Prediction Column

In [9]:
# Assuming `df` is your DataFrame and it contains the columns for percentages and correctness
def calculate_threshold(percentages, predictions):
    # Ensure inputs are pandas Series
    percentages = pd.Series(percentages)
    predictions = pd.Series(predictions)
    
    thresholds = percentages.unique()
    best_threshold = 0
    best_j_stat = -np.inf  # Start with negative infinity for comparison
    
    for threshold in thresholds:
        # Predict 1s based on the threshold
        predicted_1s = (percentages >= threshold).astype(int)
        
        # Calculate true positives, true negatives, false positives, false negatives
        true_positives = ((predicted_1s == 1) & (predictions == 1)).sum()
        true_negatives = ((predicted_1s == 0) & (predictions == 0)).sum()
        false_positives = ((predicted_1s == 1) & (predictions == 0)).sum()
        false_negatives = ((predicted_1s == 0) & (predictions == 1)).sum()
        
        # Calculate Sensitivity (Recall) and Specificity
        sensitivity = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
        specificity = true_negatives / (true_negatives + false_positives) if (true_negatives + false_positives) > 0 else 0
        
        # Calculate Youden's J Statistic
        j_stat = sensitivity + specificity - 1
        
        # Update best threshold if J statistic improves
        if j_stat > best_j_stat:
            best_j_stat = j_stat
            best_threshold = threshold
    
    return best_threshold, round(best_j_stat, 2)

# Select only numeric columns
numeric_columns = final_df_unique.select_dtypes(include=[np.number])

# Remove rows where any numeric value is greater than 100
final_df_unique = final_df_unique[(numeric_columns <= 100).all(axis=1)]

#Selecting dataframes with model predictions
ft1df = final_df_unique[final_df_unique['df1_FT1'] >= final_df_unique[['df1_FTX', 'df1_FT2']].max(axis=1)]
ftxdf = final_df_unique[final_df_unique['df1_FTX'] >= final_df_unique[['df1_FT1', 'df1_FT2']].max(axis=1)]
ft2df = final_df_unique[final_df_unique['df1_FT2'] >= final_df_unique[['df1_FTX', 'df1_FT1']].max(axis=1)]
dc1xdf = final_df_unique[final_df_unique['df1_DC1X'] >= final_df_unique[['df1_DC12', 'df1_DCX2']].max(axis=1)]
dc12df = final_df_unique[final_df_unique['df1_DC12'] >= final_df_unique[['df1_DC1X', 'df1_DCX2']].max(axis=1)]
dcx2df = final_df_unique[final_df_unique['df1_DCX2'] >= final_df_unique[['df1_DC1X', 'df1_DC12']].max(axis=1)]
over15df, under15df = final_df_unique[final_df_unique['df1_1.5O'] >= 50], final_df_unique[final_df_unique['df1_1.5O'] < 50]
over25df, under25df = final_df_unique[final_df_unique['df1_2.5O'] >= 50], final_df_unique[final_df_unique['df1_2.5O'] < 50]
over35df, under35df = final_df_unique[final_df_unique['df1_3.5U'] < 50], final_df_unique[final_df_unique['df1_3.5U'] >= 50]
over45df, under45df = final_df_unique[final_df_unique['df1_4.5U'] < 50], final_df_unique[final_df_unique['df1_4.5U'] >= 50]
bttsdf, ottsdf = final_df_unique[final_df_unique['df1_BTTS'] >= 50], final_df_unique[final_df_unique['df1_BTTS'] < 50]
ht1df = final_df_unique[final_df_unique['df1_HT1'] >= final_df_unique[['df1_HTX', 'df1_HT2']].max(axis=1)]
htxdf = final_df_unique[final_df_unique['df1_HTX'] >= final_df_unique[['df1_HT1', 'df1_HT2']].max(axis=1)]
ht2df = final_df_unique[final_df_unique['df1_HT2'] >= final_df_unique[['df1_HT1', 'df1_HTX']].max(axis=1)]
ht1xdf = final_df_unique[final_df_unique['df1_HTDC1X'] >= final_df_unique[['df1_HTDC12', 'df1_HTDCX2']].max(axis=1)]
ht12df = final_df_unique[final_df_unique['df1_HTDC12'] >= final_df_unique[['df1_HTDC1X', 'df1_HTDCX2']].max(axis=1)]
htx2df = final_df_unique[final_df_unique['df1_HTDCX2'] >= final_df_unique[['df1_HTDC1X', 'df1_HTDC12']].max(axis=1)]
htover05df, htunder05df = final_df_unique[final_df_unique['df1_HT0.5O'] >= 50], final_df_unique[final_df_unique['df1_HT0.5O'] < 50]
htover15df, htunder15df = final_df_unique[final_df_unique['df1_HT1.5U'] < 50], final_df_unique[final_df_unique['df1_HT1.5U'] >= 50]

ft1t, ft1a = calculate_threshold(ft1df['df1_FT1'], ft1df['FT1'])
ftxt, ftxa = calculate_threshold(ftxdf['df1_FTX'], ftxdf['FTX'])
ft2t, ft2a = calculate_threshold(ft2df['df1_FT2'], ft2df['FT2'])
ft1xt, ft1xa = calculate_threshold(dc1xdf['df1_DC1X'], dc1xdf['FT1X'])
ft12t, ft12a = calculate_threshold(dc12df['df1_DC12'], dc12df['FT12'])
ftx2t, ftx2a = calculate_threshold(dcx2df['df1_DCX2'], dcx2df['FTX2'])
over15t, over15a = calculate_threshold(over15df['df1_1.5O'], over15df['1.5O'])
under15t, under15a = calculate_threshold(under15df['df1_1.5O'], under15df['1.5U'])
over25t, over25a = calculate_threshold(over25df['df1_2.5O'], over25df['2.5O'])
under25t, under25a = calculate_threshold(under25df['df1_2.5O'], under25df['2.5U'])
over35t, over35a = calculate_threshold(over35df['df1_3.5U'], over35df['3.5O'])
under35t, under35a = calculate_threshold(under35df['df1_3.5U'], under35df['3.5U'])
over45t, over45a = calculate_threshold(over45df['df1_4.5U'], over45df['4.5O'])
under45t, under45a = calculate_threshold(under45df['df1_4.5U'], under45df['4.5U'])
bttst, bttsa = calculate_threshold(bttsdf['df1_BTTS'], bttsdf['BTTS'])
ottst, ottsa = calculate_threshold(ottsdf['df1_BTTS'], ottsdf['OTTS'])
ht1t, ht1a = calculate_threshold(ht1df['df1_HT1'], ht1df['HT1'])
htxt, htxa = calculate_threshold(htxdf['df1_HTX'], htxdf['HTX'])
ht2t, ht2a = calculate_threshold(ht2df['df1_HT2'], ht2df['HT2'])
ht1xt, ht1xa = calculate_threshold(ht1xdf['df1_HTDC1X'], ht1xdf['HT1X'])
ht12t, ht12a = calculate_threshold(ht12df['df1_HTDC12'], ht12df['HT12'])
htx2t, htx2a = calculate_threshold(htx2df['df1_HTDCX2'], htx2df['HTX2'])
htover05t, htover05a = calculate_threshold(htover05df['df1_HT0.5O'], htover05df['HT0.5O'])
htunder05t, htunder05a = calculate_threshold(htunder05df['df1_HT0.5O'], htunder05df['HT0.5U'])
htover15t, htover15a = calculate_threshold(htover15df['df1_HT1.5U'], htover15df['HT1.5O'])
htunder15t, htunder15a = calculate_threshold(htunder15df['df1_HT1.5U'], htunder15df['HT1.5U'])

new_ft1df, new_ftxdf, new_ft2df = ft1df[ft1df['df1_FT1'] >= ft1t],ftxdf[ftxdf['df1_FTX'] >= ftxt],ft2df[ft2df['df1_FT2'] >= ft2t]
new_ft1xdf, new_ft12df, new_ftx2df = dc1xdf[dc1xdf['df1_DC1X'] >= ft1xt],dc12df[dc12df['df1_DC12'] >= ft12t],dcx2df[dcx2df['df1_DCX2'] >= ftx2t] 
new_over15, new_under15 = over15df[over15df['df1_1.5O'] >= over15t], under15df[under15df['df1_1.5O'] <= under15t]
new_over25, new_under25 = over25df[over25df['df1_2.5O'] >= over25t], under25df[under25df['df1_2.5O'] <= under25t]
new_over35, new_under35 = over35df[over35df['df1_3.5U'] <= over35t], under35df[under35df['df1_3.5U'] >= under35t]
new_over45, new_under45 = over45df[over45df['df1_4.5U'] <= over45t], under45df[under45df['df1_4.5U'] >= under45t]
new_btts, new_otts = bttsdf[bttsdf['df1_BTTS'] >= bttst], ottsdf[ottsdf['df1_BTTS'] <= ottst]
new_ht1df, new_htxdf, new_ht2df = ht1df[ht1df['df1_HT1'] >= ht1t],htxdf[htxdf['df1_HTX'] >= htxt],ht2df[ht2df['df1_HT2'] >= ht2t]
new_ht1xdf, new_ht12df, new_htx2df = ht1xdf[ht1xdf['df1_HTDC1X'] >= ht1xt],ht12df[ht12df['df1_HTDC12'] >= ht12t],htx2df[htx2df['df1_HTDCX2'] >= htx2t]
new_htover05, new_htunder05 = htover05df[htover05df['df1_HT0.5O'] >= htover05t], htunder05df[htunder05df['df1_HT0.5O'] <= htunder05t]
new_htover15, new_htunder15 = htover15df[htover15df['df1_HT1.5U'] <= htover15t], htunder15df[htunder15df['df1_HT1.5U'] >= htunder15t]

# Store the results in a list
results = [
('FT1', ft1t, ft1a, len(new_ft1df), round(len(new_ft1df)/len(ft1df)*100,2), np.sum(new_ft1df['FT1P']) - len(new_ft1df)),
('FTX', ftxt, ftxa, len(new_ftxdf), round(len(new_ftxdf)/len(ftxdf)*100,2), np.sum(new_ftxdf['FTXP']) - len(new_ftxdf)),
('FT2', ft2t, ft2a, len(new_ft2df), round(len(new_ft2df)/len(ft2df)*100,2), np.sum(new_ft2df['FT2P']) - len(new_ft2df)),
('FT1X', ft1xt, ft1xa, len(new_ft1xdf), round(len(new_ft1xdf)/len(dc1xdf)*100,2), np.sum(new_ft1xdf['FT1XP']) - len(new_ft1xdf)),
('FT12', ft12t, ft12a, len(new_ft12df), round(len(new_ft12df)/len(dc12df)*100,2), np.sum(new_ft12df['FT12P']) - len(new_ft12df)),
('FTX2', ftx2t, ftx2a, len(new_ftx2df), round(len(new_ftx2df)/len(dcx2df)*100,2), np.sum(new_ftx2df['FTX2P']) - len(new_ftx2df)),
('1.5O', over15t, over15a, len(new_over15), round(len(new_over15)/len(over15df)*100,2), np.sum(new_over15['1.5OP'])-len(new_over15)),
('1.5U', under15t, under15a, len(new_under15), round(len(new_under15)/len(under15df)*100,2), np.sum(new_under15['1.5UP'])-len(new_under15)),
('2.5O', over25t, over25a, len(new_over25), round(len(new_over25)/len(over25df)*100,2), np.sum(new_over25['2.5OP'])-len(new_over25)),
('2.5U', under25t, under25a, len(new_under25), round(len(new_under25)/len(under25df)*100,2), np.sum(new_under25['2.5UP'])-len(new_under25)),
('3.5O', over35t, over35a, len(new_over35), round(len(new_over35)/len(over35df)*100,2), np.sum(new_over35['3.5OP'])-len(new_over35)),
('3.5U', under35t, under35a, len(new_under35), round(len(new_under35)/len(under35df)*100,2), np.sum(new_under35['3.5UP'])-len(new_under35)),
('4.5O', over45t, over45a, len(new_over45), round(len(new_over45)/len(over45df)*100,2), np.sum(new_over45['4.5OP'])-len(new_over45)),
('4.5U', under45t, under45a, len(new_under45), round(len(new_under45)/len(under45df)*100,2), np.sum(new_under45['4.5UP'])-len(new_under45)),
('BTTS', bttst, bttsa, len(new_btts), round(len(new_btts)/len(bttsdf)*100,2), np.sum(new_btts['BTTSP'])-len(new_btts)),
('OTTS', ottst, ottsa, len(new_otts), round(len(new_otts)/len(ottsdf)*100,2), np.sum(new_otts['OTTSP'])-len(new_otts)),
('HT1', ht1t, ht1a, len(new_ht1df), round(len(new_ht1df)/len(ht1df)*100,2), np.sum(new_ht1df['HT1P']) - len(new_ht1df)),
('HTX', htxt, htxa, len(new_htxdf), round(len(new_htxdf)/len(htxdf)*100,2), np.sum(new_htxdf['HTXP']) - len(new_htxdf)),
('HT2', ht2t, ht2a, len(new_ht2df), round(len(new_ht2df)/len(ht2df)*100,2), np.sum(new_ht2df['HT2P']) - len(new_ht2df)),
('HT1X', ht1xt, ht1xa, len(new_ht1xdf), round(len(new_ht1xdf)/len(ht1xdf)*100,2), np.sum(new_ht1xdf['HT1XP']) - len(new_ht1xdf)),
('HT12', ht12t, ht12a, len(new_ht12df), round(len(new_ht12df)/len(ht12df)*100,2), np.sum(new_ht12df['HT12P']) - len(new_ht12df)),
('HTX2', htx2t, htx2a, len(new_htx2df), round(len(new_htx2df)/len(htx2df)*100,2), np.sum(new_htx2df['HTX2P']) - len(new_htx2df)),
('HT0.5O', htover05t, htover05a, len(new_htover05), round(len(new_htover05)/len(htover05df)*100,2), np.sum(new_htover05['HT0.5OP'])-len(new_htover05)),
('HT0.5U', htunder05t, htunder05a, len(new_htunder05), round(len(new_htunder05)/len(htunder05df)*100,2), np.sum(new_htunder05['HT0.5UP'])-len(new_htunder05)),
('HT1.5O', htover15t, htover15a, len(new_htover15), round(len(new_htover15)/len(htover15df)*100,2), np.sum(new_htover15['HT1.5OP'])-len(new_htover15)),
('HT1.5U', htunder15t, htunder15a, len(new_htunder15), round(len(new_htunder15)/len(htunder15df)*100,2), np.sum(new_htunder15['HT1.5UP'])-len(new_htunder15))
]

# Create a DataFrame from the results
results_df = pd.DataFrame(results, columns=['Prediction', 'Threshold', 'J-Stat', 'Games', 'Games%', 'Profit'])
results_df['ROI'] = round(results_df['Profit'] / results_df['Games'] * 100, 2)
print('Number of matches: ', len(final_df_unique))
results_df

Number of matches:  2448


Unnamed: 0,Prediction,Threshold,J-Stat,Games,Games%,Profit,ROI
0,FT1,59.31,0.19,597,43.99,24.61,4.12
1,FTX,45.38,0.1,84,39.62,8.1,9.64
2,FT2,60.99,0.12,246,27.92,-27.46,-11.16
3,FT1X,87.6,0.18,445,40.38,0.05,0.01
4,FT12,77.4,0.05,299,46.57,-10.72,-3.59
5,FTX2,82.4,0.13,363,51.56,-24.2,-6.67
6,1.5O,79.73,0.07,900,40.93,-43.57,-4.84
7,1.5U,38.43,0.09,92,36.95,-9.31,-10.12
8,2.5O,70.66,0.06,383,32.93,-14.41,-3.76
9,2.5U,47.5,0.01,1174,91.36,-99.39,-8.47


# Testing Best / Most Profitable Model Predictions

In [10]:
# Select columns starting with 'df1_'
predictions = ['df1_FT1', 'df1_FTX', 'df1_FT2', 'df1_DC1X', 'df1_DC12', 'df1_DCX2', 
               'df1_1.5O', 'df1_2.5O', 'df1_3.5U', 'df1_4.5U', 'df1_BTTS', 
               'df1_HT1', 'df1_HTX', 'df1_HT2', 'df1_HTDC1X', 'df1_HTDC12', 'df1_HTDCX2', 
               'df1_HT0.5O', 'df1_HT1.5U']

# Select columns starting with 'df2_'
results = ['FT1', 'FTX', 'FT2', 'FT1X', 'FT12', 'FTX2', 
           '1.5O', '2.5O', '3.5U', '4.5U', 'BTTS',
            'HT1', 'HTX', 'HT2', 'HT1X', 'HT12', 'HTX2', 
            'HT0.5O', 'df2_HT1.5U']

# Select columns ending with 'P'
profits = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '2.5OP', '3.5UP', '4.5UP', 'BTTSP',
            'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT1.5UP']

bet, percentage, profit = [], [], []

for i in range(len(final_df_unique)):
    my_list = []
    for j in predictions:
        my_list.append(final_df_unique[j].iloc[i])
    percentage.append(max(my_list))
    max_index = my_list.index(max(my_list))
    bet.append(results[max_index])
    profit_column = profits[max_index]
    profit.append(final_df_unique[profit_column].iloc[i])

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['df1_League'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bet,
    'Percentage': percentage,
    'Profit': profit
})


print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs),2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  2448
Correct Predictions: 82.6797385620915
Profit: -102.47 ROI: -4.19%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit
2489,UEL,PAOK,FCSB,4.5U,92.42,1.11
2490,Scotland2,Falkirk,Ayr Utd,1.5O,69.52,1.25
2491,Scotland2,Queens Park,Hamilton,1.5O,58.1,0.0
2492,Scotland3,Inverness,Stenhousemuir,1.5O,71.04,0.0
2493,Scotland3,Dumbarton,Montrose,1.5O,56.76,0.0


In [11]:
final_df_unique['df1_OTTS'] = 100 - final_df_unique['df1_BTTS']
final_df_unique['df1_1.5U'] = 100 - final_df_unique['df1_1.5O']
final_df_unique['df1_2.5U'] = 100 - final_df_unique['df1_2.5O']
final_df_unique['df1_3.5O'] = 100 - final_df_unique['df1_3.5U']
final_df_unique['df1_4.5O'] = 100 - final_df_unique['df1_4.5U']
final_df_unique['df1_HT0.5U'] = 100 - final_df_unique['df1_HT0.5O']
final_df_unique['df1_HT1.5O'] = 100 - final_df_unique['df1_HT1.5U'] 

# Select columns starting with 'df1_'
predictions = ['df1_FT1', 'df1_FTX', 'df1_FT2', 'df1_DC1X', 'df1_DC12', 'df1_DCX2', 
               'df1_1.5O', 'df1_1.5U', 'df1_2.5O','df1_2.5U','df1_3.5O', 'df1_3.5U', 
               'df1_4.5O', 'df1_4.5U', 'df1_BTTS', 'df1_OTTS',
               'df1_HT1', 'df1_HTX', 'df1_HT2', 'df1_HTDC1X', 'df1_HTDC12', 'df1_HTDCX2', 
               'df1_HT0.5O', 'df1_HT0.5U', 'df1_HT1.5O', 'df1_HT1.5U']

# Select columns starting with 'df2_'
odds = ['df2_FT1', 'df2_FTX', 'df2_FT2', 'df2_DC1X', 'df2_DC12', 'df2_DCX2', 
           'df2_1.5O', 'df2_1.5U', 'df2_2.5O','df2_2.5U', 'df2_3.5O','df2_3.5U', 
           'df2_4.5O', 'df2_4.5U', 'df2_BTTS', 'df2_OTTS',
            'df2_HT1', 'df2_HTX', 'df2_HT2', 'df2_HT1X', 'df2_HT12', 'df2_HTX2', 
            'df2_HT0.5O', 'df2_HT0.5U', 'df2_HT1.5O', 'df2_HT1.5U']

# Select columns ending with 'P'
profit = ['FT1P', 'FTXP', 'FT2P', 'FT1XP', 'FT12P', 'FTX2P', 
           '1.5OP', '1.5UP', '2.5OP', '2.5UP', '3.5OP', '3.5UP', '4.5OP', '4.5UP', 
           'BTTSP', 'OTTSP', 'HT1P', 'HTXP', 'HT2P', 'HT1XP', 'HT12P', 'HTX2P', 
            'HT0.5OP', 'HT0.5UP', 'HT1.5OP','HT1.5UP']

bets, percentages, profits, difference = [], [], [], [] 
for i in range(len(final_df_unique)):
    my_list = []
    valid_indices = []  # To keep track of indices where profit is not negative
    for j in range(len(predictions)):
        pred_column = predictions[j]
        odds_column = odds[j]
        profit_column = profit[j]  # Corresponding profit column
        
        # Calculate the value
        my_value = (100 / final_df_unique[pred_column].iloc[i]) - final_df_unique[odds_column].iloc[i]
        
        # Only add to the list if the corresponding profit is non-negative
        if final_df_unique[profit_column].iloc[i] >= 0:
            my_list.append(my_value)
            valid_indices.append(j)
        else:
            my_list.append(float('-inf'))  # Set to negative infinity to ignore in max()
    
    if valid_indices:  # Ensure there is at least one valid index
        max_index = my_list.index(max(my_list))
        rec_bet = profit[max_index]
        percent_bet = predictions[max_index]
        bets.append(rec_bet)
        percentages.append(final_df_unique[percent_bet].iloc[i])
        rec_profit = profit[max_index]
        profits.append(final_df_unique[rec_profit].iloc[i])
        difference.append(round(max(my_list), 2))
    else:
        # Handle case where no valid profits are found for this match
        bets.append(None)
        percentages.append(None)
        profits.append(None)
        difference.append(None)

# Create a DataFrame
model_recs = pd.DataFrame({
    'League': final_df_unique['df1_League'],
    'Home': final_df_unique['Home'],
    'Away': final_df_unique['Away'],
    'BET': bets,
    'Percentage': percentages,
    'Profit': profits,
    'Difference': difference
}).dropna()  # Drop rows with None values

print('Matches found: ', len(final_df_unique))
print(f"Correct Predictions: {len(model_recs[model_recs['Profit'] > 0])/len(model_recs)*100}")
print(f"Profit: {round(sum(model_recs['Profit']) - len(model_recs), 2)} ROI: {round((sum(model_recs['Profit']) - len(model_recs)) / len(model_recs) * 100, 2)}%")
model_recs.tail()

Matches found:  2448
Correct Predictions: 70.13888888888889
Profit: -68.61 ROI: -2.8%


Unnamed: 0,League,Home,Away,BET,Percentage,Profit,Difference
2489,UEL,PAOK,FCSB,FT1P,47.21,0.0,0.45
2490,Scotland2,Falkirk,Ayr Utd,HTX2P,12.45,0.0,6.5
2491,Scotland2,Queens Park,Hamilton,HTX2P,8.85,1.44,9.86
2492,Scotland3,Inverness,Stenhousemuir,HTX2P,15.19,1.5,5.08
2493,Scotland3,Dumbarton,Montrose,HTX2P,10.04,1.36,8.6


# Combining Different Bets

In [12]:
# Define the list of base columns and multiplier columns
base_columns = ['df1_FT1', 'df1_FTX', 'df1_FT2']  
multiplier_columns = ['df1_1.5O', 'df1_2.5O', 'df1_2.5U', 'df1_3.5U', 'df1_4.5U']

# Nested loop: Iterate over each base column and multiplier column
for base_column in base_columns:
    for multiplier in multiplier_columns:
        # Create a new column name dynamically
        new_column = f"{base_column}/{multiplier.split('_')[1]}"
        
        # Perform the calculation and assign it to the new column
        final_df_unique[new_column] = round(final_df_unique[base_column] * final_df_unique[multiplier] / 100, 2)

final_df_unique.tail()

Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP,df1_OTTS,df1_1.5U,df1_2.5U,df1_3.5O,df1_4.5O,df1_HT0.5U,df1_HT1.5O,df1_FT1/1.5O,df1_FT1/2.5O,df1_FT1/2.5U,df1_FT1/3.5U,df1_FT1/4.5U,df1_FTX/1.5O,df1_FTX/2.5O,df1_FTX/2.5U,df1_FTX/3.5U,df1_FTX/4.5U,df1_FT2/1.5O,df1_FT2/2.5O,df1_FT2/2.5U,df1_FT2/3.5U,df1_FT2/4.5U
2489,UEL,PAOK,FCSB,47.21,27.96,24.82,1-0,75.17,72.03,52.78,65.28,38.53,81.34,92.42,43.5,20.1,57.22,22.65,0-0,77.32,42.75,79.87,58.04,67.84,73.83,58.9,38.75,22.36,84.76,93.89,PAOK,FCSB,1.67,4.0,4.75,1.18,1.22,2.15,2.3,2.2,5.5,1.13,1.62,1.57,1.95,1.8,1.3,3.5,2.01,1.89,3.5,1.3,6.5,1.11,1.4,2.75,3.0,1.36,10/3/2024,UEL,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,0.0,-1.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,-1.0,1.89,-1.0,1.3,-1.0,1.11,-1.0,1.8,-1.0,0.0,-1.0,-1.0,-1.0,1.57,1.4,-1.0,-1.0,1.36,56.5,34.72,61.47,18.66,7.58,41.96,32.16,30.82,18.19,29.02,38.4,43.63,18.25,10.77,17.19,22.74,25.84,16.2,9.56,15.26,20.19,22.94
2490,Scotland2,Falkirk,Ayr Utd,1.21,1.89,66.58,1-7,3.1,67.79,68.47,69.52,68.92,2.5,6.33,54.44,0.15,0.46,11.99,0-4,0.61,12.14,12.45,12.57,0.24,54.5,69.6,31.37,69.08,55.93,2.46,Falkirk,Ayr,1.85,3.6,4.0,1.22,1.29,1.83,2.4,2.2,4.33,1.2,1.62,1.53,1.73,2.0,1.25,3.75,1.85,2.0,3.0,1.36,5.5,1.13,1.36,3.0,2.75,1.4,2024-10-05 00:00:00,Scotland2,2 - 0,(1-0),2.0,0.0,2.0,1.0,0.0,1.0,1,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,1,1,0,1,0,0,1,-1.0,-1.0,0.0,-1.0,-1.0,0.0,1.25,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,0.0,-1.0,0.0,0.0,-1.0,45.56,30.48,31.08,97.5,93.67,87.43,99.76,0.84,0.83,0.38,0.03,0.08,1.31,1.3,0.59,0.05,0.12,46.29,45.89,20.69,1.66,4.21
2491,Scotland2,Queens Park,Hamilton,0.69,1.15,56.34,1-8,1.84,57.03,57.49,58.1,57.79,1.38,3.78,45.44,0.07,0.26,8.59,0-4,0.33,8.66,8.85,8.9,0.15,45.47,58.15,26.13,57.89,46.75,1.29,Queen's Park,Hamilton,2.15,3.3,3.4,1.3,1.33,1.67,2.63,2.25,3.5,1.29,1.57,1.44,1.57,2.25,1.2,4.33,1.73,2.08,2.63,1.44,4.5,1.17,1.33,3.25,2.63,1.44,2024-10-05 00:00:00,Scotland2,1 - 0,(0-0),1.0,0.0,1.0,0.0,0.0,0.0,1,0,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,2.25,-1.0,-1.0,0.0,-1.0,-1.0,1.44,-1.0,3.25,0.0,-1.0,54.56,41.9,42.21,98.62,96.22,91.1,99.85,0.4,0.4,0.29,0.01,0.03,0.67,0.66,0.49,0.02,0.04,32.73,32.56,23.78,0.78,2.13
2492,Scotland3,Inverness,Stenhousemuir,0.64,1.27,69.37,1-7,1.91,70.01,70.64,71.04,70.15,3.6,8.67,47.88,0.11,0.42,14.77,0-4,0.53,14.88,15.19,15.25,0.38,47.94,71.21,21.88,70.63,63.95,2.7,Inverness,Stenhousemuir,1.95,3.5,3.25,1.25,1.22,1.7,2.6,2.1,4.0,1.22,1.67,1.5,1.83,1.83,1.3,3.4,2.0,1.85,3.4,1.3,6.5,1.1,1.4,2.75,3.0,1.36,2024-09-28 00:00:00,Scotland3,0 - 0,(0-0),0.0,0.0,0.0,0.0,0.0,0.0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,-1.0,0.0,-1.0,-1.0,1.7,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,1.83,-1.0,-1.0,0.0,-1.0,-1.0,1.5,-1.0,2.75,0.0,-1.0,52.12,28.96,29.85,96.4,91.33,84.75,99.62,0.45,0.45,0.19,0.02,0.06,0.9,0.89,0.38,0.05,0.11,49.28,48.66,20.71,2.5,6.01
2493,Scotland3,Dumbarton,Montrose,0.37,0.74,55.74,1-8,1.11,56.11,56.48,56.76,56.4,1.61,4.31,40.02,0.05,0.21,9.83,0-4,0.26,9.88,10.04,10.07,0.2,40.04,56.82,19.55,56.59,49.77,1.2,Dumbarton,Montrose,2.3,3.3,2.6,1.4,1.25,1.5,3.0,2.1,3.25,1.33,1.67,1.36,1.73,2.0,1.29,3.5,1.9,1.95,3.25,1.33,5.5,1.13,1.4,2.75,2.75,1.4,2024-09-21 00:00:00,Scotland3,0 - 1,(0-1),0.0,1.0,1.0,0.0,1.0,1.0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,1,1,0,0,1,-1.0,-1.0,2.6,-1.0,-1.0,1.5,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,-1.0,2.0,-1.0,-1.0,3.25,-1.0,-1.0,1.36,-1.0,0.0,0.0,-1.0,59.98,43.24,43.6,98.39,95.69,89.93,99.8,0.21,0.21,0.16,0.01,0.02,0.42,0.42,0.32,0.01,0.03,31.64,31.44,24.3,0.9,2.4
