# Reading Model Predictions and Bet365 Odds

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import warnings

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

predictions = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/_predictions.xlsx')
bet365_odds = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/final_odds.xlsx')
bet365_odds.tail()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
2192,Benfica,FC Porto,2.1,3.3,3.5,1.29,1.33,1.7,2.75,2.2,4.0,1.22,1.62,1.4,1.75,2.0,1.29,3.5,1.98,1.88,3.25,1.33,6.5,1.11,1.4,2.75,2.75,1.4
2193,Sport Recife,Chapecoense-SC,1.38,4.5,8.5,1.08,1.18,3.0,1.91,2.3,8.0,1.06,1.57,1.8,2.25,1.57,1.29,3.5,1.98,1.88,3.4,1.3,6.5,1.11,1.4,2.75,2.75,1.4
2194,Avai,Mirassol,3.0,2.75,2.75,1.4,1.4,1.36,3.75,1.91,3.5,1.29,1.8,1.25,2.2,1.62,1.53,2.38,2.7,1.44,5.5,1.14,13.0,1.04,1.62,2.2,4.0,1.22
2195,Godoy Cruz,Talleres Cordoba,2.88,3.1,2.63,1.44,1.36,1.4,3.6,1.91,3.5,1.29,1.8,1.29,2.1,1.67,1.5,2.5,2.6,1.48,5.5,1.14,13.0,1.04,1.57,2.25,3.75,1.25
2196,Racing Club,Ind. Rivadavia,1.53,3.8,7.5,1.1,1.25,2.38,2.1,2.1,7.0,1.08,1.67,1.67,2.25,1.57,1.4,2.75,2.25,1.62,4.0,1.22,9.0,1.07,1.44,2.63,3.25,1.33


# Merging 2 DataFrames for similarity of values

In [2]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Function for fuzzy matching on both columns
def fuzzy_merge_on_two_columns(df1, df2, key1_home, key1_away, key2_home, key2_away, threshold=80):
    """
    Merge two DataFrames based on fuzzy matching of both Home and Away columns.
    - df1, df2: DataFrames to merge
    - key1_home, key1_away: column names for 'Home' and 'Away' in df1
    - key2_home, key2_away: column names for 'Home' and 'Away' in df2
    - threshold: minimum similarity score for a match
    """
    matches = []
    
    for i, row1 in df1.iterrows():
        home_team1, away_team1 = row1[key1_home], row1[key1_away]
        
        # Find the best match in df2 for both Home and Away teams
        best_match = None
        best_score = 0
        
        for j, row2 in df2.iterrows():
            home_team2, away_team2 = row2[key2_home], row2[key2_away]
            
            # Calculate similarity for both Home and Away columns
            home_score = fuzz.ratio(home_team1, home_team2)
            away_score = fuzz.ratio(away_team1, away_team2)
            
            # Average similarity score for the pair
            avg_score = (home_score + away_score) / 2
            
            # Check if this is the best match
            if avg_score >= threshold and avg_score > best_score:
                best_match = j
                best_score = avg_score
        
        # If a match was found above the threshold, save the indices
        if best_match is not None:
            matches.append((i, best_match))

    # Create matched DataFrames based on indices
    matched_df1 = df1.loc[[i for i, _ in matches]].reset_index(drop=True)
    matched_df2 = df2.loc[[j for _, j in matches]].reset_index(drop=True)
    
    # Concatenate the matched data side by side
    return pd.concat([matched_df1, matched_df2], axis=1, keys=["df1", "df2"])

# Use the function to merge
merged_df = fuzzy_merge_on_two_columns(predictions, bet365_odds, 'Home', 'Away', 'Home', 'Away', threshold=80)
merged_df.tail()

Unnamed: 0_level_0,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2
Unnamed: 0_level_1,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
1835,Switzerland,Grasshopper,St. Gallen,26.23,21.59,52.16,0-1,47.82,78.39,73.75,77.1,54.86,67.48,83.55,55.05,20.87,35.44,43.48,0-0,56.31,64.35,78.92,75.37,61.65,68.17,82.3,31.74,51.65,89.11,74.88,Grasshoppers,St. Gallen,2.63,3.6,2.45,1.53,1.29,1.44,3.2,2.3,3.0,1.36,1.57,1.36,1.53,2.38,1.18,4.5,1.65,2.2,2.5,1.5,4.5,1.18,1.3,3.4,2.5,1.5
1836,Turkey,Goztepe,Konyaspor,47.68,26.61,25.68,1-1,74.29,73.36,52.29,88.91,67.85,53.71,72.59,69.47,49.32,33.23,16.65,1-0,82.55,65.97,49.88,81.03,45.99,86.71,77.16,59.91,43.45,67.12,81.42,Goztepe,Konyaspor,2.0,3.4,3.75,1.25,1.3,1.8,2.63,2.1,4.33,1.2,1.67,1.44,1.91,1.8,1.33,3.25,2.1,1.7,3.75,1.25,8.0,1.08,1.44,2.63,3.25,1.33
1837,Turkey,Galatasaray,Samsunspor,45.37,31.37,23.26,1-1,76.74,68.63,54.63,79.13,51.57,70.56,85.71,57.4,76.23,18.53,1.62,1-0,94.76,77.85,20.15,81.25,42.86,80.07,67.54,47.92,31.02,78.0,89.52,Galatasaray,Samsunspor,1.4,4.75,7.0,1.1,1.18,2.75,1.91,2.5,6.5,1.1,1.5,1.8,1.8,1.91,1.17,5.0,1.57,2.35,2.38,1.53,4.33,1.2,1.29,3.5,2.25,1.57
1838,Turkey,Basaksehir,Besiktas,15.95,24.16,59.84,1-1,40.11,75.79,84.0,87.98,66.5,55.22,73.9,63.91,2.82,16.5,74.85,0-2,19.32,77.67,91.35,83.6,32.75,68.61,89.59,32.26,66.12,88.76,60.45,Basaksehir,Besiktas,2.88,3.25,2.45,1.53,1.33,1.4,3.4,2.2,3.0,1.36,1.62,1.3,1.67,2.1,1.25,3.75,1.88,1.98,3.0,1.36,6.0,1.13,1.36,3.0,2.75,1.4
1839,Turkey,Fenerbahce,Sivasspor,69.09,20.4,10.37,2-0,89.49,79.46,30.77,88.82,68.64,52.65,71.65,59.89,74.65,20.52,1.94,1-0,95.17,76.59,22.46,80.37,45.76,92.13,62.69,72.32,25.95,52.88,92.06,Fenerbahce,Sivasspor,1.2,7.0,11.0,1.05,1.1,4.33,1.62,2.88,10.0,1.06,1.4,2.2,2.1,1.67,1.14,5.5,1.48,2.6,2.2,1.62,3.75,1.25,1.25,3.75,2.2,1.62


# Scraping SoccerStats For Match Results

In [3]:
final =  pd.DataFrame()
liqa = ''
unique_leagues = predictions['League'].unique().tolist()

for i in unique_leagues:
    URL = "https://www.soccerstats.com/results.asp?league=" + i + "&pmtype=bydate"
    page = requests.get(URL)
    liqa = i
    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="btable")
    sth = results.find_all("tr", class_="odd")
    sth


    date, league, home, away, ft, ht = [], [], [], [], [],[]
    for i in sth:
        date.append(i.find_all("td", align = 'right')[0].get_text(strip=True))
        league.append(liqa.capitalize())
        home.append(i.find_all("td", align = 'right')[1].get_text(strip=True))
        away.append(i.find("td", align = "left").get_text(strip = True))
        ft.append(i.find_all("td", align = 'center')[0].get_text(strip = True))
        try:
            ht.append(i.find_all("td", align = 'center')[2].get_text(strip = True))
        except IndexError as e:
            ht.append('NA')#print("Last output before error occurred:", i.find_all("td", align = 'center'))

    data = {'Date': date, 'League': league,'Home': home, 'Away': away, 'FT': ft, 'HT': ht}

# Create a DataFrame from the dictionary
    df = pd.DataFrame(data)

# Replace empty strings with NaN
    #next_df = df[(df['Date'] == formatted_date) & (df['HT'] == '')]
    df.replace('', pd.NA, inplace=True)

# Drop rows with NaN values
    df_cleaned = df.dropna()

#For Half-Time Results
    hthg, htag = [], []
    for i in df_cleaned['HT']:
        if i == 'NA':
            hthg.append('NA')
            htag.append('NA')
        elif i == '+' or i == '-':
            hthg.append('NA')
            htag.append('NA')
        else:
            try:
                hthg.append(int(i[1]))
                htag.append(int(i[3]))
            except IndexError as e:
                print("Last output before error occurred:", i)



#For Full-Time Results
    hg, ag, tg = [], [], []
    for i in df_cleaned['FT']:
        if len(i) < 5 or ':' in i:
            hg.append('NA')
            ag.append('NA')
            tg.append('NA')
        else:
            try:
                hghg = int(i.split(' - ')[0])
                hg.append(hghg)
                agag = int(i.split(' - ')[1])
                ag.append(agag)
                tg.append(hghg + agag)
            except:
                print(hghg + agag)

    
    df_cleaned['FTHG'], df_cleaned['FTAG'], df_cleaned['FTTG'] = hg, ag, tg
    df_cleaned['HTHG'], df_cleaned['HTAG'] = hthg, htag
    df_cleaned['HTTG'] = df_cleaned['HTHG'] + df_cleaned['HTAG']
    
    final = pd.concat([final, df_cleaned], ignore_index=True)
    
final = final[final['HT'] != 'NA']

# Example: Fix the 'date' column by removing the weekday and extra punctuation
final['Date'] = final['Date'].str.extract(r'(\d{1,2} \w{3})')  # Extract day and month part
final['Date'] = final['Date'] + ' 2024'  # Append the year

# Convert to datetime format
final['Date'] = pd.to_datetime(final['Date'], format='%d %b %Y', errors='coerce')

# Filter rows before September 6th, 2024
final_filtered = final[final['Date'] >= pd.Timestamp('2024-09-17')]
combined = pd.concat([final_filtered.head(), final_filtered.tail()])

combined

Unnamed: 0,Date,League,Home,Away,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
49,2024-09-17,Spain,Mallorca,Real Sociedad,1 - 0,(1-0),1,0,1,1,0,1
50,2024-09-18,Spain,Real Betis,Getafe,2 - 1,(0-0),2,1,3,0,0,0
51,2024-09-19,Spain,Leganes,Athletic Bilbao,0 - 2,(0-0),0,2,2,0,0,0
52,2024-09-20,Spain,Alaves,Sevilla FC,2 - 1,(1-0),2,1,3,1,0,1
53,2024-09-21,Spain,Valladolid,Real Sociedad,0 - 0,(0-0),0,0,0,0,0,0
9460,2024-11-03,Portugal2,FC Porto B,Torreense,1 - 1,(1-1),1,1,2,1,1,2
9461,2024-11-03,Portugal2,Uniao de Leiria,Leixoes,1 - 0,(1-0),1,0,1,1,0,1
9462,2024-11-04,Portugal2,Tondela,Oliveirense,2 - 0,(2-0),2,0,2,2,0,2
9463,2024-11-08,Portugal2,Vizela,Chaves,0 - 1,(0-0),0,1,1,0,0,0
9464,2024-11-09,Portugal2,Penafiel,Felgueiras,2 - 1,(0-1),2,1,3,0,1,1


# Merging with Predictions + Odds Dataframes

In [4]:
# Flatten multi-level columns
merged_df.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in merged_df.columns]

# Rename two columns
merged_df = merged_df.rename(columns={'df1_Home': 'Home', 'df1_Away': 'Away'})

# Merge on multiple columns
final_df = pd.merge(merged_df, final_filtered, on=['Home', 'Away'], how='inner')

# Drop duplicates based on the 'Home' and 'Away' columns
final_df_unique = final_df.drop_duplicates(subset=['Home', 'Away'])

# Reset the index afterward
final_df_unique.reset_index(drop=True, inplace=True)

print('Number of games matched: ', len(final_df_unique))
final_df_unique.tail()

Number of games matched:  1635


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
1630,Mexico,Queretaro,Santos Laguna,41.96,36.02,22.01,0-0,77.98,63.97,58.03,48.95,22.12,91.87,97.53,30.25,17.88,54.65,27.46,0-0,72.53,45.34,82.11,51.18,84.4,62.66,46.88,25.87,13.27,92.24,97.35,Queretaro,Santos Laguna,1.91,3.5,3.9,1.25,1.29,1.83,2.6,2.1,4.33,1.2,1.67,1.5,1.91,1.91,1.29,3.5,2.0,1.85,3.25,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-09,Mexico,3 - 2,(1-1),3,2,5,1,1,2
1631,Poland,Korona Kielce,Lechia Gdansk,43.0,23.92,33.08,1-1,66.92,76.08,57.0,74.37,50.76,71.3,86.22,53.82,19.06,57.76,23.18,0-0,76.82,42.24,80.94,51.15,80.39,76.78,71.19,42.88,35.34,81.87,86.96,Korona Kielce,Lechia Gdansk,1.9,3.4,4.0,1.22,1.3,1.91,2.5,2.2,4.33,1.2,1.62,1.5,1.75,2.0,1.25,3.75,1.88,1.98,3.25,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-09,Poland,0 - 0,(0-0),0,0,0,0,0,0
1632,Portugal2,Penafiel,Felgueiras,40.38,31.95,27.67,1-1,72.33,68.05,59.62,76.71,48.76,73.08,87.4,55.88,34.79,40.34,24.77,0-0,75.13,59.56,65.11,71.27,65.12,76.51,69.13,42.48,32.84,82.16,88.48,Penafiel,Felgueiras,2.8,3.25,2.45,1.53,1.33,1.4,3.5,2.1,3.2,1.36,1.67,1.29,1.83,1.83,1.33,3.25,2.08,1.73,3.75,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-11-09,Portugal2,2 - 1,(0-1),2,1,3,0,1,1
1633,Spain,Real Madrid,Osasuna,70.65,19.45,9.76,2-0,90.1,80.41,29.21,87.81,67.44,54.06,72.89,57.39,34.51,58.84,6.63,0-0,93.35,41.14,65.47,43.93,88.64,92.09,60.36,72.23,23.73,53.0,93.13,Real Madrid,Osasuna,1.22,6.25,12.0,1.05,1.13,4.0,1.62,2.88,9.0,1.04,1.4,2.2,1.95,1.8,1.14,5.5,1.48,2.6,2.2,1.67,3.75,1.29,1.25,3.75,2.1,1.67,2024-11-09,Spain,4 - 0,(2-0),4,0,4,2,0,2
1634,Turkey,Antalyaspor,Bodrumspor,35.64,39.29,25.07,0-0,74.93,60.71,64.36,59.48,28.84,87.96,95.83,40.23,8.03,52.45,39.48,0-0,60.48,47.51,91.93,52.84,81.47,64.43,56.7,27.66,20.46,91.34,94.71,Antalyaspor,Bodrumspor,2.4,3.0,3.25,1.33,1.36,1.53,3.1,2.0,3.75,1.25,1.73,1.36,1.83,1.83,1.4,2.75,2.25,1.62,4.0,1.22,10.0,1.06,1.5,2.5,3.4,1.3,2024-11-09,Turkey,3 - 2,(3-1),3,2,5,3,1,4


# Creating Results Columns

In [5]:
import numpy as np

# Add the new columns based on the condition
final_df_unique['FT1'] = np.where(final_df_unique['FTHG'] > final_df_unique['FTAG'], 1, 0)
final_df_unique['FTX'] = np.where(final_df_unique['FTHG'] == final_df_unique['FTAG'], 1, 0)
final_df_unique['FT2'] = np.where(final_df_unique['FTHG'] < final_df_unique['FTAG'], 1, 0)

final_df_unique['FT1X'] = np.where(final_df_unique['FTHG'] >= final_df_unique['FTAG'], 1, 0)
final_df_unique['FT12'] = np.where(final_df_unique['FTX'] == 0, 1, 0)
final_df_unique['FTX2'] = np.where(final_df_unique['FTHG'] <= final_df_unique['FTAG'], 1, 0)

final_df_unique['1.5O'] = np.where(final_df_unique['FTTG'] > 1.5, 1, 0)
final_df_unique['1.5U'] = np.where(final_df_unique['FTTG'] < 1.5, 1, 0)
final_df_unique['2.5O'] = np.where(final_df_unique['FTTG'] > 2.5, 1, 0)
final_df_unique['2.5U'] = np.where(final_df_unique['FTTG'] < 2.5, 1, 0)
final_df_unique['3.5O'] = np.where(final_df_unique['FTTG'] > 3.5, 1, 0)
final_df_unique['3.5U'] = np.where(final_df_unique['FTTG'] < 3.5, 1, 0)
final_df_unique['4.5O'] = np.where(final_df_unique['FTTG'] > 4.5, 1, 0)
final_df_unique['4.5U'] = np.where(final_df_unique['FTTG'] < 4.5, 1, 0)

final_df_unique['BTTS'] = np.where((final_df_unique['FTHG'] != 0) & (final_df_unique['FTAG'] != 0), 1, 0)
final_df_unique['OTTS'] = np.where(final_df_unique['BTTS'] == 0, 1, 0)

final_df_unique['HT1'] = np.where(final_df_unique['HTHG'] > final_df_unique['HTAG'], 1, 0)
final_df_unique['HTX'] = np.where(final_df_unique['HTHG'] == final_df_unique['HTAG'], 1, 0)
final_df_unique['HT2'] = np.where(final_df_unique['HTHG'] < final_df_unique['HTAG'], 1, 0)

final_df_unique['HT1X'] = np.where(final_df_unique['HTHG'] >= final_df_unique['HTAG'], 1, 0)
final_df_unique['HT12'] = np.where(final_df_unique['HTX'] == 0, 1, 0)
final_df_unique['HTX2'] = np.where(final_df_unique['HTHG'] <= final_df_unique['HTAG'], 1, 0)

final_df_unique['HT0.5O'] = np.where(final_df_unique['HTTG'] > 0.5, 1, 0)
final_df_unique['HT0.5U'] = np.where(final_df_unique['HTTG'] < 0.5, 1, 0)
final_df_unique['HT1.5O'] = np.where(final_df_unique['HTTG'] > 1.5, 1, 0)
final_df_unique['HT1.5U'] = np.where(final_df_unique['HTTG'] < 1.5, 1, 0)

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  1635


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U
1630,Mexico,Queretaro,Santos Laguna,41.96,36.02,22.01,0-0,77.98,63.97,58.03,48.95,22.12,91.87,97.53,30.25,17.88,54.65,27.46,0-0,72.53,45.34,82.11,51.18,84.4,62.66,46.88,25.87,13.27,92.24,97.35,Queretaro,Santos Laguna,1.91,3.5,3.9,1.25,1.29,1.83,2.6,2.1,4.33,1.2,1.67,1.5,1.91,1.91,1.29,3.5,2.0,1.85,3.25,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-09,Mexico,3 - 2,(1-1),3,2,5,1,1,2,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0
1631,Poland,Korona Kielce,Lechia Gdansk,43.0,23.92,33.08,1-1,66.92,76.08,57.0,74.37,50.76,71.3,86.22,53.82,19.06,57.76,23.18,0-0,76.82,42.24,80.94,51.15,80.39,76.78,71.19,42.88,35.34,81.87,86.96,Korona Kielce,Lechia Gdansk,1.9,3.4,4.0,1.22,1.3,1.91,2.5,2.2,4.33,1.2,1.62,1.5,1.75,2.0,1.25,3.75,1.88,1.98,3.25,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-09,Poland,0 - 0,(0-0),0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1
1632,Portugal2,Penafiel,Felgueiras,40.38,31.95,27.67,1-1,72.33,68.05,59.62,76.71,48.76,73.08,87.4,55.88,34.79,40.34,24.77,0-0,75.13,59.56,65.11,71.27,65.12,76.51,69.13,42.48,32.84,82.16,88.48,Penafiel,Felgueiras,2.8,3.25,2.45,1.53,1.33,1.4,3.5,2.1,3.2,1.36,1.67,1.29,1.83,1.83,1.33,3.25,2.08,1.73,3.75,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-11-09,Portugal2,2 - 1,(0-1),2,1,3,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1
1633,Spain,Real Madrid,Osasuna,70.65,19.45,9.76,2-0,90.1,80.41,29.21,87.81,67.44,54.06,72.89,57.39,34.51,58.84,6.63,0-0,93.35,41.14,65.47,43.93,88.64,92.09,60.36,72.23,23.73,53.0,93.13,Real Madrid,Osasuna,1.22,6.25,12.0,1.05,1.13,4.0,1.62,2.88,9.0,1.04,1.4,2.2,1.95,1.8,1.14,5.5,1.48,2.6,2.2,1.67,3.75,1.29,1.25,3.75,2.1,1.67,2024-11-09,Spain,4 - 0,(2-0),4,0,4,2,0,2,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0
1634,Turkey,Antalyaspor,Bodrumspor,35.64,39.29,25.07,0-0,74.93,60.71,64.36,59.48,28.84,87.96,95.83,40.23,8.03,52.45,39.48,0-0,60.48,47.51,91.93,52.84,81.47,64.43,56.7,27.66,20.46,91.34,94.71,Antalyaspor,Bodrumspor,2.4,3.0,3.25,1.33,1.36,1.53,3.1,2.0,3.75,1.25,1.73,1.36,1.83,1.83,1.4,2.75,2.25,1.62,4.0,1.22,10.0,1.06,1.5,2.5,3.4,1.3,2024-11-09,Turkey,3 - 2,(3-1),3,2,5,3,1,4,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0


# Creating Profit Columns for Initial Model Predictions

In [6]:
ft1p, ftxp, ft2p, ft1xp, ft12p, ftx2p = [], [], [], [], [], []
over15, under15, over25, under25, over35, under35, over45, under45 = [], [], [], [], [], [], [], []
btts, otts = [], []

for i in range(len(final_df_unique)):
    ft_list = [final_df_unique['df1_FT1'].iloc[i], final_df_unique['df1_FTX'].iloc[i], final_df_unique['df1_FT2'].iloc[i]]
    if (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] == 1):
        ft1p.append(final_df_unique['df2_FT1'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] != 1):
        ft1p.append(-1)
    else:
        ft1p.append(0)
    
    if (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] == 1):
        ftxp.append(final_df_unique['df2_FTX'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] != 1):
        ftxp.append(-1)
    else:
        ftxp.append(0)
    
    if (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] == 1):
        ft2p.append(final_df_unique['df2_FT2'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] != 1):
        ft2p.append(-1)
    else:
        ft2p.append(0)

    dc_list = [final_df_unique['df1_DC1X'].iloc[i], final_df_unique['df1_DC12'].iloc[i], final_df_unique['df1_DCX2'].iloc[i]]
    if (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] == 1):
        ft1xp.append(final_df_unique['df2_DC1X'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] != 1):
        ft1xp.append(-1)
    else:
        ft1xp.append(0)
    
    if (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] == 1):
        ft12p.append(final_df_unique['df2_DC12'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] != 1):
        ft12p.append(-1)
    else:
        ft12p.append(0)
    
    if (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] == 1):
        ftx2p.append(final_df_unique['df2_DCX2'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] != 1):
        ftx2p.append(-1)
    else:
        ftx2p.append(0)
    
    if (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        over15.append(final_df_unique['df2_1.5O'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        over15.append(-1)
    else:
        over15.append(0)
    
    if (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        under15.append(final_df_unique['df2_1.5U'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        under15.append(-1)
    else:
        under15.append(0)
    
    if (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        over25.append(final_df_unique['df2_2.5O'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        over25.append(-1)
    else:
        over25.append(0)
    
    if (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        under25.append(final_df_unique['df2_2.5U'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        under25.append(-1)
    else:
        under25.append(0)
    
    if (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        over35.append(final_df_unique['df2_3.5O'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        over35.append(-1)
    else:
        over35.append(0)
    
    if (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        under35.append(final_df_unique['df2_3.5U'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        under35.append(-1)
    else:
        under35.append(0)
    
    if (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        over45.append(final_df_unique['df2_4.5O'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        over45.append(-1)
    else:
        over45.append(0)
    
    if (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        under45.append(final_df_unique['df2_4.5U'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        under45.append(-1)
    else:
        under45.append(0)
    
    if (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        btts.append(final_df_unique['df2_BTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        btts.append(-1)
    else:
        btts.append(0)
    
    if (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        otts.append(final_df_unique['df2_OTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        otts.append(-1)
    else:
        otts.append(0)

final_df_unique['FT1P'], final_df_unique['FTXP'], final_df_unique['FT2P'] = ft1p, ftxp, ft2p
final_df_unique['FT1XP'], final_df_unique['FT12P'], final_df_unique['FTX2P'] = ft1xp, ft12p, ftx2p
final_df_unique['1.5OP'], final_df_unique['1.5UP'], final_df_unique['2.5OP'], final_df_unique['2.5UP'] = over15, under15, over25, under25
final_df_unique['3.5OP'], final_df_unique['3.5UP'], final_df_unique['4.5OP'], final_df_unique['4.5UP'] = over35, under35, over45, under45
final_df_unique['BTTSP'], final_df_unique['OTTSP'] = btts, otts

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  1635


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP
1630,Mexico,Queretaro,Santos Laguna,41.96,36.02,22.01,0-0,77.98,63.97,58.03,48.95,22.12,91.87,97.53,30.25,17.88,54.65,27.46,0-0,72.53,45.34,82.11,51.18,84.4,62.66,46.88,25.87,13.27,92.24,97.35,Queretaro,Santos Laguna,1.91,3.5,3.9,1.25,1.29,1.83,2.6,2.1,4.33,1.2,1.67,1.5,1.91,1.91,1.29,3.5,2.0,1.85,3.25,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-09,Mexico,3 - 2,(1-1),3,2,5,1,1,2,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,0,1,0,1,0,1,1,0,1,0,1.91,0.0,0.0,1.25,0.0,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0
1631,Poland,Korona Kielce,Lechia Gdansk,43.0,23.92,33.08,1-1,66.92,76.08,57.0,74.37,50.76,71.3,86.22,53.82,19.06,57.76,23.18,0-0,76.82,42.24,80.94,51.15,80.39,76.78,71.19,42.88,35.34,81.87,86.96,Korona Kielce,Lechia Gdansk,1.9,3.4,4.0,1.22,1.3,1.91,2.5,2.2,4.33,1.2,1.62,1.5,1.75,2.0,1.25,3.75,1.88,1.98,3.25,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-09,Poland,0 - 0,(0-0),0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,0.0,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,0.0,1.33,0.0,1.13,-1.0,0.0
1632,Portugal2,Penafiel,Felgueiras,40.38,31.95,27.67,1-1,72.33,68.05,59.62,76.71,48.76,73.08,87.4,55.88,34.79,40.34,24.77,0-0,75.13,59.56,65.11,71.27,65.12,76.51,69.13,42.48,32.84,82.16,88.48,Penafiel,Felgueiras,2.8,3.25,2.45,1.53,1.33,1.4,3.5,2.1,3.2,1.36,1.67,1.29,1.83,1.83,1.33,3.25,2.08,1.73,3.75,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-11-09,Portugal2,2 - 1,(0-1),2,1,3,0,1,1,1,0,0,1,1,0,1,0,1,0,0,1,0,1,1,0,0,0,1,0,1,1,1,0,0,1,2.8,0.0,0.0,1.53,0.0,0.0,1.33,0.0,0.0,-1.0,0.0,1.25,0.0,1.08,1.83,0.0
1633,Spain,Real Madrid,Osasuna,70.65,19.45,9.76,2-0,90.1,80.41,29.21,87.81,67.44,54.06,72.89,57.39,34.51,58.84,6.63,0-0,93.35,41.14,65.47,43.93,88.64,92.09,60.36,72.23,23.73,53.0,93.13,Real Madrid,Osasuna,1.22,6.25,12.0,1.05,1.13,4.0,1.62,2.88,9.0,1.04,1.4,2.2,1.95,1.8,1.14,5.5,1.48,2.6,2.2,1.67,3.75,1.29,1.25,3.75,2.1,1.67,2024-11-09,Spain,4 - 0,(2-0),4,0,4,2,0,2,1,0,0,1,1,0,1,0,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1,0,1,0,1.22,0.0,0.0,1.05,0.0,0.0,1.14,0.0,1.48,0.0,0.0,-1.0,0.0,1.29,-1.0,0.0
1634,Turkey,Antalyaspor,Bodrumspor,35.64,39.29,25.07,0-0,74.93,60.71,64.36,59.48,28.84,87.96,95.83,40.23,8.03,52.45,39.48,0-0,60.48,47.51,91.93,52.84,81.47,64.43,56.7,27.66,20.46,91.34,94.71,Antalyaspor,Bodrumspor,2.4,3.0,3.25,1.33,1.36,1.53,3.1,2.0,3.75,1.25,1.73,1.36,1.83,1.83,1.4,2.75,2.25,1.62,4.0,1.22,10.0,1.06,1.5,2.5,3.4,1.3,2024-11-09,Turkey,3 - 2,(3-1),3,2,5,3,1,4,1,0,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,1,1,0,1,0,1,0,0.0,-1.0,0.0,1.33,0.0,0.0,1.4,0.0,0.0,-1.0,0.0,-1.0,0.0,-1.0,0.0,-1.0


In [7]:
(np.sum(final_df_unique['OTTSP']) - len(final_df_unique[final_df_unique['OTTSP'] != 0])) / len(final_df_unique[final_df_unique['OTTSP'] != 0])

-0.5927808069792802