# Reading Model Predictions and Bet365 Odds

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import warnings

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

csv_url = "https://docs.google.com/spreadsheets/d/1WfEG-1icUjj6k7TGePJQEXH-w0TLEIcN/export?format=csv"
uefa = pd.read_csv(csv_url, dtype={'FTHG': 'Int64', 'FTAG': 'Int64', 'HTHG': 'Int64', 'HTAG': 'Int64'})
uefa['FT'] = uefa['FTHG'].astype(str) + ' - ' + uefa['FTAG'].astype(str)
uefa['HT'] = '(' + uefa['HTHG'].astype(str) + '-' + uefa['HTAG'].astype(str) + ')'
uefa['FTTG'] = uefa['FTHG'] + uefa['FTAG']
uefa['HTTG'] = uefa['HTHG'] + uefa['HTAG']

predictions = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/_predictions.xlsx')
bet365_odds = pd.read_excel('C:/Users/99451/Desktop/MODEL/2025/dixon_coles_model_predictions/final_odds.xlsx')
bet365_odds.tail()

Unnamed: 0,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
2280,Cheltenham,Tranmere,2.4,3.4,2.75,1.4,1.3,1.53,3.2,2.1,3.5,1.29,1.67,1.36,1.8,1.91,1.33,3.25,2.05,1.75,3.75,1.25,8.0,1.08,1.44,2.63,3.25,1.33
2281,Queen's Park,Falkirk,4.2,4.0,1.67,2.1,1.22,1.22,4.75,2.25,2.2,1.62,1.57,1.17,1.8,1.91,1.22,4.0,1.73,2.08,2.75,1.4,5.0,1.14,1.33,3.25,2.63,1.44
2282,Plymouth,Watford,3.0,3.5,2.25,1.62,1.29,1.36,3.4,2.3,2.88,1.4,1.57,1.3,1.57,2.25,1.2,4.5,1.67,2.2,2.63,1.5,4.5,1.2,1.33,3.25,2.5,1.5
2283,PSG,Toulouse,1.33,5.75,7.0,1.1,1.14,3.25,1.8,2.75,7.0,1.1,1.4,1.91,1.75,2.0,1.14,5.5,1.5,2.63,2.2,1.67,3.75,1.29,1.25,3.75,2.1,1.67
2284,Getafe,Valladolid,1.73,3.3,5.75,1.14,1.33,2.05,2.5,1.91,6.5,1.1,1.8,1.5,2.63,1.44,1.62,2.3,2.75,1.44,5.5,1.14,13.0,1.04,1.62,2.2,4.0,1.22


# Merging 2 DataFrames for similarity of values

In [2]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

# Function for fuzzy matching on both columns
def fuzzy_merge_on_two_columns(df1, df2, key1_home, key1_away, key2_home, key2_away, threshold=80):
    """
    Merge two DataFrames based on fuzzy matching of both Home and Away columns.
    - df1, df2: DataFrames to merge
    - key1_home, key1_away: column names for 'Home' and 'Away' in df1
    - key2_home, key2_away: column names for 'Home' and 'Away' in df2
    - threshold: minimum similarity score for a match
    """
    matches = []
    
    for i, row1 in df1.iterrows():
        home_team1, away_team1 = row1[key1_home], row1[key1_away]
        
        # Find the best match in df2 for both Home and Away teams
        best_match = None
        best_score = 0
        
        for j, row2 in df2.iterrows():
            home_team2, away_team2 = row2[key2_home], row2[key2_away]
            
            # Calculate similarity for both Home and Away columns
            home_score = fuzz.ratio(home_team1, home_team2)
            away_score = fuzz.ratio(away_team1, away_team2)
            
            # Average similarity score for the pair
            avg_score = (home_score + away_score) / 2
            
            # Check if this is the best match
            if avg_score >= threshold and avg_score > best_score:
                best_match = j
                best_score = avg_score
        
        # If a match was found above the threshold, save the indices
        if best_match is not None:
            matches.append((i, best_match))

    # Create matched DataFrames based on indices
    matched_df1 = df1.loc[[i for i, _ in matches]].reset_index(drop=True)
    matched_df2 = df2.loc[[j for _, j in matches]].reset_index(drop=True)
    
    # Concatenate the matched data side by side
    return pd.concat([matched_df1, matched_df2], axis=1, keys=["df1", "df2"])

# Use the function to merge
merged_df = fuzzy_merge_on_two_columns(predictions, bet365_odds, 'Home', 'Away', 'Home', 'Away', threshold=80)
merged_df.tail()

Unnamed: 0_level_0,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df1,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2,df2
Unnamed: 0_level_1,League,Home,Away,FT1,FTX,FT2,FTR,DC1X,DC12,DCX2,1.5O,2.5O,3.5U,4.5U,BTTS,HT1,HTX,HT2,HTR,HTDC1X,HTDC12,HTDCX2,HT0.5O,HT1.5U,H0.5O,A0.5O,H1.5O,A1.5O,H2.5U,A2.5U,Home,Away,FT1,FTX,FT2,DC1X,DC12,DCX2,HT1,HTX,HT2,HT1X,HT12,HTX2,BTTS,OTTS,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,HT0.5O,HT0.5U,HT1.5O,HT1.5U
1914,Italy2,Cosenza,Modena,40.98,30.13,28.89,1-1,71.11,69.87,59.02,72.39,45.0,76.27,89.43,51.57,47.84,37.3,14.44,0-0,85.14,62.28,51.74,75.38,55.27,74.45,67.09,39.59,30.51,84.2,89.81,Cosenza,Modena,2.6,3.0,2.9,1.4,1.36,1.5,3.4,1.91,3.75,1.25,1.8,1.3,2.05,1.7,1.44,2.63,2.5,1.5,5.0,1.17,11.0,1.05,1.57,2.25,3.75,1.25
1915,Poland,Zaglebie Lubin,Motor Lublin,39.26,23.07,37.67,1-1,62.33,76.93,60.74,76.43,53.84,68.45,84.25,56.35,22.53,44.57,32.74,0-0,67.1,55.27,77.31,71.38,57.38,76.17,75.31,41.99,40.78,82.51,83.37,Zaglebie,Motor Lublin,2.05,3.4,3.5,1.29,1.3,1.73,2.75,2.2,4.0,1.22,1.62,1.4,1.67,2.1,1.25,3.75,1.83,2.03,3.0,1.36,6.0,1.13,1.36,3.0,2.75,1.4
1916,Poland,Jagiellonia,Slask Wroclaw,74.56,15.05,10.31,2-0,89.61,84.87,25.36,80.71,59.7,62.59,79.9,46.12,51.53,38.99,9.03,0-0,90.52,60.56,48.02,72.01,58.01,90.56,51.63,68.38,16.52,57.81,96.17,Jagiellonia,Slask Wroclaw,1.8,3.7,4.0,1.22,1.29,1.95,2.4,2.25,4.5,1.17,1.57,1.53,1.75,2.0,1.25,3.75,1.83,2.03,3.0,1.36,6.0,1.13,1.36,3.0,2.63,1.44
1917,Spain,Getafe,Valladolid,45.32,41.02,13.66,0-0,86.34,58.98,54.68,38.18,13.91,95.86,98.99,20.32,16.17,75.6,8.23,0-0,91.77,24.4,83.83,25.84,96.41,59.38,31.78,22.78,5.69,93.7,99.3,Getafe,Valladolid,1.73,3.3,5.75,1.14,1.33,2.05,2.5,1.91,6.5,1.1,1.8,1.5,2.63,1.44,1.62,2.3,2.75,1.44,5.5,1.14,13.0,1.04,1.62,2.2,4.0,1.22
1918,Spain2,Racing Ferrol,Levante,16.33,23.27,60.4,0-1,39.6,76.73,83.67,56.13,31.1,86.52,95.14,29.72,7.32,45.42,47.17,0-0,52.74,54.49,92.59,58.6,78.65,42.0,75.58,10.41,41.15,98.2,83.11,Racing Club Ferrol,Levante,3.2,3.0,2.38,1.53,1.36,1.36,4.0,1.95,3.2,1.36,1.8,1.22,2.0,1.73,1.44,2.63,2.4,1.53,4.5,1.18,11.0,1.05,1.53,2.38,3.75,1.25


# Scraping SoccerStats For Match Results

In [6]:
final =  pd.DataFrame()
liqa = ''
unique_leagues = predictions['League'].unique().tolist()

# Convert to lowercase and remove 'UNL'
unique_leagues = [league.lower() for league in unique_leagues if league.lower() != 'unl']

for i in unique_leagues:
    URL = "https://www.soccerstats.com/results.asp?league=" + i + "&pmtype=bydate"
    page = requests.get(URL)
    liqa = i
    soup = BeautifulSoup(page.content, "html.parser")
    results = soup.find(id="btable")
    sth = results.find_all("tr", class_="odd")
    sth


    date, league, home, away, ft, ht = [], [], [], [], [],[]
    for i in sth:
        date.append(i.find_all("td", align = 'right')[0].get_text(strip=True))
        league.append(liqa.capitalize())
        home.append(i.find_all("td", align = 'right')[1].get_text(strip=True))
        away.append(i.find("td", align = "left").get_text(strip = True))
        ft.append(i.find_all("td", align = 'center')[0].get_text(strip = True))
        try:
            ht.append(i.find_all("td", align = 'center')[2].get_text(strip = True))
        except IndexError as e:
            ht.append('NA')#print("Last output before error occurred:", i.find_all("td", align = 'center'))

    data = {'Date': date, 'League': league,'Home': home, 'Away': away, 'FT': ft, 'HT': ht}

# Create a DataFrame from the dictionary
    df = pd.DataFrame(data)

# Replace empty strings with NaN
    #next_df = df[(df['Date'] == formatted_date) & (df['HT'] == '')]
    df.replace('', pd.NA, inplace=True)

# Drop rows with NaN values
    df_cleaned = df.dropna()

#For Half-Time Results
    hthg, htag = [], []
    for i in df_cleaned['HT']:
        if i == 'NA':
            hthg.append('NA')
            htag.append('NA')
        elif i == '+' or i == '-':
            hthg.append('NA')
            htag.append('NA')
        else:
            try:
                hthg.append(int(i[1]))
                htag.append(int(i[3]))
            except IndexError as e:
                print("Last output before error occurred:", i)



#For Full-Time Results
    hg, ag, tg = [], [], []
    for i in df_cleaned['FT']:
        if len(i) < 5 or ':' in i:
            hg.append('NA')
            ag.append('NA')
            tg.append('NA')
        else:
            try:
                hghg = int(i.split(' - ')[0])
                hg.append(hghg)
                agag = int(i.split(' - ')[1])
                ag.append(agag)
                tg.append(hghg + agag)
            except:
                print(hghg + agag)

    
    df_cleaned['FTHG'], df_cleaned['FTAG'], df_cleaned['FTTG'] = hg, ag, tg
    df_cleaned['HTHG'], df_cleaned['HTAG'] = hthg, htag
    df_cleaned['HTTG'] = df_cleaned['HTHG'] + df_cleaned['HTAG']
    
    final = pd.concat([final, df_cleaned], ignore_index=True)
    
final = final[final['HT'] != 'NA']

# Example: Fix the 'date' column by removing the weekday and extra punctuation
final['Date'] = final['Date'].str.extract(r'(\d{1,2} \w{3})')  # Extract day and month part
final['Date'] = final['Date'] + ' 2024'  # Append the year

# Convert to datetime format
final['Date'] = pd.to_datetime(final['Date'], format='%d %b %Y', errors='coerce')

# Filter rows before September 6th, 2024
final_filtered = final[final['Date'] >= pd.Timestamp('2024-09-17')]

# Align columns of uefa to match final_filtered
uefa = uefa[final_filtered.columns]

# Concatenate
final_filtered = pd.concat([uefa, final_filtered], ignore_index=True)

combined = pd.concat([final_filtered.head(), final_filtered.tail()])

combined

Unnamed: 0,Date,League,Home,Away,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
0,9/17/2024,UCL,Juventus,PSV,3 - 1,(2-0),3,1,4,2,0,2
1,9/17/2024,UCL,Young Boys,Aston Villa,0 - 3,(0-2),0,3,3,0,2,2
2,9/17/2024,UCL,Bayern,Dinamo Zagreb,9 - 2,(3-0),9,2,11,3,0,3
3,9/17/2024,UCL,Milan,Liverpool,1 - 3,(1-2),1,3,4,1,2,3
4,9/17/2024,UCL,Real Madrid,Stuttgart,3 - 1,(0-0),3,1,4,0,0,0
4972,2024-11-09 00:00:00,Portugal2,Feirense,Tondela,1 - 1,(0-1),1,1,2,0,1,1
4973,2024-11-10 00:00:00,Portugal2,Pacos Ferreira,FC Porto B,2 - 2,(1-0),2,2,4,1,0,1
4974,2024-11-10 00:00:00,Portugal2,Leixoes,Maritimo,2 - 1,(1-0),2,1,3,1,0,1
4975,2024-11-10 00:00:00,Portugal2,Benfica B,Alverca,2 - 1,(1-1),2,1,3,1,1,2
4976,2024-11-10 00:00:00,Portugal2,Torreense,Uniao de Leiria,2 - 1,(1-1),2,1,3,1,1,2


# Merging with Predictions + Odds Dataframes

In [7]:
# Flatten multi-level columns
merged_df.columns = ['_'.join(col).strip() if isinstance(col, tuple) else col for col in merged_df.columns]

# Rename two columns
merged_df = merged_df.rename(columns={'df1_Home': 'Home', 'df1_Away': 'Away'})

# Merge on multiple columns
final_df = pd.merge(merged_df, final_filtered, on=['Home', 'Away'], how='inner')

# Drop duplicates based on the 'Home' and 'Away' columns
final_df_unique = final_df.drop_duplicates(subset=['Home', 'Away'])

# Reset the index afterward
final_df_unique.reset_index(drop=True, inplace=True)

print('Number of games matched: ', len(final_df_unique))
final_df_unique.tail()

Number of games matched:  1878


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG
1873,Brazil,Bragantino,Sao Paulo,31.67,29.88,38.45,1-1,61.55,70.12,68.33,65.83,38.56,81.32,92.41,45.58,30.05,46.23,23.62,0-0,76.28,53.67,69.85,68.55,61.86,64.93,69.37,28.18,33.13,91.07,88.31,Bragantino,Sao Paulo,2.6,3.0,3.0,1.36,1.36,1.5,3.4,1.95,3.75,1.25,1.8,1.33,2.0,1.75,1.44,2.75,2.4,1.53,4.5,1.2,10.0,1.06,1.53,2.38,3.75,1.25,2024-11-20 00:00:00,Brazil,1 - 1,(1-1),1,1,2,1,1,2
1874,Brazil,Criciuma,Vitoria,51.06,24.56,24.37,1-1,75.62,75.43,48.93,78.57,54.81,67.53,83.59,56.39,33.44,49.86,16.65,0-0,83.3,50.09,66.51,61.39,71.18,82.43,67.86,51.88,31.38,74.67,89.32,Criciuma,Vitoria,2.1,3.3,3.6,1.29,1.33,1.7,2.75,2.05,4.33,1.2,1.73,1.4,1.91,1.91,1.36,3.2,2.1,1.7,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-11-20 00:00:00,Brazil,0 - 1,(0-0),0,1,1,0,0,0
1875,Brazil,Bahia,Palmeiras,28.4,28.16,43.44,1-1,56.56,71.84,71.6,70.05,43.48,77.51,90.19,48.98,14.76,56.39,28.83,0-0,71.15,43.59,85.22,51.59,80.9,65.09,74.46,28.35,39.6,90.98,84.19,Bahia,Palmeiras,3.2,3.5,2.2,1.67,1.29,1.36,3.6,2.25,2.88,1.4,1.57,1.29,1.62,2.2,1.25,4.0,1.75,2.05,3.0,1.4,5.0,1.17,1.36,3.0,2.63,1.44,2024-11-20 00:00:00,Brazil,1 - 2,(1-1),1,2,3,1,1,2
1876,Brazil,Cuiaba,Flamengo,24.23,29.75,46.02,0-1,53.98,70.25,75.77,63.56,36.1,83.11,93.39,42.07,16.73,51.18,32.05,0-0,67.91,48.78,83.23,59.51,73.24,57.43,72.35,21.07,36.8,94.45,86.04,Cuiaba,Flamengo RJ,3.7,3.4,2.05,1.73,1.3,1.29,4.5,2.0,2.75,1.4,1.73,1.17,2.0,1.75,1.44,2.75,2.3,1.6,4.33,1.22,9.0,1.07,1.5,2.5,3.5,1.29,2024-11-20 00:00:00,Brazil,1 - 2,(0-0),1,2,3,0,0,0
1877,Brazil,Gremio,Juventude,53.86,24.01,22.11,1-1,77.87,75.97,46.12,78.56,54.8,67.53,83.59,55.42,36.76,44.83,18.26,0-0,81.59,55.02,63.09,69.41,60.92,83.44,65.88,53.68,29.2,73.09,90.52,Gremio,Juventude,1.75,3.5,5.0,1.17,1.29,2.0,2.38,2.2,4.75,1.17,1.62,1.53,1.8,1.95,1.29,3.75,1.97,1.93,3.4,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-20 00:00:00,Brazil,2 - 2,(1-1),2,2,4,1,1,2


# Creating Results Columns

In [8]:
import numpy as np

# Add the new columns based on the condition
final_df_unique['FT1'] = np.where(final_df_unique['FTHG'] > final_df_unique['FTAG'], 1, 0)
final_df_unique['FTX'] = np.where(final_df_unique['FTHG'] == final_df_unique['FTAG'], 1, 0)
final_df_unique['FT2'] = np.where(final_df_unique['FTHG'] < final_df_unique['FTAG'], 1, 0)

final_df_unique['FT1X'] = np.where(final_df_unique['FTHG'] >= final_df_unique['FTAG'], 1, 0)
final_df_unique['FT12'] = np.where(final_df_unique['FTX'] == 0, 1, 0)
final_df_unique['FTX2'] = np.where(final_df_unique['FTHG'] <= final_df_unique['FTAG'], 1, 0)

final_df_unique['1.5O'] = np.where(final_df_unique['FTTG'] > 1.5, 1, 0)
final_df_unique['1.5U'] = np.where(final_df_unique['FTTG'] < 1.5, 1, 0)
final_df_unique['2.5O'] = np.where(final_df_unique['FTTG'] > 2.5, 1, 0)
final_df_unique['2.5U'] = np.where(final_df_unique['FTTG'] < 2.5, 1, 0)
final_df_unique['3.5O'] = np.where(final_df_unique['FTTG'] > 3.5, 1, 0)
final_df_unique['3.5U'] = np.where(final_df_unique['FTTG'] < 3.5, 1, 0)
final_df_unique['4.5O'] = np.where(final_df_unique['FTTG'] > 4.5, 1, 0)
final_df_unique['4.5U'] = np.where(final_df_unique['FTTG'] < 4.5, 1, 0)

final_df_unique['BTTS'] = np.where((final_df_unique['FTHG'] != 0) & (final_df_unique['FTAG'] != 0), 1, 0)
final_df_unique['OTTS'] = np.where(final_df_unique['BTTS'] == 0, 1, 0)

final_df_unique['HT1'] = np.where(final_df_unique['HTHG'] > final_df_unique['HTAG'], 1, 0)
final_df_unique['HTX'] = np.where(final_df_unique['HTHG'] == final_df_unique['HTAG'], 1, 0)
final_df_unique['HT2'] = np.where(final_df_unique['HTHG'] < final_df_unique['HTAG'], 1, 0)

final_df_unique['HT1X'] = np.where(final_df_unique['HTHG'] >= final_df_unique['HTAG'], 1, 0)
final_df_unique['HT12'] = np.where(final_df_unique['HTX'] == 0, 1, 0)
final_df_unique['HTX2'] = np.where(final_df_unique['HTHG'] <= final_df_unique['HTAG'], 1, 0)

final_df_unique['HT0.5O'] = np.where(final_df_unique['HTTG'] > 0.5, 1, 0)
final_df_unique['HT0.5U'] = np.where(final_df_unique['HTTG'] < 0.5, 1, 0)
final_df_unique['HT1.5O'] = np.where(final_df_unique['HTTG'] > 1.5, 1, 0)
final_df_unique['HT1.5U'] = np.where(final_df_unique['HTTG'] < 1.5, 1, 0)

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  1878


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U
1873,Brazil,Bragantino,Sao Paulo,31.67,29.88,38.45,1-1,61.55,70.12,68.33,65.83,38.56,81.32,92.41,45.58,30.05,46.23,23.62,0-0,76.28,53.67,69.85,68.55,61.86,64.93,69.37,28.18,33.13,91.07,88.31,Bragantino,Sao Paulo,2.6,3.0,3.0,1.36,1.36,1.5,3.4,1.95,3.75,1.25,1.8,1.33,2.0,1.75,1.44,2.75,2.4,1.53,4.5,1.2,10.0,1.06,1.53,2.38,3.75,1.25,2024-11-20 00:00:00,Brazil,1 - 1,(1-1),1,1,2,1,1,2,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0
1874,Brazil,Criciuma,Vitoria,51.06,24.56,24.37,1-1,75.62,75.43,48.93,78.57,54.81,67.53,83.59,56.39,33.44,49.86,16.65,0-0,83.3,50.09,66.51,61.39,71.18,82.43,67.86,51.88,31.38,74.67,89.32,Criciuma,Vitoria,2.1,3.3,3.6,1.29,1.33,1.7,2.75,2.05,4.33,1.2,1.73,1.4,1.91,1.91,1.36,3.2,2.1,1.7,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-11-20 00:00:00,Brazil,0 - 1,(0-0),0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1
1875,Brazil,Bahia,Palmeiras,28.4,28.16,43.44,1-1,56.56,71.84,71.6,70.05,43.48,77.51,90.19,48.98,14.76,56.39,28.83,0-0,71.15,43.59,85.22,51.59,80.9,65.09,74.46,28.35,39.6,90.98,84.19,Bahia,Palmeiras,3.2,3.5,2.2,1.67,1.29,1.36,3.6,2.25,2.88,1.4,1.57,1.29,1.62,2.2,1.25,4.0,1.75,2.05,3.0,1.4,5.0,1.17,1.36,3.0,2.63,1.44,2024-11-20 00:00:00,Brazil,1 - 2,(1-1),1,2,3,1,1,2,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0
1876,Brazil,Cuiaba,Flamengo,24.23,29.75,46.02,0-1,53.98,70.25,75.77,63.56,36.1,83.11,93.39,42.07,16.73,51.18,32.05,0-0,67.91,48.78,83.23,59.51,73.24,57.43,72.35,21.07,36.8,94.45,86.04,Cuiaba,Flamengo RJ,3.7,3.4,2.05,1.73,1.3,1.29,4.5,2.0,2.75,1.4,1.73,1.17,2.0,1.75,1.44,2.75,2.3,1.6,4.33,1.22,9.0,1.07,1.5,2.5,3.5,1.29,2024-11-20 00:00:00,Brazil,1 - 2,(0-0),1,2,3,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1
1877,Brazil,Gremio,Juventude,53.86,24.01,22.11,1-1,77.87,75.97,46.12,78.56,54.8,67.53,83.59,55.42,36.76,44.83,18.26,0-0,81.59,55.02,63.09,69.41,60.92,83.44,65.88,53.68,29.2,73.09,90.52,Gremio,Juventude,1.75,3.5,5.0,1.17,1.29,2.0,2.38,2.2,4.75,1.17,1.62,1.53,1.8,1.95,1.29,3.75,1.97,1.93,3.4,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-20 00:00:00,Brazil,2 - 2,(1-1),2,2,4,1,1,2,0,1,0,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0


# Creating Profit Columns for Initial Model Predictions

In [21]:
ft1p, ftxp, ft2p, ft1xp, ft12p, ftx2p = [], [], [], [], [], []
over15, under15, over25, under25, over35, under35, over45, under45 = [], [], [], [], [], [], [], []
btts, otts, ht1p, htxp, ht2p, ht1xp, ht12p, htx2p = [], [], [], [], [], [], [], []
htover05, htunder05, htover15, htunder15 = [], [], [], []

for i in range(len(final_df_unique)):
    ft_list = [final_df_unique['df1_FT1'].iloc[i], final_df_unique['df1_FTX'].iloc[i], final_df_unique['df1_FT2'].iloc[i]]
    if (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] == 1):
        ft1p.append(final_df_unique['df2_FT1'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 0) and (final_df_unique['FT1'].iloc[i] != 1):
        ft1p.append(-1)
    else:
        ft1p.append(0)
    
    if (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] == 1):
        ftxp.append(final_df_unique['df2_FTX'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 1) and (final_df_unique['FTX'].iloc[i] != 1):
        ftxp.append(-1)
    else:
        ftxp.append(0)
    
    if (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] == 1):
        ft2p.append(final_df_unique['df2_FT2'].iloc[i])
    elif (ft_list.index(max(ft_list)) == 2) and (final_df_unique['FT2'].iloc[i] != 1):
        ft2p.append(-1)
    else:
        ft2p.append(0)

    dc_list = [final_df_unique['df1_DC1X'].iloc[i], final_df_unique['df1_DC12'].iloc[i], final_df_unique['df1_DCX2'].iloc[i]]
    if (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] == 1):
        ft1xp.append(final_df_unique['df2_DC1X'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 0) and (final_df_unique['FT1X'].iloc[i] != 1):
        ft1xp.append(-1)
    else:
        ft1xp.append(0)
    
    if (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] == 1):
        ft12p.append(final_df_unique['df2_DC12'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 1) and (final_df_unique['FT12'].iloc[i] != 1):
        ft12p.append(-1)
    else:
        ft12p.append(0)
    
    if (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] == 1):
        ftx2p.append(final_df_unique['df2_DCX2'].iloc[i])
    elif (dc_list.index(max(dc_list)) == 2) and (final_df_unique['FTX2'].iloc[i] != 1):
        ftx2p.append(-1)
    else:
        ftx2p.append(0)
    
    if (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        over15.append(final_df_unique['df2_1.5O'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] >= 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        over15.append(-1)
    else:
        over15.append(0)
    
    if (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] != 1):
        under15.append(final_df_unique['df2_1.5U'].iloc[i])
    elif (final_df_unique['df1_1.5O'].iloc[i] < 50) and (final_df_unique['1.5O'].iloc[i] == 1):
        under15.append(-1)
    else:
        under15.append(0)
    
    if (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        over25.append(final_df_unique['df2_2.5O'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] >= 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        over25.append(-1)
    else:
        over25.append(0)
    
    if (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] != 1):
        under25.append(final_df_unique['df2_2.5U'].iloc[i])
    elif (final_df_unique['df1_2.5O'].iloc[i] < 50) and (final_df_unique['2.5O'].iloc[i] == 1):
        under25.append(-1)
    else:
        under25.append(0)
    
    if (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        over35.append(final_df_unique['df2_3.5O'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] <= 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        over35.append(-1)
    else:
        over35.append(0)
    
    if (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] != 1):
        under35.append(final_df_unique['df2_3.5U'].iloc[i])
    elif (final_df_unique['df1_3.5U'].iloc[i] > 50) and (final_df_unique['3.5O'].iloc[i] == 1):
        under35.append(-1)
    else:
        under35.append(0)
    
    if (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        over45.append(final_df_unique['df2_4.5O'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] <= 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        over45.append(-1)
    else:
        over45.append(0)
    
    if (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] != 1):
        under45.append(final_df_unique['df2_4.5U'].iloc[i])
    elif (final_df_unique['df1_4.5U'].iloc[i] > 50) and (final_df_unique['4.5O'].iloc[i] == 1):
        under45.append(-1)
    else:
        under45.append(0)
    
    if (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        btts.append(final_df_unique['df2_BTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] >= 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        btts.append(-1)
    else:
        btts.append(0)
    
    if (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] != 1):
        otts.append(final_df_unique['df2_OTTS'].iloc[i])
    elif (final_df_unique['df1_BTTS'].iloc[i] < 50) and (final_df_unique['BTTS'].iloc[i] == 1):
        otts.append(-1)
    else:
        otts.append(0)
    
    ht_list = [final_df_unique['df1_HT1'].iloc[i], final_df_unique['df1_HTX'].iloc[i], final_df_unique['df1_HT2'].iloc[i]]
    if (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] == 1):
        ht1p.append(final_df_unique['df2_HT1'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 0) and (final_df_unique['HT1'].iloc[i] != 1):
        ht1p.append(-1)
    else:
        ht1p.append(0)
    
    if (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] == 1):
        htxp.append(final_df_unique['df2_HTX'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 1) and (final_df_unique['HTX'].iloc[i] != 1):
        htxp.append(-1)
    else:
        htxp.append(0)
    
    if (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] == 1):
        ht2p.append(final_df_unique['df2_HT2'].iloc[i])
    elif (ht_list.index(max(ht_list)) == 2) and (final_df_unique['HT2'].iloc[i] != 1):
        ht2p.append(-1)
    else:
        ht2p.append(0)
    
    htdc_list = [final_df_unique['df1_HTDC1X'].iloc[i], final_df_unique['df1_HTDC12'].iloc[i], final_df_unique['df1_HTDCX2'].iloc[i]]
    if (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X'].iloc[i] == 1):
        ht1xp.append(final_df_unique['df2_HT1X'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 0) and (final_df_unique['HT1X'].iloc[i] != 1):
        ht1xp.append(-1)
    else:
        ht1xp.append(0)
    
    if (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12'].iloc[i] == 1):
        ht12p.append(final_df_unique['df2_HT12'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 1) and (final_df_unique['HT12'].iloc[i] != 1):
        ht12p.append(-1)
    else:
        ht12p.append(0)
    
    if (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2'].iloc[i] == 1):
        htx2p.append(final_df_unique['df2_HTX2'].iloc[i])
    elif (htdc_list.index(max(htdc_list)) == 2) and (final_df_unique['HTX2'].iloc[i] != 1):
        htx2p.append(-1)
    else:
        htx2p.append(0)
    
    if (final_df_unique['df1_HT0.5O'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htover05.append(final_df_unique['df2_HT0.5O'].iloc[i])
    elif (final_df_unique['df1_HT0.5O'].iloc[i] >= 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htover05.append(-1)
    else:
        htover05.append(0)
    
    if (final_df_unique['df1_HT0.5O'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] != 1):
        htunder05.append(final_df_unique['df2_HT0.5U'].iloc[i])
    elif (final_df_unique['df1_HT0.5O'].iloc[i] < 50) and (final_df_unique['HT0.5O'].iloc[i] == 1):
        htunder05.append(-1)
    else:
        htunder05.append(0)
    
    if (final_df_unique['df1_HT1.5U'].iloc[i] < 50) and (final_df_unique['HT1.5O'].iloc[i] == 1):
        htover15.append(final_df_unique['df2_HT1.5O'].iloc[i])
    elif (final_df_unique['df1_HT1.5U'].iloc[i] < 50) and (final_df_unique['HT1.5O'].iloc[i] != 1):
        htover15.append(-1)
    else:
        htover15.append(0)
    
    if (final_df_unique['df1_HT1.5U'].iloc[i] >= 50) and (final_df_unique['HT1.5O'].iloc[i] != 1):
        htunder15.append(final_df_unique['df2_HT1.5U'].iloc[i])
    elif (final_df_unique['df1_HT1.5U'].iloc[i] >= 50) and (final_df_unique['HT1.5O'].iloc[i] == 1):
        htunder15.append(-1)
    else:
        htunder15.append(0)

final_df_unique['FT1P'], final_df_unique['FTXP'], final_df_unique['FT2P'] = ft1p, ftxp, ft2p
final_df_unique['FT1XP'], final_df_unique['FT12P'], final_df_unique['FTX2P'] = ft1xp, ft12p, ftx2p
final_df_unique['1.5OP'], final_df_unique['1.5UP'], final_df_unique['2.5OP'], final_df_unique['2.5UP'] = over15, under15, over25, under25
final_df_unique['3.5OP'], final_df_unique['3.5UP'], final_df_unique['4.5OP'], final_df_unique['4.5UP'] = over35, under35, over45, under45
final_df_unique['BTTSP'], final_df_unique['OTTSP'] = btts, otts
final_df_unique['HT1P'], final_df_unique['HTXP'], final_df_unique['HT2P'] = ht1p, htxp, ht2p
final_df_unique['HT1XP'], final_df_unique['HT12P'], final_df_unique['HTX2P'] = ht1xp, ht12p, htx2p
final_df_unique['HT0.5OP'], final_df_unique['HT0.5UP'] = htover05, htunder05
final_df_unique['HT1.5OP'], final_df_unique['HT1.5UP'] = htover15, htunder15

print('Games Found: ', len(final_df_unique))
final_df_unique.tail()

Games Found:  1878


Unnamed: 0,df1_League,Home,Away,df1_FT1,df1_FTX,df1_FT2,df1_FTR,df1_DC1X,df1_DC12,df1_DCX2,df1_1.5O,df1_2.5O,df1_3.5U,df1_4.5U,df1_BTTS,df1_HT1,df1_HTX,df1_HT2,df1_HTR,df1_HTDC1X,df1_HTDC12,df1_HTDCX2,df1_HT0.5O,df1_HT1.5U,df1_H0.5O,df1_A0.5O,df1_H1.5O,df1_A1.5O,df1_H2.5U,df1_A2.5U,df2_Home,df2_Away,df2_FT1,df2_FTX,df2_FT2,df2_DC1X,df2_DC12,df2_DCX2,df2_HT1,df2_HTX,df2_HT2,df2_HT1X,df2_HT12,df2_HTX2,df2_BTTS,df2_OTTS,df2_1.5O,df2_1.5U,df2_2.5O,df2_2.5U,df2_3.5O,df2_3.5U,df2_4.5O,df2_4.5U,df2_HT0.5O,df2_HT0.5U,df2_HT1.5O,df2_HT1.5U,Date,League,FT,HT,FTHG,FTAG,FTTG,HTHG,HTAG,HTTG,FT1,FTX,FT2,FT1X,FT12,FTX2,1.5O,1.5U,2.5O,2.5U,3.5O,3.5U,4.5O,4.5U,BTTS,OTTS,HT1,HTX,HT2,HT1X,HT12,HTX2,HT0.5O,HT0.5U,HT1.5O,HT1.5U,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP
1873,Brazil,Bragantino,Sao Paulo,31.67,29.88,38.45,1-1,61.55,70.12,68.33,65.83,38.56,81.32,92.41,45.58,30.05,46.23,23.62,0-0,76.28,53.67,69.85,68.55,61.86,64.93,69.37,28.18,33.13,91.07,88.31,Bragantino,Sao Paulo,2.6,3.0,3.0,1.36,1.36,1.5,3.4,1.95,3.75,1.25,1.8,1.33,2.0,1.75,1.44,2.75,2.4,1.53,4.5,1.2,10.0,1.06,1.53,2.38,3.75,1.25,2024-11-20 00:00:00,Brazil,1 - 1,(1-1),1,1,2,1,1,2,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,0.0,0.0,-1.0,0.0,-1.0,0.0,1.44,0.0,0.0,1.53,0.0,1.2,0.0,1.06,0.0,-1.0,0.0,1.95,0.0,1.25,0.0,0.0,1.53,0.0,0.0,-1.0
1874,Brazil,Criciuma,Vitoria,51.06,24.56,24.37,1-1,75.62,75.43,48.93,78.57,54.81,67.53,83.59,56.39,33.44,49.86,16.65,0-0,83.3,50.09,66.51,61.39,71.18,82.43,67.86,51.88,31.38,74.67,89.32,Criciuma,Vitoria,2.1,3.3,3.6,1.29,1.33,1.7,2.75,2.05,4.33,1.2,1.73,1.4,1.91,1.91,1.36,3.2,2.1,1.7,4.0,1.25,8.0,1.08,1.44,2.63,3.25,1.33,2024-11-20 00:00:00,Brazil,0 - 1,(0-0),0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,-1.0,0.0,0.0,-1.0,0.0,0.0,-1.0,0.0,-1.0,0.0,0.0,1.25,0.0,1.08,-1.0,0.0,0.0,2.05,0.0,1.2,0.0,0.0,-1.0,0.0,0.0,1.33
1875,Brazil,Bahia,Palmeiras,28.4,28.16,43.44,1-1,56.56,71.84,71.6,70.05,43.48,77.51,90.19,48.98,14.76,56.39,28.83,0-0,71.15,43.59,85.22,51.59,80.9,65.09,74.46,28.35,39.6,90.98,84.19,Bahia,Palmeiras,3.2,3.5,2.2,1.67,1.29,1.36,3.6,2.25,2.88,1.4,1.57,1.29,1.62,2.2,1.25,4.0,1.75,2.05,3.0,1.4,5.0,1.17,1.36,3.0,2.63,1.44,2024-11-20 00:00:00,Brazil,1 - 2,(1-1),1,2,3,1,1,2,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,1,0,1,0,0.0,0.0,2.2,0.0,1.29,0.0,1.25,0.0,0.0,-1.0,0.0,1.4,0.0,1.17,0.0,-1.0,0.0,2.25,0.0,0.0,0.0,1.29,1.36,0.0,0.0,-1.0
1876,Brazil,Cuiaba,Flamengo,24.23,29.75,46.02,0-1,53.98,70.25,75.77,63.56,36.1,83.11,93.39,42.07,16.73,51.18,32.05,0-0,67.91,48.78,83.23,59.51,73.24,57.43,72.35,21.07,36.8,94.45,86.04,Cuiaba,Flamengo RJ,3.7,3.4,2.05,1.73,1.3,1.29,4.5,2.0,2.75,1.4,1.73,1.17,2.0,1.75,1.44,2.75,2.3,1.6,4.33,1.22,9.0,1.07,1.5,2.5,3.5,1.29,2024-11-20 00:00:00,Brazil,1 - 2,(0-0),1,2,3,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,0,1,0,1,0,1,0.0,0.0,2.05,0.0,0.0,1.29,1.44,0.0,0.0,-1.0,0.0,1.22,0.0,1.07,0.0,-1.0,0.0,2.0,0.0,0.0,0.0,1.17,-1.0,0.0,0.0,1.29
1877,Brazil,Gremio,Juventude,53.86,24.01,22.11,1-1,77.87,75.97,46.12,78.56,54.8,67.53,83.59,55.42,36.76,44.83,18.26,0-0,81.59,55.02,63.09,69.41,60.92,83.44,65.88,53.68,29.2,73.09,90.52,Gremio,Juventude,1.75,3.5,5.0,1.17,1.29,2.0,2.38,2.2,4.75,1.17,1.62,1.53,1.8,1.95,1.29,3.75,1.97,1.93,3.4,1.33,6.0,1.13,1.4,2.75,2.75,1.4,2024-11-20 00:00:00,Brazil,2 - 2,(1-1),2,2,4,1,1,2,0,1,0,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,0,1,0,1,1,0,1,0,-1.0,0.0,0.0,1.17,0.0,0.0,1.29,0.0,1.97,0.0,0.0,-1.0,0.0,1.13,1.8,0.0,0.0,2.2,0.0,1.17,0.0,0.0,1.4,0.0,0.0,-1.0


# Checking For ROI of Profit Columns

In [29]:
# Select columns that end with 'P'
columns = [col for col in final_df_unique.columns if col.endswith('P')]

# Calculate the value for each selected column
results = {}
for col in columns:
    numerator = np.sum(final_df_unique[col]) - len(final_df_unique[final_df_unique[col] != 0])
    denominator = len(final_df_unique[final_df_unique[col] != 0])
    results[col] = numerator / denominator * 100 if denominator != 0 else np.nan  # Avoid division by zero

# Convert results to a DataFrame or display as needed
results_df = pd.DataFrame(list(results.items()), columns=['Column', 'Result'])
results_df

Unnamed: 0,Column,Result
0,FT1P,-55.073055
1,FTXP,-70.156566
2,FT2P,-79.840256
3,FT1XP,-24.019883
4,FT12P,-32.751361
5,FTX2P,-43.188559
6,1.5OP,-32.621944
7,1.5UP,-74.044776
8,2.5OP,-51.176603
9,2.5UP,-50.051567


# ROI of Profit Columns According To Leagues

In [34]:
# Step 1: Filter leagues with at least 10 games
league_counts = final_df_unique['League'].value_counts()
leagues_with_10_games = league_counts[league_counts >= 10].index
filtered_df = final_df_unique[final_df_unique['League'].isin(leagues_with_10_games)]

# Group by 'League' and calculate results for each group
grouped_results = {}
for league, group in filtered_df.groupby('League'):
    group_results = {}
    for col in columns:
        numerator = np.sum(group[col]) - len(group[group[col] != 0])
        denominator = len(group[group[col] != 0])
        group_results[col] = numerator / denominator * 100 if denominator != 0 else np.nan  # Avoid division by zero
    grouped_results[league] = group_results

# Convert grouped results to a DataFrame for better visualization
grouped_results_df = pd.DataFrame(grouped_results).T

# Define a function to apply conditional formatting
def highlight_positive(val):
    # Highlight background to red if the value is positive
    color = 'background-color: red' if val > 0 else ''
    return color

# Apply the function to the DataFrame
styled_df = grouped_results_df.style.applymap(highlight_positive)

# Display the styled DataFrame
styled_df.to_excel("ROI_leagues.xlsx", index = True)
styled_df

Unnamed: 0,FT1P,FTXP,FT2P,FT1XP,FT12P,FTX2P,1.5OP,1.5UP,2.5OP,2.5UP,3.5OP,3.5UP,4.5OP,4.5UP,BTTSP,OTTSP,HT1P,HTXP,HT2P,HT1XP,HT12P,HTX2P,HT0.5OP,HT0.5UP,HT1.5OP,HT1.5UP
Argentina,-72.955882,29.916667,-165.384615,-34.930556,-23.0,-40.352941,-48.921053,-56.235294,-123.076923,-33.1,,-10.430108,,-8.956989,-127.076923,-65.9,-121.785714,-10.714286,-200.0,-16.016393,,-13.15625,-45.815789,-8.823529,-12.5,-39.89011
Austria,-108.730769,-200.0,-63.571429,-41.45,-4.857143,-130.666667,-34.333333,300.0,-40.6,-77.705882,-112.222222,-17.107143,-200.0,-7.342857,-79.681818,-97.0,-142.6,-27.478261,40.0,-12.952381,,-17.25,-45.37037,-38.5,-88.416667,7.04
Belgium,-58.151515,-65.714286,-120.3125,-14.354839,-34.909091,-68.928571,-15.653061,-139.285714,-61.107143,-64.392857,-130.153846,-10.976744,-200.0,-8.037037,-89.125,-40.28125,-27.058824,-73.666667,-53.777778,-15.08,-150.0,-30.961538,-35.2,-94.272727,-184.0,-42.694444
Brazil,-60.5,-200.0,-85.454545,-13.666667,-28.263158,-31.642857,-45.926471,150.0,-80.055556,-61.196078,,-30.086957,,-6.695652,-44.565217,-58.326087,-52.2,-11.728814,,-7.884615,,4.176471,-45.123077,56.5,-200.0,-41.393939
Denmark,-54.866667,25.0,-149.117647,9.583333,-64.5,-45.333333,-6.294118,,-43.44,-68.555556,-55.111111,-74.44,-33.333333,-32.290323,-5.333333,-75.1,48.846154,-59.285714,-200.0,11.578947,-32.333333,-25.75,-5.625,0.0,-13.1875,-48.944444
England,-23.470588,-106.0,-131.0,-5.391304,25.333333,-67.315789,-31.295455,-200.0,-61.541667,-76.333333,-200.0,-29.809524,-200.0,-18.613636,-24.56,-72.45,-126.142857,-25.933333,45.111111,-33.125,-11.75,-45.764706,-34.195122,-100.0,-69.583333,-23.818182
England2,-51.6,-91.875,-141.36,-11.773585,-7.833333,-63.62963,-49.726027,-10.538462,-75.766667,-33.232143,-200.0,-23.027027,-200.0,-7.6,-71.964286,-34.672414,-29.714286,-75.541667,-200.0,-33.089286,,-75.533333,-48.594595,-49.0,-70.777778,-19.766234
England3,-6.192308,-104.444444,-84.46875,-38.0,-24.566667,-42.517241,-21.4875,-79.769231,-32.454545,-25.714286,-84.466667,-35.474359,-140.625,-27.364706,-48.23913,-50.404255,-51.666667,-104.456522,-46.782609,-37.928571,-31.166667,-35.022222,-23.038961,-109.3125,-48.6,-31.75
England4,-109.52381,85.0,-69.487179,-66.363636,-47.018519,-45.75,-42.434783,-77.8,-66.117647,-38.94,-57.083333,-12.25,50.0,-0.814815,-81.5,-74.785714,-90.513514,-83.0,-131.666667,-56.830189,-90.285714,-66.0,-38.930556,-108.25,-87.5,-30.529412
England5,-67.734694,-200.0,-76.583333,-20.567568,-32.961538,-51.4,-23.222222,-139.285714,-41.822222,-85.953488,-128.7,-45.705128,-200.0,-17.735632,-43.390244,-92.191489,-82.424242,-77.558824,-56.190476,-38.346154,-114.666667,-63.466667,-28.818182,-130.636364,-153.571429,-43.716049
