In [22]:
# --- Notebook parameters: edit these, then re-run every cell below ---

# How many of each side's latest rows are kept by the .tail(last_games) calls.
last_games = 5
# CSV file handed to pd.read_csv.
file = 'norway.csv'

# The fixture being analysed; names must match the 'Home'/'Away' columns of the CSV.
home_team, away_team = 'KFUM Oslo', 'Lillestrom'

# Odds for home win / draw / away win, reported as FT1 / FTX / FT2 in the summary.
home_odds, draw_odds, away_odds = 2.3, 3.5, 2.9

In [23]:
import pandas as pd
import numpy as np
import math
from scipy.stats import poisson

pd.set_option('display.max_columns', None)

# Read only the header row (nrows=0) to find out which odds columns this file
# carries, instead of parsing the whole CSV twice.
available_columns = pd.read_csv(file, nrows=0).columns

# Use the PSCH/PSCD/PSCA odds columns when present, otherwise fall back to PSH/PSD/PSA.
if 'PSCH' in available_columns:
    columns_to_use = ['Country', 'Season', 'Home', 'Away', 'HG', 'AG', 'Res', 'PSCH', 'PSCD', 'PSCA']
else:
    columns_to_use = ['Country', 'Season', 'Home', 'Away', 'HG', 'AG', 'Res', 'PSH', 'PSD', 'PSA']

df = pd.read_csv(file, usecols=columns_to_use)

# Total goals per match and 0/1 indicator columns derived from it.
# Vectorised comparisons replace the row-by-row apply(lambda ...) calls.
df['Total'] = df['HG'] + df['AG']
df['1.5O'] = (df['Total'] > 1.5).astype(int)
df['2.5O'] = (df['Total'] > 2.5).astype(int)
df['3.5O'] = (df['Total'] > 3.5).astype(int)
# Both teams to score: 1 when each side has at least one goal.
df['BTTS'] = ((df['HG'] > 0) & (df['AG'] > 0)).astype(int)

df.head()

Unnamed: 0,Country,Season,Home,Away,HG,AG,Res,PSCH,PSCD,PSCA,Total,1.5O,2.5O,3.5O,BTTS
0,Norway,2012,Molde,Stromsgodset,2,1,H,1.76,3.87,5.18,3,1,1,0,1
1,Norway,2012,Honefoss,Lillestrom,0,0,D,2.58,3.5,2.88,0,0,0,0,0
2,Norway,2012,Odd,Sogndal,0,4,A,1.62,3.99,6.4,4,1,1,1,0
3,Norway,2012,Stabaek,Aalesund,0,0,D,2.81,3.47,2.65,0,0,0,0,0
4,Norway,2012,Tromso,Fredrikstad,1,0,H,1.58,4.13,6.82,1,0,0,0,0


In [24]:
# Last `last_games` rows (file order, assumed chronological - TODO confirm) of the
# home side's home fixtures and of the away side's away fixtures.
# .copy() makes each table an independent frame, so the column assignments
# below do not write into a slice of df (avoids SettingWithCopyWarning).
home_table = df[df['Home'] == home_team].tail(last_games).copy()
away_table = df[df['Away'] == away_team].tail(last_games).copy()

#Adding necessary columns
# League points from the perspective of the home side / the away side.
home_points_map = {'H': 3, 'D': 1, 'A': 0}
away_points_map = {'H': 0, 'D': 1, 'A': 3}
home_table['Points'] = home_table['Res'].map(home_points_map)
away_table['Points'] = away_table['Res'].map(away_points_map)

home_table

Unnamed: 0,Country,Season,Home,Away,HG,AG,Res,PSCH,PSCD,PSCA,Total,1.5O,2.5O,3.5O,BTTS,Points
2987,Norway,2024,KFUM Oslo,Sandefjord,3,3,D,2.31,3.49,3.19,6,1,1,1,1,1
3003,Norway,2024,KFUM Oslo,Viking,1,2,A,2.64,3.64,2.63,3,1,1,0,1,0
3022,Norway,2024,KFUM Oslo,Bodo/Glimt,1,1,D,4.6,4.22,1.72,2,1,0,0,1,1
3039,Norway,2024,KFUM Oslo,Rosenborg,1,0,H,2.36,3.65,2.98,1,0,0,0,0,3
3046,Norway,2024,KFUM Oslo,Tromso,0,1,A,2.28,3.19,3.56,1,0,0,0,0,0


In [25]:
# Calculating home/away team statistics
# Average over the matches actually present: .tail(last_games) returns fewer
# rows when a team has fewer than `last_games` fixtures at that venue, and
# dividing by last_games would then understate every average.
home_games = len(home_table)
away_games = len(away_table)
if home_games == 0 or away_games == 0:
    # Without this guard a misspelled team name silently yields all-zero stats.
    raise ValueError(
        "No matches found for home team {!r} or away team {!r}; "
        "check the team names against the data file.".format(home_team, away_team)
    )

home_ppg = home_table['Points'].sum() / home_games
away_ppg = away_table['Points'].sum() / away_games
# Goals for/against per match; the away side's own goals are in the 'AG' column.
home_scored = home_table['HG'].sum() / home_games
home_conceded = home_table['AG'].sum() / home_games
away_scored = away_table['AG'].sum() / away_games
away_conceded = away_table['HG'].sum() / away_games
# Share of recent matches that went over each goal line / had both teams scoring.
home_15over = home_table['1.5O'].sum() / home_games
away_15over = away_table['1.5O'].sum() / away_games
home_25over = home_table['2.5O'].sum() / home_games
away_25over = away_table['2.5O'].sum() / away_games
home_35over = home_table['3.5O'].sum() / home_games
away_35over = away_table['3.5O'].sum() / away_games
home_btts = home_table['BTTS'].sum() / home_games
away_btts = away_table['BTTS'].sum() / away_games

#Calculating league statistics
# League averages are taken over the latest season present in the data only.
league_table = df[df['Season'] == df['Season'].max()]
league_matches = len(league_table)
league_home_scored = round(league_table['HG'].sum() / league_matches, 2)
league_away_scored = round(league_table['AG'].sum() / league_matches, 2)
league_15over = round(league_table['1.5O'].sum() / league_matches, 2)
league_25over = round(league_table['2.5O'].sum() / league_matches, 2)
league_35over = round(league_table['3.5O'].sum() / league_matches, 2)
league_btts = round(league_table['BTTS'].sum() / league_matches, 2)

#Calculating win/draw/loss percentages
# Results ('H'/'D'/'A') are pooled across both recent-form tables.
# Multiplying by 100 before dividing keeps whole-number percentages exact, so
# int() cannot truncate e.g. 28.999999999999996 down to 28.
result_counts = pd.concat([home_table['Res'], away_table['Res']]).value_counts()
total_games = home_games + away_games
home_win = int(result_counts.get('H', 0) * 100 / total_games)
draw = int(result_counts.get('D', 0) * 100 / total_games)
away_win = int(result_counts.get('A', 0) * 100 / total_games)

#Calculating poisson distribution values
# Attack/defence ratios relative to the league averages, combined into the
# expected goals used as the Poisson mean for each side.
home_attack = home_scored / league_home_scored
home_defence = home_conceded / league_away_scored
away_attack = away_scored / league_away_scored
away_defence = away_conceded / league_home_scored
home_expected = home_attack * away_defence * league_home_scored
away_expected = away_attack * home_defence * league_away_scored

# Most likely goal count: whichever of floor(mean) and floor(mean) + 1 has the
# higher Poisson probability (the lower one wins ties).
home_floor = math.floor(home_expected)
away_floor = math.floor(away_expected)
home_xg = home_floor if poisson.pmf(home_floor, home_expected) >= poisson.pmf(home_floor + 1, home_expected) else home_floor + 1
away_xg = away_floor if poisson.pmf(away_floor, away_expected) >= poisson.pmf(away_floor + 1, away_expected) else away_floor + 1

# Probability (in whole percent, truncated) that each side scores at least once.
home_05over = int((1 - poisson.pmf(0, home_expected)) * 100)
away_05over = int((1 - poisson.pmf(0, away_expected)) * 100)

# Probability (in whole percent, truncated) that each side scores at least twice.
homes_15over = int((1 - (poisson.pmf(0, home_expected) + poisson.pmf(1, home_expected))) * 100)
aways_15over = int((1 - (poisson.pmf(0, away_expected) + poisson.pmf(1, away_expected))) * 100)

In [26]:
#Creating a league table
# Built with one groupby instead of scanning every match once per team with
# iterrows(). As before, only teams with at least one home match in
# league_table are listed, in alphabetical order prior to sorting.
teams = np.unique(league_table['Home'])

# Points each side takes from a result; any other result value scores 0.
home_points = league_table['Res'].map({'H': 3, 'D': 1}).fillna(0).astype(int)
away_points = league_table['Res'].map({'A': 3, 'D': 1}).fillna(0).astype(int)

# One row per team per match: home appearances stacked on away appearances.
appearances = pd.concat(
    [
        pd.DataFrame({'Teams': league_table['Home'], 'Points': home_points}),
        pd.DataFrame({'Teams': league_table['Away'], 'Points': away_points}),
    ],
    ignore_index=True,
)

table = (
    appearances.groupby('Teams')['Points']
    .agg(Points='sum', Games='size')
    .reset_index()
)
table = table[table['Teams'].isin(teams)].reset_index(drop=True)

# Rank by points per game so teams with games in hand are compared fairly.
table['PPG'] = round(table['Points'] / table['Games'], 2)
table = table.sort_values(by='PPG', ascending=False).reset_index(drop=True)
# Shift the index so it reads as a 1-based league position.
table.index = table.index + 1

table

Unnamed: 0,Teams,Points,Games,PPG
1,Bodo/Glimt,44,20,2.2
2,Brann,36,19,1.89
3,Viking,36,20,1.8
4,Molde,34,19,1.79
5,Fredrikstad,31,20,1.55
6,Rosenborg,30,20,1.5
7,KFUM Oslo,26,19,1.37
8,HamKam,24,20,1.2
9,Tromso,24,20,1.2
10,Kristiansund,21,18,1.17


In [27]:
# Create a mapping from team names to their 1-based row position in the points table
team_indices = {team: position for position, team in enumerate(table['Teams'], start=1)}

# Map the Home and Away columns to their corresponding positions.
# assign() returns a new frame, so these columns are never written into a
# slice of the raw data (avoids SettingWithCopyWarning) and the cell can be
# re-run safely. Teams missing from the points table get NaN.
home_table = home_table.assign(
    HomeStanding=home_table['Home'].map(team_indices),
    AwayStanding=home_table['Away'].map(team_indices),
)
# Opponent's position minus the team's own position.
home_table['PowerDif'] = home_table['AwayStanding'] - home_table['HomeStanding']

away_table = away_table.assign(
    HomeStanding=away_table['Home'].map(team_indices),
    AwayStanding=away_table['Away'].map(team_indices),
)
away_table['PowerDif'] = away_table['HomeStanding'] - away_table['AwayStanding']

# Average over the matches that actually have a difference: mean() skips NaN
# rows and uses the real row count, whereas dividing the sum by last_games
# understated the value when rows were missing or opponents were unranked.
home_power = home_table['PowerDif'].mean()
away_power = away_table['PowerDif'].mean()

away_table

Unnamed: 0,Country,Season,Home,Away,HG,AG,Res,PSCH,PSCD,PSCA,Total,1.5O,2.5O,3.5O,BTTS,Points,HomeStanding,AwayStanding,PowerDif
2990,Norway,2024,Tromso,Lillestrom,1,2,A,2.05,3.66,3.65,3,1,1,0,1,3,9,15,-6
3004,Norway,2024,Molde,Lillestrom,3,0,H,1.48,5.0,6.23,3,1,1,0,0,0,4,15,-11
3032,Norway,2024,Kristiansund,Lillestrom,2,1,H,3.94,4.31,1.82,3,1,1,0,1,0,10,15,-5
3043,Norway,2024,Stromsgodset,Lillestrom,3,2,H,2.36,3.9,2.82,5,1,1,1,1,0,12,15,-3
3049,Norway,2024,Rosenborg,Lillestrom,4,0,H,2.0,3.8,3.68,4,1,1,1,0,0,6,15,-9


In [28]:
# Creating final dataframe for analysis

# NOTE(review): this rebinds `df`, which held the raw match data until now, so
# the earlier cells cannot be re-run after this one without reloading the CSV.
# NOTE(review): the key 'A0.50' ends in a zero while its sibling is 'H0.5O';
# kept byte-for-byte here so existing consumers of the column keep working.

# Combined over/BTTS rates: mean of the two sides' recent shares, as a truncated percentage.
over15_pct = int(np.mean([home_15over, away_15over]) * 100)
over25_pct = int(np.mean([home_25over, away_25over]) * 100)
over35_pct = int(np.mean([home_35over, away_35over]) * 100)
btts_pct = int(np.mean([home_btts, away_btts]) * 100)

df = pd.DataFrame({
    # Fixture and the odds entered at the top of the notebook
    'League': [home_table['Country'].iloc[0]],
    'Home': [home_team],
    'Away': [away_team],
    'FT1': [home_odds],
    'FTX': [draw_odds],
    'FT2': [away_odds],
    # Result percentages from recent form
    'FT1P': [home_win],
    'FTXP': [draw],
    'FT2P': [away_win],
    # Goals scored / conceded per match
    'HSC': [home_scored],
    'HCD': [home_conceded],
    'ASC': [away_scored],
    'ACD': [away_conceded],
    # Expected goals used as the Poisson means
    'HXG': [round(home_expected, 2)],
    'AXG': [round(away_expected, 2)],
    # Combined goal-line and both-teams-to-score percentages
    '1.5O': [over15_pct],
    '2.5O': [over25_pct],
    '3.5O': [over35_pct],
    'BTTS': [btts_pct],
    # Poisson-based team goal percentages
    'H0.5O': [home_05over],
    'A0.50': [away_05over],
    'H1.5O': [homes_15over],
    'A1.5O': [aways_15over],
})

df

Unnamed: 0,League,Home,Away,FT1,FTX,FT2,FT1P,FTXP,FT2P,HSC,HCD,ASC,ACD,HXG,AXG,1.5O,2.5O,3.5O,BTTS,H0.5O,A0.50,H1.5O,A1.5O
0,Norway,KFUM Oslo,Lillestrom,2.3,3.5,2.9,50,20,30,1.2,1.4,1.0,2.6,2.11,1.02,80,70,30,60,87,64,62,27
