In [11]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

In [12]:
# Read the three raw CSV tables in one pass, in the order team_stats,
# standings, fixtures. Each file is parsed with pandas' default settings.
team_stats, standings, fixtures = map(
    pd.read_csv,
    (
        "datasets/team_stats.csv",
        "datasets/standings.csv",
        "datasets/fixtures.csv",
    ),
)

In [13]:
# Sanity check: list the column names of both tables so the columns used
# further down can be confirmed to exist. sep="\n" puts each heading and its
# column list on separate lines.
print("Team stats oszlopok:", team_stats.columns.tolist(), sep="\n")
print("\nStandings oszlopok:", standings.columns.tolist(), sep="\n")

Team stats oszlopok:
['team', 'players', 'age', 'possession', 'goals', 'assists', 'penalty_kicks', 'penalty_kick_attempts', 'yellows', 'reds', 'expected_goals', 'expected_assists', 'progressive_carries', 'progressive_passes']

Standings oszlopok:
['rank', 'team', 'win', 'loss', 'draw', 'goals', 'conceded', 'points', 'last5', 'top_scorer', 'keeper']


In [None]:
# The six team-level statistics used as model inputs:
# goals scored, assists, possession, expected goals (xG),
# expected assists (xA) and progressive passes.
features = [
    "goals",
    "assists",
    "possession",
    "expected_goals",
    "expected_assists",
    "progressive_passes",
]

# Feature matrix: every row of team_stats, restricted to the chosen columns.
X = team_stats[features]

# Leave the feature matrix as the last expression so the cell displays it.
X



Unnamed: 0,goals,assists,possession,expected_goals,expected_assists,progressive_passes
0,65,53,56.8,57.6,43.2,1764
1,56,45,51.0,55.7,41.5,1326
2,55,39,48.1,62.4,42.6,1438
3,64,43,47.8,57.6,41.3,1318
4,60,40,52.0,56.5,39.7,1478
5,60,46,57.3,66.7,51.6,1573
6,48,37,43.1,58.7,44.7,1140
7,38,26,41.1,40.6,31.5,1027
8,53,44,52.4,47.8,36.9,1524
9,34,25,40.5,33.6,23.7,896


In [27]:
# Sanity check: report how many features were selected and which ones.
n_features = len(features)
print("Használt feature-ök száma:", n_features)
print("Feature nevek:", features)



Használt feature-ök száma: 6
Feature nevek: ['goals', 'assists', 'possession', 'expected_goals', 'expected_assists', 'progressive_passes']


In [28]:
# Outlier removal based on z-scores.
# `stats` (scipy) and `np` (numpy) are imported in the notebook's import cell.
# X is the feature matrix created in the previous step.
print("Eredeti adatméret:", X.shape)

# Absolute z-score of every value, standardised column by column (axis=0 is
# the zscore default). nan_policy="omit" keeps a single missing value from
# turning its whole column into NaN.
z_scores = np.abs(stats.zscore(X, nan_policy="omit"))

# Cut-off: 3 standard deviations from the column mean.
threshold = 3

# Boolean mask with one entry per row: True when the row should be KEPT,
# i.e. none of its z-scores reaches the threshold. It is written as
# "not any outlier" rather than "all below" because a NaN z-score (constant
# column or missing value) fails every comparison and would otherwise cause
# the row to be dropped.
mask = ~(z_scores >= threshold).any(axis=1)

# Keep only the rows that passed the check.
X_clean = X[mask]

print("Outlierek eltávolítása után:", X_clean.shape)

X_clean.head()


Eredeti adatméret: (20, 6)
Outlierek eltávolítása után: (20, 6)


Unnamed: 0,goals,assists,possession,expected_goals,expected_assists,progressive_passes
0,65,53,56.8,57.6,43.2,1764
1,56,45,51.0,55.7,41.5,1326
2,55,39,48.1,62.4,42.6,1438
3,64,43,47.8,57.6,41.3,1318
4,60,40,52.0,56.5,39.7,1478


In [36]:
# Derived feature: finishing efficiency, i.e. goals scored per expected goal.
# A value above 1 means the team scored more goals than its xG.
team_stats["goal_efficiency"] = team_stats["goals"].div(team_stats["expected_goals"])

# Show the new column next to the columns it was derived from.
team_stats.loc[:, ["team", "goals", "expected_goals", "goal_efficiency"]]


Unnamed: 0,team,goals,expected_goals,goal_efficiency
0,Arsenal,65,57.6,1.128472
1,Aston Villa,56,55.7,1.005386
2,Bournemouth,55,62.4,0.88141
3,Brentford,64,57.6,1.111111
4,Brighton,60,56.5,1.061947
5,Chelsea,60,66.7,0.89955
6,Crystal Palace,48,58.7,0.817717
7,Everton,38,40.6,0.935961
8,Fulham,53,47.8,1.108787
9,Ipswich Town,34,33.6,1.011905


In [43]:
# Mean of the points column over all rows of the standings table.
avg_points = standings["points"].mean()

# Binary target: 1 when a team has strictly more points than the mean,
# 0 otherwise (including a team exactly on the mean).
standings["target"] = standings["points"].gt(avg_points).astype(int)

# Show the label alongside the points it was derived from.
standings.loc[:, ["team", "points", "target"]]


Unnamed: 0,team,points,target
0,Liverpool,83,1
1,Arsenal,71,1
2,Manchester City,68,1
3,Newcastle Utd,66,1
4,Chelsea,66,1
5,Aston Villa,66,1
6,Nott'ham Forest,65,1
7,Brighton,58,1
8,Brentford,55,1
9,Fulham,54,1


In [44]:
# Attach the target label to the team statistics by team name.
# how="inner" keeps only teams present in both tables. validate="one_to_one"
# makes pandas raise MergeError if a team name is duplicated in either table,
# instead of silently multiplying rows in the model data.
merged = team_stats.merge(
    standings[["team", "target"]],
    on="team",
    how="inner",
    validate="one_to_one",
)

# Feature matrix (X) and target vector (y). Both are taken from the same
# merged frame, so their rows line up. `features` is the column list
# created earlier in the notebook.
# NOTE(review): X is rebuilt here from every merged row, so the z-score
# filtering that produced X_clean is not applied to these model inputs.
# Confirm that this is intended.
X = merged[features]
y = merged["target"]

# Sanity check: shapes of X and y, plus a preview of the joined table.
print("X shape:", X.shape)
print("y shape:", y.shape)
merged[["team"] + features + ["target"]].head()


X shape: (20, 6)
y shape: (20,)


Unnamed: 0,team,goals,assists,possession,expected_goals,expected_assists,progressive_passes,target
0,Arsenal,65,53,56.8,57.6,43.2,1764,1
1,Aston Villa,56,45,51.0,55.7,41.5,1326,1
2,Bournemouth,55,39,48.1,62.4,42.6,1438,1
3,Brentford,64,43,47.8,57.6,41.3,1318,1
4,Brighton,60,40,52.0,56.5,39.7,1478,1
