In [96]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

# Load the raw World Cup results.
world_cup_csv = pd.read_csv('WorldCupMatches.csv')

# Keep only the team names and final scores. The kick-off date is not a model
# feature, so it is no longer parsed just to be dropped again (that parsing also
# relied on the deprecated `infer_datetime_format` argument).
# Building the frame as one chain yields a new DataFrame instead of assigning
# columns onto a slice of `world_cup_csv`, which avoids SettingWithCopyWarning.
world_cup = (
    world_cup_csv[["Home Team Name", "Home Team Goals", "Away Team Goals", "Away Team Name"]]
    # Discard rows that carry no match information at all.
    .dropna(how="all", axis=0)
    .rename(columns={
        "Home Team Name": "home_team",
        "Home Team Goals": "home_score",
        "Away Team Name": "away_team",
        "Away Team Goals": "away_score",
    })
    # Goals are read as floats when the CSV contains empty rows; store them as ints.
    .astype({"home_score": int, "away_score": int})
)

# Load the Africa Cup of Nations history and drop the columns that are not
# used by the model. The date used to be parsed (with the deprecated
# `infer_datetime_format` argument) and then dropped immediately, so the
# parsing step is removed.
can_hist_csv = pd.read_csv('African-Nations-results.csv')
can_hist_csv = can_hist_csv.drop(columns=["date", "tournament"])

# Stack both competitions into one training set and make sure the scores are
# integers after the concatenation.
full = (
    pd.concat([world_cup, can_hist_csv], ignore_index=True)
    .astype({"home_score": int, "away_score": int})
)

# Targets are the two score columns; every remaining column is a feature.
y = full[["home_score", "away_score"]]
x = full.drop(columns=list(y.columns))

# Team names are one-hot encoded; teams never seen during training are encoded
# as all zeros instead of raising. Any other column is passed through untouched.
categorical_features = ["home_team", "away_team"]
one_hot = OneHotEncoder(handle_unknown="ignore")
preprocessor = ColumnTransformer(
    transformers=[("one_hot", one_hot, categorical_features)],
    remainder="passthrough",
)

# Seeded random forest predicting both scores at once.
model = RandomForestRegressor(n_estimators=200, random_state=42, n_jobs=-1)

# Preprocessing and model chained so raw team names can be fed directly.
pipe = Pipeline(
    steps=[
        ("preprocess", preprocessor),
        ("model", model),
    ]
)

# Split: hold out 20% of the matches for evaluation. The seed is fixed so the
# split (and therefore the score shown below) is the same on every
# Restart & Run All; without it each run produced a different score.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Fit the preprocessing + random forest pipeline on the training part.
pipe.fit(x_train, y_train)

# Score: R^2 on the held-out matches (last expression, so the cell displays it).
pipe.score(x_test, y_test)



0.09206357634144954

In [104]:
def predire_match(home, away):
    """Predict the score of a single match with the fitted pipeline.

    Parameters
    ----------
    home : str
        Name of the home team, as stored in the ``home_team`` column.
    away : str
        Name of the away team, as stored in the ``away_team`` column.

    Returns
    -------
    pd.Series
        Raw (unrounded) predictions indexed by the target column names
        (``y.columns``).

    Notes
    -----
    Relies on the notebook-level ``pipe`` (fitted pipeline), ``x`` (feature
    frame) and ``y`` (target frame).
    """
    # One-row frame holding the two team names.
    ligne_df = pd.DataFrame([{"home_team": home, "away_team": away}])

    # Use the same column order as the training features.
    ligne_df = ligne_df[x.columns]

    y_pred = pipe.predict(ligne_df)  # one row, one value per target column
    return pd.Series(y_pred[0], index=y.columns)

In [112]:
# Sanity check on one fixture: predict the score, then show the home goals
# rounded to the nearest whole number.
res_pred = predire_match(home="Egypt", away="Zimbabwe")

res_pred.loc["home_score"].round()

2.0

In [109]:
# Fixtures to predict; the prediction step reads the `home_team` and
# `away_team` columns of this frame.
matches = pd.read_csv(filepath_or_buffer='matches.csv')

In [117]:
def appliquer_predictions(matches):
    """Add predicted scores to a frame of fixtures.

    Parameters
    ----------
    matches : pd.DataFrame
        Fixtures to predict. Must contain the feature columns the pipeline was
        trained on (``x.columns``).

    Returns
    -------
    pd.DataFrame
        The same ``matches`` object, modified in place, with integer
        ``home_score`` and ``away_score`` columns holding the rounded
        predictions.

    Notes
    -----
    Relies on the notebook-level ``pipe`` (fitted pipeline), ``x`` (feature
    frame) and ``y`` (target frame).
    """
    score_cols = ["home_score", "away_score"]

    # scikit-learn rejects zero-row input, so an empty fixture list simply
    # gets empty integer score columns.
    if matches.empty:
        for col in score_cols:
            matches[col] = pd.Series(dtype=int, index=matches.index)
        return matches

    # Predict every fixture in a single call instead of running the whole
    # pipeline once per row; features use the training column order.
    raw = pipe.predict(matches[list(x.columns)])
    preds = pd.DataFrame(raw, index=matches.index, columns=list(y.columns))

    # Scores are whole goals: round the regression output and store as ints.
    matches[score_cols] = preds.round().astype(int)
    return matches

# Fill in the predicted scores for every fixture and print the resulting table.
matches = matches.pipe(appliquer_predictions)
print(matches)

            home_team          away_team  home_score  away_score
0             Morocco            Comoros           2           0
1                Mali             Zambia           1           1
2               Egypt           Zimbabwe           2           1
3        South Africa             Angola           1           1
4             Nigeria           Tanzania           1           0
5             Tunisia             Uganda           2           0
6             Senegal           Botswana           3           0
7            DR Congo              Benin           2           0
8             Algeria              Sudan           3           0
9        Burkina Faso  Equatorial Guinea           2           0
10        Ivory Coast         Mozambique           3           0
11           Cameroon              Gabon           1           1
12            Morocco               Mali           4           0
13             Zambia            Comoros           2           1
14             Angola    