In [1]:
import pandas as pd
import numpy as np 
from sklearn.linear_model import LogisticRegression
# Read each side's match table into its own DataFrame. Both paths are
# relative, so the CSVs must sit in the notebook's working directory.
argentina, france = (
    pd.read_csv('argentina.csv'),
    pd.read_csv('france.csv'),
)

In [4]:
def _points_factor(results):
    """Return the running points-per-match average of ``results``, plus one.

    Each entry is scored 3 for a ``1``, 1 for a ``0`` and 0 for a ``-1``
    (presumably win / draw / loss from the team's point of view -- confirm
    against the CSV schema).  The scores are accumulated, divided by the
    1-based row count and offset by +1, so a column holding only those three
    codes yields values between 1 and 4.

    Parameters
    ----------
    results : pandas.Series
        Outcome codes, one per row, in chronological order (assumed).

    Returns
    -------
    pandas.Series
        Same index as ``results``; the last element summarises the whole table.
    """
    win_points = results.replace({-1: 0}) * 3
    draw_points = results.replace({-1: 0, 0: 1, 1: 0})
    # Explicit array of 1..n so the division does not depend on pandas'
    # special handling of ``range`` operands.
    matches_played = np.arange(1, len(results) + 1)
    return (win_points + draw_points).cumsum() / matches_played + 1


# The same scoring rule is applied to both teams through the shared helper.
argentina['K'] = _points_factor(argentina['TARGET'])
france['K'] = _points_factor(france['TARGET'])

# Baseline ratings per department (ATTD / MIDD / DEFD are used as attack /
# midfield / defence further down).  The numbers are hard-coded here; their
# origin is not recorded in the notebook.
argentina_initial_ratings = {'ATTD': 86, 'MIDD': 82, 'DEFD': 81}
france_initial_ratings = {'ATTD': 86, 'MIDD': 82, 'DEFD': 82}

In [5]:
# Scale every department rating by the last value of that team's K series.
# NOTE: both dicts are updated in place, so executing this cell a second time
# without re-creating them multiplies the ratings by K again.
argentina_final_k = argentina['K'].iloc[-1]
france_final_k = france['K'].iloc[-1]
for department in ('ATTD', 'MIDD', 'DEFD'):
    argentina_initial_ratings[department] = argentina_initial_ratings[department] * argentina_final_k
    france_initial_ratings[department] = france_initial_ratings[department] * france_final_k

In [6]:
# Store the rating gap to the opponent as three columns on each frame.
# NOTE(review): each gap is a single scalar, so every row of a frame receives
# the same value -- the columns do not vary from match to match.
diff_column_for = {'ATTD': 'ATTD_DIFF', 'MIDD': 'MIDD_DIFF', 'DEFD': 'DEFD_DIFF'}
for department, column in diff_column_for.items():
    argentina_rating = argentina_initial_ratings[department]
    france_rating = france_initial_ratings[department]
    argentina[column] = argentina_rating - france_rating
    france[column] = france_rating - argentina_rating

In [9]:
# One classifier per team, trained on that team's DIFF columns against its
# own TARGET labels.
# NOTE(review): the DIFF columns are filled with one scalar per frame earlier
# in the notebook, so the fitted models see no variation in their features.
diff_features = ['ATTD_DIFF', 'MIDD_DIFF', 'DEFD_DIFF']

model_argentina = LogisticRegression()
model_argentina.fit(argentina[diff_features], argentina['TARGET'])

model_france = LogisticRegression()
model_france.fit(france[diff_features], france['TARGET'])


In [10]:
# Team A (Argentina): rating gaps taken from the last row of its DIFF columns.
team_a_attack_diff, team_a_midfield_diff, team_a_defense_diff = (
    argentina[column].iloc[-1]
    for column in ('ATTD_DIFF', 'MIDD_DIFF', 'DEFD_DIFF')
)

# Team B (France): the same gaps with the sign flipped.
team_b_attack_diff, team_b_midfield_diff, team_b_defense_diff = (
    -gap
    for gap in (team_a_attack_diff, team_a_midfield_diff, team_a_defense_diff)
)

In [11]:
# Both models were fitted on DataFrames with named columns.  Predicting on a
# bare nested list makes scikit-learn warn that the input has no valid feature
# names, so build one-row frames that carry the training column names.
diff_columns = ['ATTD_DIFF', 'MIDD_DIFF', 'DEFD_DIFF']
argentina_features = pd.DataFrame(
    [[team_a_attack_diff, team_a_midfield_diff, team_a_defense_diff]],
    columns=diff_columns,
)
france_features = pd.DataFrame(
    [[team_b_attack_diff, team_b_midfield_diff, team_b_defense_diff]],
    columns=diff_columns,
)

# predict() returns a length-1 array holding the predicted TARGET label.
team_argentina_prediction = model_argentina.predict(argentina_features)
team_france_prediction = model_france.predict(france_features)

# Compare the two predicted labels directly; the higher label wins.
# NOTE(review): each label comes from a different model and describes that
# team's own outcome, so equal labels (e.g. both "1") are reported as a draw.
argentina_label = team_argentina_prediction[0]
france_label = team_france_prediction[0]

if argentina_label > france_label:
    print("Argentina is expected to win.")
elif argentina_label < france_label:
    print("France is expected to win.")
else:
    print("The match is predicted to end in a draw.")

The match is predicted to end in a draw.




In [12]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def predict_penalty_winner(dataset_file, test_size=0.2, random_state=42):
    """Fit a logistic-regression model on one CSV and print its hold-out results.

    The file is read with pandas, the ``ATTD``, ``MIDD`` and ``DEFD`` columns
    are used as features and ``TARGET`` as the label.  The rows are split into
    train and test parts, a ``LogisticRegression`` is fitted on the train part,
    and the function prints the test accuracy followed by one line per test
    row naming the predicted winner.

    Parameters
    ----------
    dataset_file : str or path-like
        CSV file containing the four columns named above.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    None
        Results are printed, not returned.

    Notes
    -----
    A predicted label of ``1`` is printed as "Team A"; every other label is
    printed as "Team B".  NOTE(review): if ``TARGET`` also contains a draw
    code, those predictions are reported as "Team B" too -- confirm intent.
    """
    data = pd.read_csv(dataset_file)

    features = data[['ATTD', 'MIDD', 'DEFD']]
    labels = data['TARGET']

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state)

    penalty_winner_model = LogisticRegression()
    penalty_winner_model.fit(X_train, y_train)

    penalty_winner_predictions = penalty_winner_model.predict(X_test)

    accuracy = accuracy_score(y_test, penalty_winner_predictions)
    print(f"Penalty Winner Model Accuracy: {accuracy * 100:.2f}%")

    # predict() yields exactly one label per test row, so iterate the labels
    # directly instead of indexing by position.
    for predicted_label in penalty_winner_predictions:
        winner = "Team A" if predicted_label == 1 else "Team B"
        print(f"Predicted Penalty Winner: {winner}")

# Run the same report for each team's CSV, with a heading before each run.
for heading, csv_path in (
    ("Predictions for Argentina:", 'argentina.csv'),
    ("\nPredictions for France:", 'france.csv'),
):
    print(heading)
    predict_penalty_winner(csv_path)


Predictions for Argentina:
Penalty Winner Model Accuracy: 60.00%
Predicted Penalty Winner: Team B
Predicted Penalty Winner: Team A
Predicted Penalty Winner: Team A
Predicted Penalty Winner: Team A
Predicted Penalty Winner: Team A

Predictions for France:
Penalty Winner Model Accuracy: 40.00%
Predicted Penalty Winner: Team A
Predicted Penalty Winner: Team A
Predicted Penalty Winner: Team A
Predicted Penalty Winner: Team A
Predicted Penalty Winner: Team A
