# Step 1 - Future Men's Matches DataFrame Creation

In [None]:
import pandas as pd

# Fixture records for the men's series to predict. Each record carries the
# series name, the season window, the host venue and the match type
# ("Test", "ODI" or "T20").
data = [
    {"Series": "India tour of Australia 2024-25", "Season": "Nov 2024 - Jan 2025", "Venue": "Australia", "Type": "Test"},
    {"Series": "Pakistan Tour of South Africa, 2024-2025", "Season": "Dec 2024 - Jan 2025", "Venue": "South Africa", "Type": "ODI"},
    {"Series": "England Tour of India 2025", "Season": "Jan - Feb 2025", "Venue": "India", "Type": "T20"},
    {"Series": "India in Zimbabwe T20I Series, 2024", "Season": "Jul 2024", "Venue": "Zimbabwe", "Type": "T20"},
    {"Series": "ICC Men's T20 World Cup Sub Regional Europe Qualifier Group B, 2024", "Season": "Jul 2024", "Venue": "Europe", "Type": "T20"},
    {"Series": "England in West Indies T20I Series, 2024/25", "Season": "Nov 2024", "Venue": "West Indies", "Type": "T20"},
    {"Series": "Pakistan in Australia ODI Series, 2024/25", "Season": "Nov 2024", "Venue": "Australia", "Type": "ODI"},
    {"Series": "Border-Gavaskar Trophy, 2024/25", "Season": "Dec 2024", "Venue": "Australia", "Type": "Test"},
]

# Convert to DataFrame (one row per series, columns taken from the dict keys)
df = pd.DataFrame(data)

# Save to CSV. The path is relative, so the file is written to the current
# working directory; the row index is not written.
file_path = 'future_mens_matches.csv'
df.to_csv(file_path, index=False)

# Display DataFrame
print(df)

# Offer the CSV as a browser download when running on Google Colab.
# `google.colab` is only importable on a Colab runtime, so the import is
# guarded: on any other kernel the CSV is still written and the cell completes
# instead of failing on the import.
try:
    from google.colab import files
except ImportError:
    print(f"Saved {file_path}; browser download skipped (google.colab is not available).")
else:
    files.download(file_path)


                                              Series               Season  \
0                    India tour of Australia 2024-25  Nov 2024 - Jan 2025   
1           Pakistan Tour of South Africa, 2024-2025  Dec 2024 - Jan 2025   
2                         England Tour of India 2025       Jan - Feb 2025   
3                India in Zimbabwe T20I Series, 2024             Jul 2024   
4  ICC Men's T20 World Cup Sub Regional Europe Qu...             Jul 2024   
5        England in West Indies T20I Series, 2024/25             Nov 2024   
6          Pakistan in Australia ODI Series, 2024/25             Nov 2024   
7                    Border-Gavaskar Trophy, 2024/25             Dec 2024   

          Venue  Type  
0     Australia  Test  
1  South Africa   ODI  
2         India   T20  
3      Zimbabwe   T20  
4        Europe   T20  
5   West Indies   T20  
6     Australia   ODI  
7     Australia  Test  


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Step 2 - Load the Datasets

In [None]:
import pandas as pd
# Read the fixture table and the three historical results tables in one pass.
# The fixture CSV is presumably the file saved in Step 1 (that holds only when
# the working directory is /content -- TODO confirm). The other three files are
# bound to `odi`, `test` and `t20`, the names the later steps operate on.
future_series, odi, test, t20 = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/future_mens_matches.csv",
        "/content/odt.csv",
        "/content/tt.csv",
        "/content/twt.csv",
    )
)

# Step 3 - Cleaning Historical Data

In [None]:
# Function for cleaning and extracting numeric values from the margin columns in your dataset
def clean_margin(margin_series):
    """Return the numeric part of each margin value as a float Series.

    Parameters
    ----------
    margin_series : pandas.Series
        Margin values. Text entries are searched for their first run of
        digits; entries without any digits (and missing entries) become NaN.
        A Series that is already numeric is passed through as float.

    Returns
    -------
    pandas.Series
        Float Series aligned with ``margin_series``.
    """
    # A numeric column has no `.str` accessor. Passing it through keeps the
    # cleaning cell re-runnable after 'Margin' has already been converted.
    if pd.api.types.is_numeric_dtype(margin_series):
        return margin_series.astype(float)

    # Raw string so that `\d` reaches the regex engine as written instead of
    # being treated as an (invalid) Python string escape.
    return margin_series.str.extract(r'(\d+)', expand=False).astype(float)
# Overwrite the 'Margin' column of each historical table with the numeric
# values produced by clean_margin.
for _results in (odi, test, t20):
    _results['Margin'] = clean_margin(_results['Margin'])

# Step 4 - Calculating win ratios for each match type

In [None]:
# Cleans and calculates win ratios for each match type separately
def clean_and_calculate_win_ratio(data, winner_col, non_results=('drawn', 'abandoned', 'none', 'shared')):
    """Compute each team's share of the decided matches in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Match results; it is not modified.
    winner_col : str
        Name of the column holding the winner label of each match.
    non_results : iterable of str, optional
        Labels that mark a match without a winner. They are compared
        case-insensitively after trimming surrounding whitespace.

    Returns
    -------
    pandas.Series
        Indexed by the full team name. Each value is that team's win count
        divided by the total number of counted wins across all teams, so the
        values sum to 1. This is a share of all decided matches, not wins
        divided by the team's own matches played. Empty when no decided match
        remains.
    """
    labels = data[winner_col].dropna()

    # Only text labels can name a team; anything else is ignored.
    is_text = labels.map(lambda value: isinstance(value, str)).astype(bool)

    # Keep the whole name. Taking only the first word would merge or truncate
    # multi-word teams ("South Africa" -> "South"), so later lookups by full
    # name would never find them.
    teams = labels[is_text].map(str.strip)

    skipped = {str(label).strip().lower() for label in non_results}
    has_winner = teams.map(lambda team: team != '' and team.lower() not in skipped).astype(bool)

    win_counts = teams[has_winner].value_counts()
    return win_counts / win_counts.sum()

# One win-ratio Series per match type, each computed from that type's own
# results table using its 'Winner' column.
odi_win_ratios_cleaned, test_win_ratios_cleaned, t20_win_ratios_cleaned = (
    clean_and_calculate_win_ratio(results, 'Winner')
    for results in (odi, test, t20)
)

# Step 5 - Preparing historical data with win ratios and margins

In [None]:
# Preparing historical datasets with win ratios and margins for each match type
def prepare_historical_dataset_by_type(data, win_ratios, margin_col, match_type):
    """Build a two-column frame (``win_ratio``, ``Margin``) from match results.

    Parameters
    ----------
    data : pandas.DataFrame
        Match results with a 'Winner' column and the ``margin_col`` column.
        It is not modified.
    win_ratios : pandas.Series or dict
        Lookup from winner label to win ratio.
    margin_col : str
        Name of the margin column; rows where it is missing are dropped.
    match_type : str
        Not used by the computation; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Columns ``win_ratio`` (float; 0 where the winner is missing or absent
        from ``win_ratios``) and ``Margin``, for the rows that have a margin.
    """
    with_margin = data.dropna(subset=[margin_col])

    # Vectorised lookup instead of a row-wise DataFrame.apply: faster, and it
    # yields a float column even when `data` has no rows left.
    ratio_lookup = pd.Series(win_ratios, dtype=float)
    win_ratio = with_margin['Winner'].map(ratio_lookup).fillna(0).astype(float)

    # Attach by position (the values are already in row order), then keep only
    # the two columns of interest under their final names.
    prepared = with_margin.assign(win_ratio=win_ratio.to_numpy())
    return prepared[['win_ratio', margin_col]].rename(columns={margin_col: 'Margin'})

# Build the (win_ratio, Margin) frame for each match type from its results
# table and the win-ratio lookup computed for that same type.
odi_historical, test_historical, t20_historical = (
    prepare_historical_dataset_by_type(results, ratios, 'Margin', label)
    for results, ratios, label in (
        (odi, odi_win_ratios_cleaned, 'ODI'),
        (test, test_win_ratios_cleaned, 'Test'),
        (t20, t20_win_ratios_cleaned, 'T20'),
    )
)

# Step 6 - Balancing the data and training a separate logistic regression model for each match type

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Function to train and evaluate logistic regression model
def train_and_evaluate_model(historical_data):
    """Fit a logistic regression on ``win_ratio`` and score it on a hold-out split.

    The single feature is the ``win_ratio`` column. The label is 1 where
    ``Margin`` is greater than 0 and 0 otherwise. 20% of the rows are held out
    (fixed seed 42) and used only for the accuracy figure.

    Returns
    -------
    tuple
        ``(model, accuracy)``: the fitted LogisticRegression and its accuracy
        on the held-out rows.
    """
    features = historical_data[['win_ratio']]
    labels = historical_data['Margin'].gt(0).astype(int)  # 1 if win, 0 if loss

    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    classifier = LogisticRegression().fit(features_train, labels_train)
    accuracy = accuracy_score(labels_test, classifier.predict(features_test))

    return classifier, accuracy
# Function to balance the data
def balance_data(historical_data, random_state=42):
    """Return a class-balanced subset of ``historical_data``.

    Rows are split on ``Margin``: greater than 0 is one class, less than or
    equal to 0 is the other. Rows whose margin is missing satisfy neither
    comparison and are left out.

    Parameters
    ----------
    historical_data : pandas.DataFrame
        Frame with a numeric 'Margin' column; it is not modified.
    random_state : int, optional
        Seed for the random downsampling. The default (42) reproduces the
        previously hard-coded behaviour.

    Returns
    -------
    pandas.DataFrame
        The positive-margin rows followed by the non-positive-margin rows,
        with the same number of rows in each class. Empty when either class
        has no rows.
    """
    win_data = historical_data[historical_data['Margin'] > 0]
    loss_data = historical_data[historical_data['Margin'] <= 0]

    # Downsample the larger (majority) class to the size of the smaller one.
    if len(win_data) > len(loss_data):
        win_data = win_data.sample(len(loss_data), random_state=random_state)
    else:
        loss_data = loss_data.sample(len(win_data), random_state=random_state)

    return pd.concat([win_data, loss_data])

# Replace each historical frame with its class-balanced version.
odi_historical, test_historical, t20_historical = (
    balance_data(frame)
    for frame in (odi_historical, test_historical, t20_historical)
)

# Fit one classifier per match type on its balanced frame and keep the
# hold-out accuracy reported for each.
(odi_model, odi_accuracy), (test_model, test_accuracy), (t20_model, t20_accuracy) = (
    train_and_evaluate_model(frame)
    for frame in (odi_historical, test_historical, t20_historical)
)

for _label, _accuracy in (
    ("Balanced ODI Model Accuracy:", odi_accuracy),
    ("Balanced Test Model Accuracy:", test_accuracy),
    ("Balanced T20 Model Accuracy:", t20_accuracy),
):
    print(_label, _accuracy)

Balanced ODI Model Accuracy: 0.3333333333333333
Balanced Test Model Accuracy: 0.7142857142857143
Balanced T20 Model Accuracy: 0.25


# Step 7 - Predicting Future Series Outcomes

In [None]:
import numpy as np

# Function to predict the outcome of a series
def predict_series_outcome(series_name, match_type, model, win_ratios, fixtures=None):
    """Count the rows of one series that the model scores as a win.

    Parameters
    ----------
    series_name : str
        Value of the 'Series' column selecting the rows to score.
    match_type : str
        Match type label; only used in the printed trace.
    model : object
        Fitted classifier exposing ``predict_proba``; it is called with a
        one-column frame named 'win_ratio'.
    win_ratios : pandas.Series or dict
        Lookup queried with each row's 'Venue' value; a venue that is not
        present gets a ratio of 0.
    fixtures : pandas.DataFrame, optional
        Table with 'Series' and 'Venue' columns. Defaults to the
        notebook-level ``future_series`` frame, which keeps existing calls
        working.

    Returns
    -------
    int
        Number of selected rows whose predicted win probability is above 0.5;
        0 when no row matches ``series_name``.
    """
    if fixtures is None:
        fixtures = future_series

    series_matches = fixtures[fixtures['Series'] == series_name].copy()
    if series_matches.empty:
        # scikit-learn estimators reject zero-row input, and no fixtures
        # means no predicted wins.
        return 0

    # The lookup key is the host venue, so the score describes the team whose
    # name equals the venue.
    series_matches['win_ratio'] = series_matches['Venue'].map(lambda venue: win_ratios.get(venue, 0))
    # Column 1 of predict_proba is the probability of the positive class.
    series_matches['win_probability'] = model.predict_proba(series_matches[['win_ratio']])[:, 1]

    print(f"Series: {series_name}, Match Type: {match_type}")
    print("Win Ratios:", series_matches['win_ratio'].values)
    print("Win Probabilities:", series_matches['win_probability'].values)

    # Aggregate to predict series outcome
    series_matches['predicted_win'] = series_matches['win_probability'] > 0.5
    series_outcome = int(series_matches['predicted_win'].sum())  # Sum of predicted wins

    print("Predicted Wins:", series_matches['predicted_win'].values)
    print("Series Outcome:", series_outcome)

    return series_outcome

# Score every fixture with the model and win-ratio lookup of its match type.
# 'ODI' and 'Test' have dedicated entries; any other type falls back to the
# T20 model and ratios.
_model_for_type = {'ODI': odi_model, 'Test': test_model}
_ratios_for_type = {'ODI': odi_win_ratios_cleaned, 'Test': test_win_ratios_cleaned}


def _predict_fixture(row):
    """Run predict_series_outcome for one fixture row."""
    fixture_type = row['Type']
    return predict_series_outcome(
        row['Series'],
        fixture_type,
        _model_for_type.get(fixture_type, t20_model),
        _ratios_for_type.get(fixture_type, t20_win_ratios_cleaned),
    )


future_series['predicted_outcome'] = future_series.apply(_predict_fixture, axis=1)

# Show the fixtures next to their predicted outcome
predicted_margins = future_series[['Series', 'Season', 'Venue', 'Type', 'predicted_outcome']]
predicted_margins

Series: India tour of Australia 2024-25, Match Type: Test
Win Ratios: [0.19692308]
Win Probabilities: [0.55885923]
Predicted Wins: [ True]
Series Outcome: 1
Series: Pakistan Tour of South Africa, 2024-2025, Match Type: ODI
Win Ratios: [0]
Win Probabilities: [0.45312382]
Predicted Wins: [False]
Series Outcome: 0
Series: England Tour of India 2025, Match Type: T20
Win Ratios: [0.09090909]
Win Probabilities: [0.41850892]
Predicted Wins: [False]
Series Outcome: 0
Series: India in Zimbabwe T20I Series, 2024, Match Type: T20
Win Ratios: [0.00909091]
Win Probabilities: [0.41654498]
Predicted Wins: [False]
Series Outcome: 0
Series: ICC Men's T20 World Cup Sub Regional Europe Qualifier Group B, 2024, Match Type: T20
Win Ratios: [0]
Win Probabilities: [0.41632692]
Predicted Wins: [False]
Series Outcome: 0
Series: England in West Indies T20I Series, 2024/25, Match Type: T20
Win Ratios: [0]
Win Probabilities: [0.41632692]
Predicted Wins: [False]
Series Outcome: 0
Series: Pakistan in Australia ODI 

Unnamed: 0,Series,Season,Venue,Type,predicted_outcome
0,India tour of Australia 2024-25,Nov 2024 - Jan 2025,Australia,Test,1
1,"Pakistan Tour of South Africa, 2024-2025",Dec 2024 - Jan 2025,South Africa,ODI,0
2,England Tour of India 2025,Jan - Feb 2025,India,T20,0
3,"India in Zimbabwe T20I Series, 2024",Jul 2024,Zimbabwe,T20,0
4,ICC Men's T20 World Cup Sub Regional Europe Qu...,Jul 2024,Europe,T20,0
5,"England in West Indies T20I Series, 2024/25",Nov 2024,West Indies,T20,0
6,"Pakistan in Australia ODI Series, 2024/25",Nov 2024,Australia,ODI,0
7,"Border-Gavaskar Trophy, 2024/25",Dec 2024,Australia,Test,1
