In [None]:
!pip install torch-geometric networkx torch

In [3]:
# prompt: load all csv files into different dataframes


import pandas as pd
import networkx as nx
import torch
from torch_geometric.data import Data
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv


In [6]:
batting_card_df = pd.read_csv("outputs/batting_card.csv")
batting_head_to_head_df = pd.read_csv("outputs/batting_head_to_head.csv")
bowling_card_df = pd.read_csv("outputs/bowling_card.csv")
bowling_head_to_head_df = pd.read_csv("outputs/bowling_head_to_head.csv")
extras_df = pd.read_csv("outputs/extras.csv")
fall_of_wickets_df = pd.read_csv("outputs/fall_of_wickets.csv")
manhattan_graph_df = pd.read_csv("outputs/manhattan_graph.csv")
manhattan_wickets_df = pd.read_csv("outputs/manhattan_wickets.csv")
over_history_df = pd.read_csv("outputs/over_history.csv")
partnership_scores_df = pd.read_csv("outputs/partnership_scores.csv")
wagon_wheel_df = pd.read_csv("outputs/wagon_wheel.csv")
wagon_wheel_summery_df = pd.read_csv("outputs/wagon_wheel_summery.csv")

In [7]:
bowling_head_to_head_df = bowling_head_to_head_df.rename(columns= {'StrikerID':'BatsManID'})
merged_df = pd.merge(batting_head_to_head_df, bowling_head_to_head_df, on=['BowlerID', 'BatsManID'])

In [8]:
all_players = set(merged_df['BowlerName_y'].unique()) | set(merged_df['BatsManName_y'].unique())

# Create a mapping of player names to numeric identifiers
player_mapping = {name: idx for idx, name in enumerate(all_players)}

In [9]:
# Construct graph
G = nx.Graph()
for _, row in merged_df.iterrows():
    bowler_idx = player_mapping[row['BowlerName_y']]
    batsman_idx = player_mapping[row['BatsManName_y']]
    G.add_edge(bowler_idx, batsman_idx, runs=row['TotalRunsConceded'], balls=row['TotalLegalBallsBowled'])

print(G)

Graph with 175 nodes and 1545 edges


In [10]:
# Convert graph to PyTorch tensors
num_nodes = len(player_mapping)
edge_index = torch.tensor(list(G.edges)).t().contiguous()

In [11]:
# Create feature matrix (identity matrix) and target variable
x = torch.eye(num_nodes)  # Identity matrix as feature matrix
y = torch.tensor(merged_df['TotalRunsConceded'].values, dtype=torch.float).unsqueeze(1)  # Ensure y has correct dimensions



In [12]:
print(x.size(0))

175


In [13]:
print(y.size(0))

1559


In [14]:
# Ensure the dimensions of x and y are compatible
assert num_nodes == y.size(0), "Number of nodes and number of data points in y must match"

AssertionError: Number of nodes and number of data points in y must match

In [None]:
# Create PyTorch Geometric data object
data = Data(x=x, edge_index=edge_index, y=y)

In [None]:
class GNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

In [None]:
# Define model, optimizer, and loss function
input_dim = num_nodes  # Dimensionality of input features
hidden_dim = 64
output_dim = 1  # Output dimension (for regression task)
model = GNNModel(input_dim, hidden_dim, output_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.MSELoss()

In [None]:
# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

RuntimeError: The size of tensor a (175) must match the size of tensor b (1559) at non-singleton dimension 0