In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.19.0-cp310-cp310-win_amd64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.0-py3-none-any.whl.metadata (2.4 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<6.0.0dev,>=3.20.3 (from tensorflow)
  Downloading protobuf-5.29.5-cp310-abi3-win_amd64.whl.metadata (592 bytes)
Collecting te

In [None]:
# Colab-only cell: open the upload widget and load the first uploaded file.
from google.colab import files

uploaded = files.upload()
if not uploaded:
    # Nothing came back from the widget (presumably the dialog was cancelled).
    # Fail with a clear message instead of an IndexError on an empty key list.
    raise ValueError("No file was uploaded; re-run this cell and select a CSV file.")

# The mapping is keyed by file name; take the first one.
filename = next(iter(uploaded))
# assumes the upload is also written to the working directory under this name - TODO confirm
df = pd.read_csv(filename)

# Display the first few rows
print(df.head())


In [None]:
# Read the interpolated tracking table from disk.
df = pd.read_csv('train_interpolated_combined.csv')

# Wide -> long: every column other than "frame_index" is treated as an entity,
# and each (frame_index, entity) cell becomes one row holding the raw text
# under "position".
df_long = pd.melt(df, id_vars=["frame_index"], var_name="entity", value_name="position")

# Each position cell is split on ',' into two parts which are parsed as floats
# and stored as the 'x' and 'y' columns.
xy_parts = df_long['position'].str.split(',', expand=True)
df_long[['x', 'y']] = xy_parts.astype(float)

# Helpers to extract player ID and team
def extract_id(entity):
    """Return the second underscore-separated token of an entity name.

    Returns None for the literal name 'ball' and for any name that has fewer
    than three underscore-separated tokens.
    (Presumably names look like '<prefix>_<id>_<team>' - confirm against the
    CSV header.)
    """
    if entity == 'ball':
        return None
    tokens = entity.split('_')
    return tokens[1] if len(tokens) >= 3 else None

def extract_team(entity):
    """Return the last underscore-separated token of an entity name.

    Returns None for the literal name 'ball' and for any name that has fewer
    than three underscore-separated tokens.
    (Presumably names look like '<prefix>_<id>_<team>' - confirm against the
    CSV header.)
    """
    if entity == 'ball':
        return None
    tokens = entity.split('_')
    return tokens[-1] if len(tokens) >= 3 else None

# Add classification columns: 'type' is 'ball' for the entity literally named
# 'ball' and 'player' for everything else; 'id' and 'team' come from the helpers.
is_ball = df_long["entity"].eq('ball')
df_long["type"] = is_ball.map({True: 'ball', False: 'player'})
df_long["id"] = df_long["entity"].map(extract_id)
df_long["team"] = df_long["entity"].map(extract_team)

# Normalize positions with one min-max scaler per axis, fitted over every
# entity and every frame at once.
# NOTE(review): the scalers are fitted before any train/test split, so the
# held-out frames contribute to the min/max.
scaler_x = MinMaxScaler().fit(df_long[['x']])
scaler_y = MinMaxScaler().fit(df_long[['y']])
df_long['x_norm'] = scaler_x.transform(df_long[['x']])
df_long['y_norm'] = scaler_y.transform(df_long[['y']])

# Pivot back to wide format (rows: frame_index, columns: entity) and sort the
# columns so both tables share one entity order.
pivot_x, pivot_y = (
    df_long.pivot(index='frame_index', columns='entity', values=value_col).sort_index(axis=1)
    for value_col in ('x_norm', 'y_norm')
)

# Track sorted entity names
entities = pivot_x.columns
sorted_entities = entities.tolist()

# Interleave x and y columns for LSTM input: [e0_x, e0_y, e1_x, e1_y, ...]
interleaved_data = np.empty((pivot_x.shape[0], pivot_x.shape[1] * 2))
interleaved_data[:, 0::2] = pivot_x[sorted_entities].values
interleaved_data[:, 1::2] = pivot_y[sorted_entities].values

In [None]:

sequence_length = 30

# Sliding windows: each sample is `sequence_length` consecutive rows of
# interleaved_data, and its target is the row immediately after the window.
window_starts = range(len(interleaved_data) - sequence_length)
X = np.array([interleaved_data[start:start + sequence_length] for start in window_starts])
y = np.array([interleaved_data[start + sequence_length] for start in window_starts])

# 80/20 split in window order: the first 80% of windows train, the rest test.
split_index = int(len(X) * 0.8)
X_train, y_train = X[:split_index], y[:split_index]
X_test, y_test = X[split_index:], y[split_index:]

In [None]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
# Dropout and ReLU are used in the model below but were never imported, which
# raised NameError on a fresh kernel; import them alongside Bidirectional.
from tensorflow.keras.layers import Bidirectional, Dropout, ReLU
from tensorflow.keras.losses import Huber

# Model: a bidirectional LSTM that returns the full sequence, a second LSTM that
# reduces it to one vector, dropout after each, and a Dense layer with one
# output per input feature (X_train.shape[2]). The final ReLU clamps outputs to
# be non-negative.
model = Sequential([
    Bidirectional(LSTM(512, return_sequences=True)),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.3),
    Dense(X_train.shape[2]),
    ReLU()
])

model.compile(optimizer=Adam(0.0005), loss=Huber(), metrics=['mae'])

# Earlier variant kept for reference (lower learning rate, MSE loss):
#model.compile(optimizer=Adam(0.0003), loss='mse', metrics=['mae'])

# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Halve the learning rate after 5 epochs without val_loss improvement
lr_reduction = ReduceLROnPlateau(monitor='val_loss', patience=5, verbose=1, factor=0.5)

# Train the model with early stopping and learning rate reduction.
# NOTE(review): the test split doubles as validation data here, so early
# stopping and LR scheduling are tuned on the same windows that are later
# used for evaluation.
history = model.fit(X_train, y_train, epochs=200, batch_size=32, verbose=1,
                    validation_data=(X_test, y_test),
                    callbacks=[early_stopping, lr_reduction])



In [None]:
# Define the frame-level cut and the sequence length
sequence_length = 30
train_frames = int(0.8 * len(interleaved_data))
predict_frames = len(interleaved_data) - train_frames
# NOTE(review): this cut is taken over frames, while the training split is
# taken over windows (int(len(X) * 0.8)), so the two boundaries do not land on
# exactly the same frame - confirm this is intended.

if train_frames < sequence_length:
    raise ValueError(
        f"Need at least {sequence_length} frames before the cut, got {train_frames}."
    )

# Seed the rollout with the last `sequence_length` real frames before the cut
input_seq = interleaved_data[train_frames - sequence_length:train_frames].copy()
predicted_frames = []

# Autoregressive rollout: every predicted frame is fed back in as input
for step in range(predict_frames):
    input_reshaped = input_seq.reshape(1, sequence_length, -1)
    next_frame = model.predict(input_reshaped, verbose=0)[0]
    predicted_frames.append(next_frame)
    # Slide the window: drop the oldest frame and append the prediction, so the
    # buffer stays at `sequence_length` rows instead of growing each step.
    input_seq = np.vstack([input_seq[1:], next_frame])

# Reverse normalization. Even columns hold x and odd columns hold y. The
# scalers were fitted on a single column, so each half is flattened to (-1, 1)
# for inverse_transform and then restored to its 2-D shape. Local names avoid
# `x`/`y` so the global target array `y` is not overwritten.
predicted_frames = np.array(predicted_frames).reshape(-1, interleaved_data.shape[1])
norm_xs = predicted_frames[:, 0::2]
norm_ys = predicted_frames[:, 1::2]
reversed_predictions = np.empty_like(predicted_frames)
reversed_predictions[:, 0::2] = scaler_x.inverse_transform(norm_xs.reshape(-1, 1)).reshape(norm_xs.shape)
reversed_predictions[:, 1::2] = scaler_y.inverse_transform(norm_ys.reshape(-1, 1)).reshape(norm_ys.shape)

# Create DataFrame; column order follows the entity order used to build interleaved_data
output_columns = []
for ent in sorted_entities:
    output_columns.append(f"{ent}_x")
    output_columns.append(f"{ent}_y")

pred_df = pd.DataFrame(reversed_predictions, columns=output_columns)

# Dynamic frame range: "frame" holds row positions within interleaved_data
first_frame = train_frames
last_frame = train_frames + len(pred_df) - 1
pred_df.insert(0, "frame", range(first_frame, last_frame + 1))
output_path = f"predicted_frames_{first_frame}_to_{last_frame}.csv"
pred_df.to_csv(output_path, index=False)
print(f"✅ Saved: {output_path}")


In [None]:
# Reverse normalization for ground truth (actual values)
actual_gt = interleaved_data[train_frames:]
actual_gt_unnorm = []

for frame in actual_gt:
    unnormalized_frame = []
    for i in range(0, len(frame), 2):
        x = scaler_x.inverse_transform(frame[i].reshape(-1, 1))[0][0]
        y = scaler_y.inverse_transform(frame[i + 1].reshape(-1, 1))[0][0]
        unnormalized_frame.extend([x, y])
    actual_gt_unnorm.append(unnormalized_frame)

actual_gt_unnorm = np.array(actual_gt_unnorm)


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# One-step-ahead predictions: every test window is built from real frames.
predictions = model.predict(X_test)

# Unnormalize: even columns go through the x scaler, odd columns through the
# y scaler. Each result is an (x_block, y_block) pair.
predictions_un = (
    scaler_x.inverse_transform(predictions[:, ::2]),
    scaler_y.inverse_transform(predictions[:, 1::2]),
)
y_test_un = (
    scaler_x.inverse_transform(y_test[:, ::2]),
    scaler_y.inverse_transform(y_test[:, 1::2]),
)

# Put the x and y blocks side by side and flatten to 1-D. Predictions and
# targets are flattened in the same order, so values still pair up.
preds_flat = np.concatenate(predictions_un, axis=1).reshape(-1)
actuals_flat = np.concatenate(y_test_un, axis=1).reshape(-1)

# Metrics pool all coordinates of all entities into one vector.
mse = mean_squared_error(actuals_flat, preds_flat)
rmse = np.sqrt(mse)
mae = mean_absolute_error(actuals_flat, preds_flat)
r2 = r2_score(actuals_flat, preds_flat)

print(f"\n📊 Evaluation Metrics:")
print(f"MAE:  {mae:.4f}")
print(f"MSE:  {mse:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²:   {r2:.4f}")


In [None]:
import matplotlib.pyplot as plt

num_entities = predicted_frames.shape[1] // 2
sample_frames = 5

# Never request more frames than are available; indexing past the end of the
# predictions or the ground truth would raise IndexError on a short hold-out.
frames_to_plot = min(sample_frames, len(reversed_predictions), len(actual_gt_unnorm))

for i in range(frames_to_plot):
    # Rows are interleaved [e0_x, e0_y, e1_x, e1_y, ...]; view them as (entity, 2)
    pred_xy = np.asarray(reversed_predictions[i]).reshape(num_entities, 2)
    act_xy = np.asarray(actual_gt_unnorm[i]).reshape(num_entities, 2)

    fig, ax = plt.subplots(figsize=(10, 7))
    # One scatter call per series instead of one per entity
    ax.scatter(act_xy[:, 0], act_xy[:, 1], color='blue', label='Actual')
    ax.scatter(pred_xy[:, 0], pred_xy[:, 1], color='red', marker='x', label='Predicted')
    # With 2-row inputs, plot() draws one line per column, giving a dotted
    # segment from each actual position to its prediction.
    ax.plot(np.vstack([act_xy[:, 0], pred_xy[:, 0]]),
            np.vstack([act_xy[:, 1], pred_xy[:, 1]]),
            color='gray', linestyle='dotted')

    ax.set_title(f"Frame {train_frames + i} - Actual vs Predicted")
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.grid(True)
    ax.legend()
    plt.show()


In [None]:
# Example plot for a specific entity
entity_index = 7  # Change this to inspect others
print(f"🔍 Entity Index {entity_index} corresponds to: {sorted_entities[entity_index]}")

# You must have variables `reversed_predictions` and `actual_gt_unnorm` defined before this.
pred_xs = [frame[entity_index * 2] for frame in reversed_predictions]
pred_ys = [frame[entity_index * 2 + 1] for frame in reversed_predictions]
true_xs = [frame[entity_index * 2] for frame in actual_gt_unnorm]
true_ys = [frame[entity_index * 2 + 1] for frame in actual_gt_unnorm]

plt.figure(figsize=(12, 6))
plt.plot(true_xs, true_ys, label='Actual Trajectory', color='blue')
plt.plot(pred_xs, pred_ys, label='LSTM Prediction', color='red', linestyle='--')
plt.title(f"Full Trajectory - {sorted_entities[entity_index]}")
plt.xlabel("X")
plt.ylabel("Y")
plt.legend()
plt.grid(True)
plt.show()

