In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
import matplotlib.pyplot as plt  
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
from sklearn.preprocessing import PolynomialFeatures
import seaborn as sns

In [None]:
# Read the training and test tables from the working directory.
X_train, test_df = (
    pd.read_csv(csv_name) for csv_name in ('X_train.csv', 'X_test.csv')
)

# Step 1: drop post-collision rows, i.e. rows whose six position columns
# are all exactly zero.
collision_filter = (
    X_train[['x_1', 'y_1', 'x_2', 'y_2', 'x_3', 'y_3']] == 0
).all(axis=1)
X_train_filtered = X_train.loc[~collision_filter]

# Step 2: the t == 0 rows hold each trajectory's initial positions.
# Copy so a column can be added later without touching X_train_filtered.
initial_conditions = X_train_filtered.loc[X_train_filtered['t'] == 0].copy()


### test set ###
# Keep only the t == 0 rows of the test frame.
initial_conditions_test = test_df.loc[test_df['t'] == 0]
# Feature matrix: the six initial-position columns.
# NOTE(review): assumes X_test.csv names them x0_*/y0_* (the training frame
# uses x_*/y_*) - confirm against the file.
test_features_df = initial_conditions_test.loc[
    :, ['x0_1', 'y0_1', 'x0_2', 'y0_2', 'x0_3', 'y0_3']
]

### end test set ###

# Build one integer id per distinct starting configuration: round each
# position to 6 decimals, join the six values into a '_'-separated string,
# and use the categorical code of that string as the id.
_position_columns = ['x_1', 'y_1', 'x_2', 'y_2', 'x_3', 'y_3']
_id_text = None
for _column in _position_columns:
    _piece = initial_conditions[_column].round(6).astype(str)
    _id_text = _piece if _id_text is None else _id_text + '_' + _piece
initial_conditions['trajectory_id'] = _id_text.astype('category').cat.codes



# Attach trajectory_id to the full frame. The join keys include t, and
# initial_conditions only holds t == 0 rows, so only t == 0 rows of the
# main frame can match; every other row gets NaN here.
_merge_keys = ['t', 'x_1', 'y_1', 'x_2', 'y_2', 'x_3', 'y_3']
X_train_filtered = X_train_filtered.merge(
    # Rows with identical keys always carry the same trajectory_id (the id is
    # derived from these very positions), so de-duplicating loses nothing.
    # Without it, repeated initial rows would make the join many-to-many and
    # multiply the matching rows of the main frame.
    initial_conditions[['trajectory_id'] + _merge_keys].drop_duplicates(subset=_merge_keys),
    on=_merge_keys,
    how='left',
)

# Propagate each id down to the following rows of the same trajectory.
# NOTE(review): this relies on every trajectory's rows being contiguous and
# starting with its t == 0 row - confirm against the input file. Rows that
# precede the first t == 0 row keep a NaN id.
X_train_filtered['trajectory_id'] = X_train_filtered['trajectory_id'].ffill()

# Collect every trajectory as one flat vector keyed by trajectory_id.
# A single groupby pass replaces re-scanning the whole frame once per id;
# sort=False keeps the ids in order of first appearance. Rows whose id is
# NaN are skipped by groupby instead of producing an empty entry.
trajectories = {}
for trajectory_id, trajectory_data in X_train_filtered.groupby('trajectory_id', sort=False):
    # Order the rows by time, then flatten row by row:
    # [x_1, y_1, x_2, y_2, x_3, y_3] for the first time step, then the next, ...
    trajectory_data_sorted = trajectory_data.sort_values(by='t')
    trajectories[trajectory_id] = trajectory_data_sorted[['x_1', 'y_1', 'x_2', 'y_2', 'x_3', 'y_3']].values.flatten()