In [25]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Satellites to keep -- change this as needed for the number of satellites used.
SAT_IDS = [0, 1, 2, 3, 4, 5, 6]
# Bookkeeping columns: they are never imputed, outlier-filtered or rescaled.
ID_COLS = ['time', 'id', 'sat_id']

raw_data = pd.read_csv('jan_train.csv')

# .copy() yields an independent frame, so the column assignments below do not
# write into a slice of raw_data (which raises SettingWithCopyWarning).
data = raw_data[raw_data['sat_id'].isin(SAT_IDS)].copy()

# Convert time to whole seconds elapsed since the reference instant
reference_time = pd.Timestamp('2014-01-01 00:00:00.000')
data['time'] = (
    (pd.to_datetime(data['epoch']) - reference_time).dt.total_seconds().astype(int)
)
data = data.drop(columns=['epoch'])

# Numeric feature columns = every numeric column except the bookkeeping ones.
# Index.difference returns them sorted; `scaler` below is fitted in this order.
numeric_cols = data.select_dtypes(include=[np.number]).columns.difference(ID_COLS)

# Fill in NA feature cells with the column average
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].mean())

# Remove outliers with the 1.5 * IQR rule, judged on the feature columns only.
# Including sat_id/id/time would drop rows for being "unusual" identifiers:
# e.g. if one satellite dominates the selection, the IQR of sat_id is 0 and
# every other satellite's rows would be discarded.
features = data[numeric_cols]
Q1 = features.quantile(0.25)
Q3 = features.quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# A row is an outlier if any of its feature values falls outside the bounds
outlier_mask = ((features < lower_bound) | (features > upper_bound)).any(axis=1)
data = data[~outlier_mask].copy()

# Normalize the feature columns between -1 and 1.
# Column replacement (instead of .loc[:, cols] = ...) avoids dtype-incompatible
# in-place writes when a feature column happens to be integer-typed.
scaler = MinMaxScaler(feature_range=(-1, 1))
data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

data


Unnamed: 0,id,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim,time
0,0,0,-0.245790,0.486323,-0.976791,-0.208606,-0.675218,-0.913636,-0.143951,0.461467,-0.998713,-0.207422,-0.674203,-0.915322,0
3,3,0,-0.252363,-0.321483,-0.976352,0.090341,-0.591174,0.664548,-0.150823,-0.376853,-1.000000,0.091557,-0.591108,0.663871,8409
4,4,0,-0.197762,-0.519495,-0.658086,0.141318,-0.426047,0.926969,-0.096552,-0.582583,-0.682571,0.142670,-0.426248,0.927151,11212
8,8,0,0.088839,-0.761753,0.975226,0.159600,0.157358,0.997815,0.188801,-0.834976,0.949056,0.161335,0.157320,1.000000,22424
24,24,0,-0.245577,0.487125,-0.975713,-0.209069,-0.674710,-0.915479,-0.143928,0.461423,-0.998693,-0.207537,-0.674220,-0.915317,67272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6741,16407,6,0.965547,-0.756238,-0.354472,-0.165574,-0.328468,-0.696389,0.999285,-0.599487,-0.122165,-0.109764,-0.412325,-0.772820,634500
6759,16425,6,-0.558754,0.968131,0.901622,0.542386,0.165666,0.535382,-1.000000,0.708710,0.090128,0.339629,0.336957,0.610476,749864
6763,16429,6,1.000000,-0.072229,0.673831,0.101203,-0.460254,-0.672128,0.792117,0.316279,0.999328,0.354242,-0.519664,-0.569203,775500
6765,16431,6,0.982194,-0.792493,-0.392705,-0.173038,-0.313549,-0.681622,0.999413,-0.597712,-0.120718,-0.109352,-0.412703,-0.773274,788318
