In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from datetime import datetime

In [None]:
# Read the raw trajectory table from disk into `data`.
# Later cells read the 'time', 'label', 'lat', 'lon' and 'alt' columns from it.
data = pd.read_csv(filepath_or_buffer='loaded/trajectories.csv')

In [None]:
# Discard every row that has at least one missing value.
data = data.dropna()

# Feature engineering on the timestamp:
# parse 'time' once, then swap the raw column for hour / minute / second parts.
# NOTE: after this cell `data` no longer has a 'time' column, so re-running the
# cell without reloading the CSV raises a KeyError on data['time'].
timestamps = pd.to_datetime(data['time'])
data = data.drop(columns='time').assign(
    hour=timestamps.dt.hour,
    minute=timestamps.dt.minute,
    second=timestamps.dt.second,
)

# Map the class labels to integer codes; the fitted encoder is kept in
# `label_encoder` so the original label values remain recoverable.
label_encoder = LabelEncoder()
data = data.assign(label=label_encoder.fit_transform(data['label']))

# Target is the encoded label column; every remaining column is a feature.
y = data['label']
X = data.drop(columns='label')

In [None]:
# Hold out 20% of the rows as a test set (shuffled, fixed seed for repeatability).
# NOTE(review): the split is row-level and not stratified — if rows are consecutive
# points of the same trajectory, or classes are very imbalanced, confirm this is
# the intended evaluation protocol.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Standardize the coordinate columns. The scaler statistics come from the training
# split only, and the same fitted scaler is then applied to both splits.
# NOTE(review): only lat/lon/alt are standardized; every other feature column stays
# on its raw scale, which matters for distance-based models — confirm intended.
coord_cols = ['lat', 'lon', 'alt']
scaler = StandardScaler().fit(X_train[coord_cols])
X_train[coord_cols] = scaler.transform(X_train[coord_cols])
X_test[coord_cols] = scaler.transform(X_test[coord_cols])

In [2]:
# Candidate classifiers, keyed by the display name used in the printed reports.
models = dict([
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('K-Nearest Neighbors', KNeighborsClassifier()),
])

# For each candidate: fit on the training split, predict the test split, and print
# the per-class precision / recall / F1 report for those predictions.
for model_name in models:
    model = models[model_name]
    print(f"\nTraining {model_name} model...")

    # fit() returns the fitted estimator, so prediction can be chained onto it.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    print(f"{model_name} Classification Report:")
    print(classification_report(y_test, y_pred))



Training Random Forest model...
Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      1.00     18057
           1       0.99      0.99      0.99      4727
           2       0.99      1.00      0.99      2201
           3       1.00      1.00      1.00      3869
           4       0.99      0.99      0.99       369
           5       0.99      1.00      1.00       997
           6       1.00      1.00      1.00         3
           7       1.00      1.00      1.00        62
           8       0.95      0.99      0.97       166

    accuracy                           0.99     30451
   macro avg       0.99      1.00      0.99     30451
weighted avg       0.99      0.99      0.99     30451


Training K-Nearest Neighbors model...
K-Nearest Neighbors Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.92      0.89     18057
           1       0.60      0.53 

In [None]:

# End-to-end run in a single cell: load -> clean -> featurize -> split -> scale -> train -> report.
# NOTE(review): this cell repeats the step-by-step cells earlier in the notebook, but
# reads 'trajectories.csv' rather than 'loaded/trajectories.csv' — confirm which path
# is the intended one.

# Read the trajectory table and discard rows with any missing value.
data = pd.read_csv('trajectories.csv').dropna()

# Parse 'time' once, then swap the raw column for hour / minute / second parts.
timestamps = pd.to_datetime(data['time'])
data = data.drop(columns='time').assign(
    hour=timestamps.dt.hour,
    minute=timestamps.dt.minute,
    second=timestamps.dt.second,
)

# Map the class labels to integer codes; the fitted encoder stays in `label_encoder`.
label_encoder = LabelEncoder()
data = data.assign(label=label_encoder.fit_transform(data['label']))

# Target is the encoded label column; every remaining column is a feature.
y = data['label']
X = data.drop(columns='label')

# Hold out 20% of the rows as a test set (shuffled, fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Standardize the coordinate columns using statistics from the training split only,
# then apply that same fitted scaler to both splits.
coord_cols = ['lat', 'lon', 'alt']
scaler = StandardScaler().fit(X_train[coord_cols])
X_train[coord_cols] = scaler.transform(X_train[coord_cols])
X_test[coord_cols] = scaler.transform(X_test[coord_cols])

# Candidate classifiers, keyed by the display name used in the printed reports.
models = dict([
    ('Random Forest', RandomForestClassifier(random_state=42)),
    ('K-Nearest Neighbors', KNeighborsClassifier()),
])

# For each candidate: fit on the training split, predict the test split, and print
# the per-class precision / recall / F1 report for those predictions.
for model_name in models:
    model = models[model_name]
    print(f"\nTraining {model_name} model...")

    # fit() returns the fitted estimator, so prediction can be chained onto it.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    print(f"{model_name} Classification Report:")
    print(classification_report(y_test, y_pred))
