In [2]:
"""
***********************
-----------------------
         SETUP
-----------------------
***********************
"""

'\n***********************\n-----------------------\n         SETUP\n-----------------------\n***********************\n'

In [1]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
import pathlib
import os
from pathlib import Path
from sklearn import linear_model
from sklearn.impute import SimpleImputer
from sklearn import metrics
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error

In [3]:
# get the path of data
train_data_path = '/Users/rakeshmehta/Desktop/AI-Collision-Algo/Data/train_data.csv'
test_data_path = '/Users/rakeshmehta/Desktop/AI-Collision-Algo/Data/test_data.csv'
print(train_data_path)
print(test_data_path)

# put the training and testing data into a DataFrame
train_data = pd.read_csv(train_data_path)
test_data = pd.read_csv(test_data_path)

# clean up data by removing column of UNKNOWN from the training and testing DataFrames
train_data = train_data.drop(columns = ['c_object_type'])
test_data = test_data.drop(columns = ['c_object_type'])

print(train_data.head(10))
print(test_data.head(10))

/Users/rakeshmehta/Desktop/AI-Collision-Algo/Data/train_data.csv
/Users/rakeshmehta/Desktop/AI-Collision-Algo/Data/test_data.csv
   event_id  time_to_tca  mission_id       risk  max_risk_estimate  \
0         0     1.566798           5 -10.204955          -7.834756   
1         0     1.207494           5 -10.355758          -7.848937   
2         0     0.952193           5 -10.345631          -7.847406   
3         0     0.579669           5 -10.337809          -7.845880   
4         0     0.257806           5 -10.391260          -7.852942   
5         1     6.530455           5  -7.561299          -7.254301   
6         1     5.561646           5  -9.315693          -7.468904   
7         1     5.226504           5  -7.422508          -7.051001   
8         1     3.570013           5  -9.248105          -7.327533   
9         2     6.983474           2 -10.816161          -6.601713   

   max_risk_scaling  miss_distance  relative_speed  relative_position_r  \
0          8.602101      

In [31]:
# set up the Features and Lables
# training
X_train = train_data.drop(columns=['max_risk_estimate', 'risk'])
y_train = train_data['max_risk_scaling'].values
# testing
X_test = test_data.drop(columns=['max_risk_estimate', 'risk'])
y_test = test_data['max_risk_scaling'].values

# verify the shape of the matrices of data
print("--------------------------------------------")
print("                 Features")
print("--------------------------------------------")
print("Training Features Shape: ", X_train.shape)
print("Testing Features Shape: ", X_test.shape)
print("--------------------------------------------")
print("                  Labels")
print("--------------------------------------------")
print("Training Labels Shape: ", y_train.shape)
print("Testing Labels Shape: ", y_test.shape)

--------------------------------------------
                 Features
--------------------------------------------
Training Features Shape:  (162634, 100)
Testing Features Shape:  (24484, 100)
--------------------------------------------
                  Labels
--------------------------------------------
Training Labels Shape:  (162634,)
Testing Labels Shape:  (24484,)


In [32]:
# create an imputer object with strategy (e.g. mean, median, mode) to get rid of NaN values
imputer = SimpleImputer(strategy='mean')

# Apply imputation to the Data
X_train = imputer.fit_transform(X_train)
X_test = imputer.fit_transform(X_test)

# Print out shape again to verify X and y are same size
print(np.shape(X_train))
print(np.shape(y_train))
print(np.shape(X_test))
print(np.shape(y_test))

(162634, 100)
(162634,)
(24484, 100)
(24484,)


In [33]:
"""
***********************
-----------------------
      REGRESSIONS
-----------------------
***********************
"""

'\n***********************\n-----------------------\n      REGRESSIONS\n-----------------------\n***********************\n'

In [46]:
# creating and training the linear regression model
l_reg = linear_model.LinearRegression()
l_reg.fit(X_train, y_train)

In [47]:
# creating and training the bayesian ridge regression model
b_ridge = linear_model.BayesianRidge()
b_ridge.fit(X_train, y_train)

In [45]:
print("--------------------------------------------")
print("             LINEAR REGRESSION")
print("--------------------------------------------")

# Predict probabilities on the test set
l_reg_predictions = l_reg.predict(X_test)

# Evaluate the model using mean squared error (MSE)
l_reg_mse = mean_squared_error(y_test, l_reg_predictions)
print("Mean Squared Error:", l_reg_mse)

# Evaluate the model using r^2 value
print("r^2: ", l_reg.score(X_test, y_test))

# print out predictions and actual values to compare visually
print(y_test)
print(l_reg_predictions)

print("--------------------------------------------")
print("               BAYESIAN RIDGE")
print("--------------------------------------------")

# Predict probabilities on the test set
b_ridge_predictions = b_ridge.predict(X_test)

# Evaluate the model using mean squared error (MSE)
b_ridge_mse = mean_squared_error(y_test, b_ridge_predictions)
print("Mean Squared Error:", b_ridge_mse)

# Evaluate the model using r^2 value
print("r^2: ", b_ridge.score(X_test, y_test))

# print out predictions and actual values to compare visually
print(y_test)
print(b_ridge_predictions)

--------------------------------------------
             LINEAR REGRESSION
--------------------------------------------
Mean Squared Error: 11835327741.542776
r^2:  -0.2474548337668918
[ 1.78789354  1.75938621  1.82426279 ... 48.73193566 48.9873245
 48.05674977]
[55487.10545406 55487.10545406 55487.10545406 ... 55487.10545406
 55487.10545406 55487.10545406]
--------------------------------------------
               BAYESIAN RIDGE
--------------------------------------------
Mean Squared Error: 67954420003.84059
r^2:  -6.1624606906379125
[ 1.78789354  1.75938621  1.82426279 ... 48.73193566 48.9873245
 48.05674977]
[99838.15504961 99838.15504961 99838.15504961 ... 99838.15504961
 99838.15504961 99838.15504961]
--------------------------------------------
                   LASSO
--------------------------------------------
Mean Squared Error: 0.00233993185037422
r^2:  0.9999999999997534
[ 1.78789354  1.75938621  1.82426279 ... 48.73193566 48.9873245
 48.05674977]
[ 1.8605275   1.831588