In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from warnings import filterwarnings
# Suppress every warning for the rest of the session. Note that this also hides
# deprecation notices raised by the libraries imported above.
filterwarnings("ignore")

In [2]:
# Load the cleaned tracks data from its CSV file and preview the first five rows.
tracks_dataset = pd.read_csv(filepath_or_buffer="Group_18_data_cleaned.csv")
tracks_dataset.head(n=5)

Unnamed: 0,oid,timestamp,x,y,body_roll,body_pitch,body_yaw,head_roll,head_pitch,head_yaw,other_oid,other_class,other_x,other_y
0,50187,1842.4,495854.6403,5405751.0,0.3,-0.71,190.56,-0.25,-1.09,186.37,"[47646, 50181, 50184, 50187]","[0, 4, 4, 4]","[495923.373133135, 495899.069769386, 495899.05...","[5405744.32136751, 5405738.47595118, 5405739.1..."
1,50187,1842.5,495854.7921,5405751.0,0.3,-0.71,190.56,-0.25,-1.09,186.37,"[50181, 50187, 50184, 47646]","[4, 4, 4, 0]","[495899.234566716, 495854.792078353, 495899.22...","[5405738.39126416, 5405750.93930797, 5405739.2..."
2,50187,1842.6,495854.9438,5405751.0,0.3,-0.71,190.56,-0.25,-1.09,186.37,"[47646, 50187, 50184, 50181]","[0, 4, 4, 4]","[495921.779445452, 495854.943847121, 495899.35...","[5405744.51929698, 5405750.96626812, 5405739.1..."
3,50187,1842.7,495855.0956,5405751.0,0.3,-0.71,190.56,-0.25,-1.09,186.37,"[50187, 47646, 50184, 50181]","[4, 0, 4, 4]","[495855.09561589, 495920.943052671, 495899.490...","[5405750.99322827, 5405744.63008031, 5405739.1..."
4,50187,1842.8,495855.2569,5405751.0,0.3,-0.71,190.56,-0.25,-1.09,186.37,"[50187, 50184, 50181, 47646]","[4, 4, 4, 0]","[495855.256935427, 495899.585908147, 495899.72...","[5405751.02150176, 5405739.0332702, 5405738.08..."


In [3]:
# (rows, columns) of the loaded frame
tracks_dataset.shape

(4759, 14)

In [4]:
# Keep only the six body/head roll, pitch and yaw columns; head_yaw is split
# off as the target in a later cell.
orientation_columns = [
    'body_roll', 'body_pitch', 'body_yaw',
    'head_roll', 'head_pitch', 'head_yaw',
]
X = tracks_dataset[orientation_columns]
X

Unnamed: 0,body_roll,body_pitch,body_yaw,head_roll,head_pitch,head_yaw
0,0.3,-0.71,190.56,-0.25,-1.09,186.37
1,0.3,-0.71,190.56,-0.25,-1.09,186.37
2,0.3,-0.71,190.56,-0.25,-1.09,186.37
3,0.3,-0.71,190.56,-0.25,-1.09,186.37
4,0.3,-0.71,190.56,-0.25,-1.09,186.37
...,...,...,...,...,...,...
4754,0.3,-0.71,190.56,-0.25,-1.09,186.37
4755,0.3,-0.71,190.56,-0.25,-1.09,186.37
4756,0.3,-0.71,190.56,-0.25,-1.09,186.37
4757,0.3,-0.71,190.56,-0.25,-1.09,186.37


In [5]:
# Rescale every column of X with a MinMaxScaler.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed in a later cell, so held-out rows influence the scaling. Consider
# fitting on the training split only.
min_max_scaler = MinMaxScaler()
scaled_values = min_max_scaler.fit_transform(X)

# fit_transform returns a bare array, so the original column names are
# re-attached while building the frame.
X_scaled = pd.DataFrame(scaled_values, columns=X.columns)
X_scaled

Unnamed: 0,body_roll,body_pitch,body_yaw,head_roll,head_pitch,head_yaw
0,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
1,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
2,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
3,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
4,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
...,...,...,...,...,...,...
4754,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
4755,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
4756,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761
4757,0.410112,0.690168,0.532291,0.593075,0.540117,0.518761


In [6]:
# The (already scaled) head_yaw column is the regression target.
y = X_scaled.loc[:, 'head_yaw']
y

0       0.518761
1       0.518761
2       0.518761
3       0.518761
4       0.518761
          ...   
4754    0.518761
4755    0.518761
4756    0.518761
4757    0.518761
4758    0.518761
Name: head_yaw, Length: 4759, dtype: float64

In [7]:
# Remove the target from the feature frame. This rebinds X_scaled, so running
# the cell a second time raises because head_yaw is already gone.
X_scaled = X_scaled.drop(columns=['head_yaw'])

In [8]:
# First hold out 30% of all rows as the test set ...
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.3,
    random_state=42,
)

# ... then hold out 30% of the remaining rows as the validation set.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.3,
    random_state=42,
)

In [9]:
#importing the necessary libraries
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.ensemble import RandomForestRegressor

## Sequential Forward Floating Selection (SFFS) of 4 features, scored with a
## shallow random forest.
# random_state pins the forest's bootstrap/feature sampling so the selected
# feature set is the same on every run; the next cell hard-codes that set.
# NOTE(review): cv=0 disables cross-validation in mlxtend, so candidate subsets
# are scored on the training rows themselves. Confirm this is intended.
sffs = SFS(
    RandomForestRegressor(
        n_estimators=200,
        min_samples_split=8,
        min_samples_leaf=3,
        max_depth=3,
        random_state=42,
    ),
    k_features=4,
    forward=True,
    floating=True,
    cv=0,
)

sffs.fit(X_train, y_train)
sffs.k_feature_names_

('body_pitch', 'body_yaw', 'head_roll', 'head_pitch')

Using the best model from Project 2 (Random Forest Regressor) with the optimized features

In [10]:
# Columns picked by the random-forest SFFS run. They are hard-coded here, so
# they must be updated by hand if that selection changes.
rf_feature_names = ['body_yaw', 'head_roll', 'head_pitch', 'body_pitch']
X_selected_train = X_train[rf_feature_names]
y_selected_train = y_train

In [11]:
# Random forest with the tuned hyper-parameters, trained on the selected features.
# random_state fixes the bootstrap and per-split feature sampling so the
# validation score reported below can be reproduced.
rf = RandomForestRegressor(
    bootstrap=True,
    max_depth=90,
    max_features=3,
    min_samples_leaf=3,
    min_samples_split=8,
    n_estimators=200,
    random_state=42,
)
rf.fit(X_selected_train, y_selected_train)

RandomForestRegressor(max_depth=90, max_features=3, min_samples_leaf=3,
                      min_samples_split=8, n_estimators=200)

In [12]:
# Validation rows restricted to the same four columns (same order) the forest
# was trained on.
X_selected_valid = X_valid.loc[:, ['body_yaw', 'head_roll', 'head_pitch', 'body_pitch']]
y_selected_valid = y_valid

In [13]:
# Random-forest predictions for the validation rows
pred = rf.predict(X_selected_valid)

In [15]:
from sklearn.metrics import accuracy_score, mean_squared_error

# mean_squared_error is used without taking a square root, so the reported
# figure is the MSE (not the RMSE) and is labelled accordingly.
print(
    "MSE for Random forest using Bidirectional variable selection:",
    np.round(mean_squared_error(y_valid, pred), 4),
)

RMSE for Random forest using Bidirectional variable selection: 0.0058


In [17]:
#importing the necessary libraries
from sklearn.svm import SVR
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.ensemble import RandomForestRegressor

## Sequential Forward Floating Selection (SFFS) of 4 features, this time scored
## with an RBF-kernel SVR. Rebinds `sffs` from the earlier random-forest run.
svr_for_selection = SVR(kernel="rbf", C=0.2)
sffs = SFS(
    svr_for_selection,
    k_features=4,
    forward=True,
    floating=True,
    cv=0,
)

sffs.fit(X_train, y_train)
sffs.k_feature_names_

('body_roll', 'body_yaw', 'head_roll', 'head_pitch')

In [19]:
# Columns picked by the SVR-based SFFS run (hard-coded). This rebinds
# X_selected_train / y_selected_train, which previously held the random-forest
# training subset.
svr_feature_names = ['body_roll', 'body_yaw', 'head_roll', 'head_pitch']
X_selected_train = X_train[svr_feature_names]
y_selected_train = y_train

Using the best model from Project 3 (non-linear SVM) with the optimized features

In [42]:
from sklearn.model_selection import GridSearchCV

# GridSearchCV clones the estimator it receives and fits the clones itself, so
# the base SVR is passed in unfitted; fitting it beforehand was wasted work.
SVR_Radial_Basis = SVR(kernel="rbf")

# Candidate C values from 0.2 up to (but excluding) 1 in steps of 0.1,
# evaluated with 5-fold cross-validation on the selected training features.
# NOTE(review): the recorded best value (about 0.9) is the largest candidate,
# which suggests the grid should be extended upwards.
svr_parameters = {"C": np.arange(0.2, 1, 0.1)}
svr_cv_model = GridSearchCV(SVR_Radial_Basis, svr_parameters, cv=5).fit(
    X_selected_train, y_selected_train
)

In [43]:
# Parameter setting that scored best in the grid search
svr_cv_model.best_params_

{'C': 0.9000000000000001}

In [44]:
# The best C value on its own
svr_cv_model.best_params_['C']

0.9000000000000001

In [84]:
# Final RBF-kernel SVR, trained on the selected training features.
# NOTE(review): C=40 is hard-coded and lies outside the grid searched above
# (whose best value was about 0.9). Confirm where this value came from.
svr_tuned = SVR(kernel="rbf", C=40)
svr_tuned.fit(X_selected_train, y_selected_train)

In [85]:
# Predictions on the training rows, shown for inspection only; y_pred is
# overwritten with validation predictions in a later cell.
y_pred=svr_tuned.predict(X_selected_train)
y_pred

array([0.44466533, 0.5044961 , 0.5196872 , ..., 0.5044961 , 0.5044961 ,
       0.30222113])

In [86]:
from sklearn.metrics import mean_squared_error, r2_score

In [87]:
# Validation rows restricted to the same four columns (same order) the SVR was
# trained on.
X_selected_valid2 = X_valid.loc[:, ['body_roll', 'body_yaw', 'head_roll', 'head_pitch']]
y_selected_valid2 = y_valid

In [88]:
# SVR predictions for the validation rows (replaces the earlier training-set predictions)
y_pred=svr_tuned.predict(X_selected_valid2)

In [89]:
# Validation mean squared error of the SVR, rounded to 4 decimals
np.round(mean_squared_error(y_selected_valid2,y_pred),4)

0.0058

In [90]:
# mean_squared_error is used without taking a square root, so the reported
# figure is the MSE (not the RMSE) and is labelled accordingly.
print(
    "MSE for SVM using Bidirectional variable selection:",
    np.round(mean_squared_error(y_selected_valid2, y_pred), 4),
)

RMSE for SVM using Bidirectional variable selection: 0.0058


In [92]:
import pickle

# Validation MSE of each model, in order: [random forest, SVR].
bidirectional_mse = [
    np.round(mean_squared_error(y_selected_valid, pred), 4),
    np.round(mean_squared_error(y_selected_valid2, y_pred), 4),
]

# Write the scores to disk. The context manager flushes and closes the file
# even if pickling fails; previously the handle was opened and never closed.
with open('bidirectional_feature_model.pkl', 'wb') as file:
    pickle.dump(bidirectional_mse, file)