In [1]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt  
import seaborn as seabornInstance 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import warnings
import xgboost
%matplotlib inline

In [2]:
# Column names applied to both CSVs (the same list was previously repeated inline).
# The first 12 are sliced off as model inputs later in the notebook; "ax" and "ay"
# are used as the regression targets.
column_names = ["vx", "vy", "vz", "dx", "dy", "vfx", "vfy", "vfz", "afx", "afy", "afz", "num_v_labels", "ax", "ay", "az"]

# header=0 makes pandas consume the first line of each file as the header and replace
# it with column_names. That line therefore never enters the frame as a data row, so it
# no longer has to be dropped by hand, and dtypes are inferred from the data rows only.
# NOTE: the resulting index starts at 0 (it started at 1 when row 0 was dropped); the
# cells visible here only use positional .iloc access.
train_dataset = pd.read_csv('/home/scg2151/waymo-project/csv_data/training/training_data_1_12.csv', names=column_names, header=0)
test_dataset = pd.read_csv('/home/scg2151/waymo-project/csv_data/validation/validation_data_1_12.csv', names=column_names, header=0)
print("Shape of the training data is", train_dataset.shape)
print("Shape of the validation data is", test_dataset.shape)

Shape of the training data is (17654, 15)
Shape of the validation data is (6947, 15)


In [3]:
# Model inputs: the first 12 columns of each frame, cast to float.
# The builtin float is used instead of np.float: that alias was deprecated in
# NumPy 1.20 and removed in 1.24, where np.float raises AttributeError. It was only
# ever an alias for the builtin, so the resulting dtype (float64) is unchanged.
X_train = train_dataset.iloc[:, :12].astype(float)
X_test = test_dataset.iloc[:, :12].astype(float)
print("Shape of the X_train is", X_train.shape)
print("Shape of the X_test is", X_test.shape)

Shape of the X_train is (17654, 12)
Shape of the X_test is (6947, 12)


In [4]:
# Regression targets: column 12 ("ax") and column 13 ("ay"), each cast to float.
# The builtin float replaces np.float, an alias deprecated in NumPy 1.20 and removed
# in 1.24; it was an alias for the builtin, so the resulting dtype is unchanged.
Y_train_ax = train_dataset.iloc[:, 12].astype(float)
Y_train_ay = train_dataset.iloc[:, 13].astype(float)
Y_test_ax = test_dataset.iloc[:, 12].astype(float)
Y_test_ay = test_dataset.iloc[:, 13].astype(float)
print("Shape of the Y_train_ax, Y_train_ay is", Y_train_ax.shape, Y_train_ay.shape)
print("Shape of the Y_test_ax, Y_test_ay is", Y_test_ax.shape, Y_test_ay.shape)

Shape of the Y_train_ax, Y_train_ay is (17654,) (17654,)
Shape of the Y_test_ax, Y_test_ay is (6947,) (6947,)


In [159]:
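# Unused alternative: keep every column from index 12 onward together as one target frame.
# (np.float no longer exists in NumPy >= 1.24, so the builtin float is shown here.)
# Y_train, Y_test = train_dataset.iloc[:, 12:], test_dataset.iloc[:, 12:]
# Y_train, Y_test = Y_train.astype(float), Y_test.astype(float)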

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, f1_score
import pickle

def grid_search(model_type, tuned_parameters, X_train, y_train, X_test, y_test, name):
    """Fit a model, optionally via grid search, and print its errors on the test data.

    Parameters
    ----------
    model_type : estimator
        Unfitted estimator exposing ``fit`` and ``predict``.
    tuned_parameters : dict or list of dict
        Parameter grid passed to ``GridSearchCV``. Ignored when ``name`` is
        ``"VotingRegressor"``.
    X_train, y_train
        Data the model (or the 3-fold grid search) is fitted on.
    X_test, y_test
        Data the printed MAE / MSE / RMSE are computed on.
    name : str
        Label printed in the report. The exact value ``"VotingRegressor"`` disables
        the grid search and fits ``model_type`` directly.

    Returns
    -------
    The fitted ``GridSearchCV`` object, or the fitted ``model_type`` itself when
    ``name`` is ``"VotingRegressor"``.
    """
    # Decided once, so the fitting branch and the reporting branch cannot drift apart.
    run_search = name != "VotingRegressor"

    # Warnings raised anywhere during fitting and scoring are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        clf = GridSearchCV(model_type, tuned_parameters, cv=3) if run_search else model_type
        clf.fit(X_train, y_train)

        print("Model is", name)
        print("")
        # Only announce best parameters when a search actually produced some.
        if run_search:
            print("Best parameters set found on development set:")
            print("")
            print(clf.best_params_)
            print("")

        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print("")

        y_pred = clf.predict(X_test)
        # MSE is computed once and reused for the RMSE line.
        mse = metrics.mean_squared_error(y_test, y_pred)
        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', mse)
        print('Root Mean Squared Error:', np.sqrt(mse))

        return clf

In [18]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from mlxtend.regressor import StackingRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import VotingRegressor


def train_ax():
    """Tune four tree-based regressors for the ``ax`` target, ensemble them, and pickle the result.

    Reads the notebook globals ``X_train``, ``Y_train_ax``, ``X_test`` and ``Y_test_ax``
    and delegates fitting and reporting to ``grid_search``. The fitted voting ensemble
    is written to ``voting_regressor_ax.sav`` in the current working directory.

    Returns
    -------
    None
    """
    model = RandomForestRegressor()

    tuned_parameters = [{
        'max_depth': [60, 80],
        'max_features': [2, 3],
        'n_estimators': [100, 200, 500]
    }]

    random_forest_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ax, X_test, Y_test_ax, "RandomForestRegressor")

    model = AdaBoostRegressor()

    tuned_parameters = {
        'n_estimators': [50, 100],
        'learning_rate': [0.1, 0.3, 1],
        'loss': ['square', 'exponential']
    }

    adaboost_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ax, X_test, Y_test_ax, "AdaBoostRegressor")

    model = ExtraTreesRegressor()
    tuned_parameters = [{
        'n_estimators': [500],
        'max_features': [5, 10]
    }]

    extra_trees_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ax, X_test, Y_test_ax, "ExtraTreesRegressor")

    model = GradientBoostingRegressor()
    tuned_parameters = [{
        'n_estimators': [100],
        'learning_rate': [0.02, 0.01],
        'max_depth': [6, 4],
        'min_samples_leaf': [3, 5, 9],
        'max_features': [1.0, 0.3, 0.1]
    }]

    gradient_boosting_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ax, X_test, Y_test_ax, "GradientBoostingRegressor")

    # VotingRegressor clones and refits every estimator it is given. Handing it the
    # GridSearchCV wrappers would therefore repeat each complete grid search during
    # the ensemble fit. Passing the tuned estimators means each base model is fitted
    # once, with the parameters reported above.
    estimators = [
        ('rfr', random_forest_regressor.best_estimator_),
        ('abr', adaboost_regressor.best_estimator_),
        ('etr', extra_trees_regressor.best_estimator_),
        ('gbr', gradient_boosting_regressor.best_estimator_)
    ]

    model = VotingRegressor(estimators)

    # grid_search skips the search for "VotingRegressor", so this grid is only a placeholder.
    tuned_parameters = [{

    }]

    voting_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ax, X_test, Y_test_ax, "VotingRegressor")

    print("Saving the weights of the model")
    # The context manager guarantees the file is flushed and closed, even if pickling fails.
    with open("voting_regressor_ax.sav", 'wb') as model_file:
        pickle.dump(voting_regressor, model_file)

In [19]:
# Run the ax pipeline: grid-search the four base regressors, fit the voting
# ensemble, and pickle it to voting_regressor_ax.sav.
train_ax()

Model is RandomForestRegressor

Best parameters set found on development set:

{'max_depth': 60, 'max_features': 3, 'n_estimators': 500}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.3006799610771246
Mean Squared Error: 0.24145193599545425
Root Mean Squared Error: 0.4913775900419699
Model is AdaBoostRegressor

Best parameters set found on development set:

{'n_estimators': 50, 'learning_rate': 0.1, 'loss': 'exponential'}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.33847888604625914
Mean Squared Error: 0.27571302999741376
Root Mean Squared Error: 0.525083831399724
Model is ExtraTreesRegressor

Best parameters set found on development set:

{'n_estimators': 500, 'max_features': 10}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.30811764451818585
Me

In [22]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import VotingRegressor

def train_ay():
    """Tune four tree-based regressors for the ``ay`` target, ensemble them, and pickle the result.

    Reads the notebook globals ``X_train``, ``Y_train_ay``, ``X_test`` and ``Y_test_ay``
    and delegates fitting and reporting to ``grid_search``. The fitted voting ensemble
    is written to ``voting_regressor_ay.sav`` in the current working directory.

    Returns
    -------
    None
    """
    model = RandomForestRegressor()

    tuned_parameters = [{
        'max_depth': [60, 80],
        'max_features': [2, 3],
        'n_estimators': [100, 200]
    }]

    random_forest_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ay, X_test, Y_test_ay, "RandomForestRegressor")

    model = AdaBoostRegressor()

    tuned_parameters = {
        'n_estimators': [50, 100],
        'learning_rate': [0.1, 0.3, 1],
        'loss': ['square', 'exponential']
    }

    adaboost_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ay, X_test, Y_test_ay, "AdaBoostRegressor")

    model = ExtraTreesRegressor()
    tuned_parameters = [{
        'n_estimators': [500],
        'max_features': [5, 10]
    }]

    extra_trees_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ay, X_test, Y_test_ay, "ExtraTreesRegressor")

    model = GradientBoostingRegressor()
    tuned_parameters = [{
        'n_estimators': [100],
        'learning_rate': [0.02, 0.01],
        'max_depth': [6, 4],
        'min_samples_leaf': [3, 5, 9],
        'max_features': [1.0, 0.3, 0.1]
    }]

    gradient_boosting_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ay, X_test, Y_test_ay, "GradientBoostingRegressor")

    # VotingRegressor clones and refits every estimator it is given. Handing it the
    # GridSearchCV wrappers would therefore repeat each complete grid search during
    # the ensemble fit. Passing the tuned estimators means each base model is fitted
    # once, with the parameters reported above.
    estimators = [
        ('rfr', random_forest_regressor.best_estimator_),
        ('abr', adaboost_regressor.best_estimator_),
        ('etr', extra_trees_regressor.best_estimator_),
        ('gbr', gradient_boosting_regressor.best_estimator_)
    ]

    model = VotingRegressor(estimators)

    # grid_search skips the search for "VotingRegressor", so this grid is only a placeholder.
    tuned_parameters = [{

    }]

    voting_regressor = grid_search(model, tuned_parameters, X_train, Y_train_ay, X_test, Y_test_ay, "VotingRegressor")

    print("Saving the weights of the model")
    # The context manager guarantees the file is flushed and closed, even if pickling fails.
    with open("voting_regressor_ay.sav", 'wb') as model_file:
        pickle.dump(voting_regressor, model_file)

In [23]:
# Run the ay pipeline: grid-search the four base regressors, fit the voting
# ensemble, and pickle it to voting_regressor_ay.sav.
train_ay()

Model is RandomForestRegressor

Best parameters set found on development set:

{'max_depth': 60, 'max_features': 3, 'n_estimators': 200}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.25060053238122937
Mean Squared Error: 0.16930031741179055
Root Mean Squared Error: 0.41146119794190866
Model is AdaBoostRegressor

Best parameters set found on development set:

{'n_estimators': 100, 'learning_rate': 0.1, 'loss': 'exponential'}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.2879121633963699
Mean Squared Error: 0.18929846315048285
Root Mean Squared Error: 0.4350844322088333
Model is ExtraTreesRegressor

Best parameters set found on development set:

{'n_estimators': 500, 'max_features': 10}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.2778544675184178
