In [0]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt
from sklearn import metrics
import warnings
import xgboost
from sklearn.model_selection import learning_curve
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import VotingRegressor
%matplotlib inline

In [11]:
import os
from google.colab import drive

# Attach Google Drive to the Colab VM, then make the project folder the working
# directory so the relative CSV and pickle paths used later resolve inside it.
drive.mount('/content/gdrive')
os.chdir("/content/gdrive/My Drive/RandomForest")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
# Column layout shared by both CSV files; kept in one place so the two reads
# cannot drift apart.
COLUMN_NAMES = [
    "vx", "vy", "vz", "dx", "dy",
    "vfx", "vfy", "vfz", "afx", "afy", "afz",
    "num_v_labels",
    "ax", "ay", "az",
]

# header=0 consumes the first row of each file and labels the columns with
# COLUMN_NAMES instead. The first row is still discarded, as before, but it is
# no longer parsed as a data row (which forced every column to object dtype and
# required a manual drop afterwards). Row labels now start at 0.
train_dataset = pd.read_csv('training_data_1_12.csv', header=0, names=COLUMN_NAMES)
test_dataset = pd.read_csv('validation_data_1_12.csv', header=0, names=COLUMN_NAMES)

In [0]:
# Feature matrices: the first 12 columns of each dataset, cast to float.
# The builtin `float` replaces `np.float`, which was only an alias for it and
# has been removed from NumPy (AttributeError on 1.24+).
X_train = train_dataset.iloc[:, :12].astype(float)
X_test = test_dataset.iloc[:, :12].astype(float)

In [0]:
# Regression targets: columns 12, 13 and 14 of each dataset, one Series per
# target, cast to float. The builtin `float` replaces `np.float`, which was
# only an alias for it and has been removed from NumPy (AttributeError on 1.24+).
Y_train_ax, Y_train_ay, Y_train_az = (
    train_dataset.iloc[:, col].astype(float) for col in (12, 13, 14)
)
Y_test_ax, Y_test_ay, Y_test_az = (
    test_dataset.iloc[:, col].astype(float) for col in (12, 13, 14)
)

In [0]:
from sklearn.model_selection import GridSearchCV
import pickle

def grid_search(model_type, tuned_parameters, X_train, y_train, X_test, y_test, name, cv=5):
    """Fit a model, print its errors on the evaluation data, and return it.

    Unless ``name`` is ``"VotingRegressor"``, ``model_type`` is wrapped in a
    ``GridSearchCV`` over ``tuned_parameters`` and the selected parameters are
    printed. For ``"VotingRegressor"`` the model is fitted as-is and
    ``tuned_parameters`` is ignored.

    Parameters
    ----------
    model_type : estimator
        Object exposing ``fit`` and ``predict``.
    tuned_parameters : dict or list of dict
        Parameter grid handed to ``GridSearchCV``.
    X_train, y_train :
        Data the model (or the grid search) is fitted on.
    X_test, y_test :
        Data used for the printed error metrics.
    name : str
        Label printed in the report; the exact value ``"VotingRegressor"``
        disables the grid search.
    cv : optional
        Forwarded to ``GridSearchCV`` as ``cv``. Defaults to 5, the value that
        was previously hard-coded.

    Returns
    -------
    The fitted ``GridSearchCV`` object, or the fitted ``model_type`` itself
    when ``name`` is ``"VotingRegressor"``.
    """
    uses_search = name != "VotingRegressor"

    # NOTE(review): every warning raised while fitting and scoring is silenced
    # inside this context, including convergence/deprecation warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        if uses_search:
            clf = GridSearchCV(model_type, tuned_parameters, cv=cv)
        else:
            clf = model_type
        clf.fit(X_train, y_train)

        print("Model is", name)
        print("")
        print("Best parameters set found on development set:")
        print("")
        if uses_search:
            print(clf.best_params_)
        print("")

        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print("")

        y_pred = clf.predict(X_test)
        # Compute the MSE once and reuse it for the RMSE line.
        mse = metrics.mean_squared_error(y_test, y_pred)
        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', mse)
        print('Root Mean Squared Error:', np.sqrt(mse))

        return clf

In [0]:
def train_ax():
    """Tune four tree ensembles on the ``ax`` target, combine them, and pickle the result.

    Uses the notebook-level ``X_train``, ``Y_train_ax``, ``X_test`` and
    ``Y_test_ax``. Each base regressor is tuned through ``grid_search`` (which
    prints its report), the tuned models are combined in a ``VotingRegressor``,
    and the fitted ensemble is written to ``voting_regressor_ax.sav`` relative
    to the current working directory. Returns ``None``.
    """
    data = (X_train, Y_train_ax, X_test, Y_test_ax)

    random_forest_regressor = grid_search(
        RandomForestRegressor(),
        [{
            'max_depth': [60, 80],
            'max_features': [2, 3],
            'n_estimators': [100, 200, 500],
        }],
        *data,
        "RandomForestRegressor",
    )

    adaboost_regressor = grid_search(
        AdaBoostRegressor(),
        {
            'n_estimators': [50, 100],
            'learning_rate': [0.1, 0.3, 1],
            'loss': ['square', 'exponential'],
        },
        *data,
        "AdaBoostRegressor",
    )

    extra_trees_regressor = grid_search(
        ExtraTreesRegressor(),
        [{
            'n_estimators': [500],
            'max_features': [5, 10],
        }],
        *data,
        "ExtraTreesRegressor",
    )

    gradient_boosting_regressor = grid_search(
        GradientBoostingRegressor(),
        [{
            'n_estimators': [100],
            'learning_rate': [0.02, 0.01],
            'max_depth': [6, 4],
            'max_features': [1.0, 0.3],
        }],
        *data,
        "GradientBoostingRegressor",
    )

    # VotingRegressor fits clones of the estimators it is given. Passing the
    # GridSearchCV wrappers would therefore repeat every grid search from
    # scratch; passing the selected estimators refits each one once with the
    # parameters reported above.
    estimators = [
        ('rfr', random_forest_regressor.best_estimator_),
        ('abr', adaboost_regressor.best_estimator_),
        ('etr', extra_trees_regressor.best_estimator_),
        ('gbr', gradient_boosting_regressor.best_estimator_),
    ]

    # The parameter grid is ignored by grid_search for the voting ensemble.
    voting_regressor = grid_search(VotingRegressor(estimators), [{}], *data, "VotingRegressor")

    print("Saving the weights of the model")
    # Context manager so the file is flushed and closed even if pickling fails.
    with open("voting_regressor_ax.sav", 'wb') as model_file:
        pickle.dump(voting_regressor, model_file)

In [17]:
# Tune the base regressors for the ax target, fit the voting ensemble on them,
# and write it to voting_regressor_ax.sav; the per-model reports print below.
train_ax()

Model is RandomForestRegressor

Best parameters set found on development set:

{'max_depth': 80, 'max_features': 3, 'n_estimators': 500}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.3005670583620109
Mean Squared Error: 0.24215637656709194
Root Mean Squared Error: 0.4920938696703018
Model is AdaBoostRegressor

Best parameters set found on development set:

{'learning_rate': 0.1, 'loss': 'exponential', 'n_estimators': 50}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.33937997174358997
Mean Squared Error: 0.2767001951263556
Root Mean Squared Error: 0.5260229986667461
Model is ExtraTreesRegressor

Best parameters set found on development set:

{'max_features': 10, 'n_estimators': 500}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.3033962245076216
Mea

In [0]:
def train_ay():
    """Tune four tree ensembles on the ``ay`` target, combine them, and pickle the result.

    Uses the notebook-level ``X_train``, ``Y_train_ay``, ``X_test`` and
    ``Y_test_ay``. Each base regressor is tuned through ``grid_search`` (which
    prints its report), the tuned models are combined in a ``VotingRegressor``,
    and the fitted ensemble is written to ``voting_regressor_ay.sav`` relative
    to the current working directory. Returns ``None``.
    """
    data = (X_train, Y_train_ay, X_test, Y_test_ay)

    random_forest_regressor = grid_search(
        RandomForestRegressor(),
        [{
            'max_depth': [60, 80],
            'max_features': [2, 3],
            'n_estimators': [100, 200],
        }],
        *data,
        "RandomForestRegressor",
    )

    adaboost_regressor = grid_search(
        AdaBoostRegressor(),
        {
            'n_estimators': [50, 100],
            'learning_rate': [0.1, 0.3, 1],
            'loss': ['square', 'exponential'],
        },
        *data,
        "AdaBoostRegressor",
    )

    extra_trees_regressor = grid_search(
        ExtraTreesRegressor(),
        [{
            'n_estimators': [500],
            'max_features': [5, 10],
        }],
        *data,
        "ExtraTreesRegressor",
    )

    gradient_boosting_regressor = grid_search(
        GradientBoostingRegressor(),
        [{
            'n_estimators': [100],
            'learning_rate': [0.02, 0.01],
            'max_depth': [6, 4],
            'min_samples_leaf': [3, 5, 9],
            'max_features': [1.0, 0.3, 0.1],
        }],
        *data,
        "GradientBoostingRegressor",
    )

    # VotingRegressor fits clones of the estimators it is given. Passing the
    # GridSearchCV wrappers would therefore repeat every grid search from
    # scratch; passing the selected estimators refits each one once with the
    # parameters reported above.
    estimators = [
        ('rfr', random_forest_regressor.best_estimator_),
        ('abr', adaboost_regressor.best_estimator_),
        ('etr', extra_trees_regressor.best_estimator_),
        ('gbr', gradient_boosting_regressor.best_estimator_),
    ]

    # The parameter grid is ignored by grid_search for the voting ensemble.
    voting_regressor = grid_search(VotingRegressor(estimators), [{}], *data, "VotingRegressor")

    print("Saving the weights of the model")
    # Context manager so the file is flushed and closed even if pickling fails.
    with open("voting_regressor_ay.sav", 'wb') as model_file:
        pickle.dump(voting_regressor, model_file)

In [19]:
# Tune the base regressors for the ay target, fit the voting ensemble on them,
# and write it to voting_regressor_ay.sav; the per-model reports print below.
train_ay()

Model is RandomForestRegressor

Best parameters set found on development set:

{'max_depth': 80, 'max_features': 3, 'n_estimators': 100}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.251823796832479
Mean Squared Error: 0.16807306184054044
Root Mean Squared Error: 0.4099671472698031
Model is AdaBoostRegressor

Best parameters set found on development set:

{'learning_rate': 0.1, 'loss': 'exponential', 'n_estimators': 50}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.2679748369615588
Mean Squared Error: 0.1781475842283439
Root Mean Squared Error: 0.4220753300399632
Model is ExtraTreesRegressor

Best parameters set found on development set:

{'max_features': 10, 'n_estimators': 500}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.2775866530080545
Mean 

In [0]:
def train_az():
    """Tune four tree ensembles on the ``az`` target, combine them, and pickle the result.

    Uses the notebook-level ``X_train``, ``Y_train_az``, ``X_test`` and
    ``Y_test_az``. Each base regressor is tuned through ``grid_search`` (which
    prints its report), the tuned models are combined in a ``VotingRegressor``,
    and the fitted ensemble is written to ``voting_regressor_az.sav`` relative
    to the current working directory. Returns ``None``.
    """
    data = (X_train, Y_train_az, X_test, Y_test_az)

    random_forest_regressor = grid_search(
        RandomForestRegressor(),
        [{
            'max_depth': [60, 80],
            'max_features': [2, 3],
            'n_estimators': [100, 200],
        }],
        *data,
        "RandomForestRegressor",
    )

    adaboost_regressor = grid_search(
        AdaBoostRegressor(),
        {
            'n_estimators': [50, 100],
            'learning_rate': [0.1, 0.3, 1],
            'loss': ['square', 'exponential'],
        },
        *data,
        "AdaBoostRegressor",
    )

    extra_trees_regressor = grid_search(
        ExtraTreesRegressor(),
        [{
            'n_estimators': [500],
            'max_features': [5, 10],
        }],
        *data,
        "ExtraTreesRegressor",
    )

    gradient_boosting_regressor = grid_search(
        GradientBoostingRegressor(),
        [{
            'n_estimators': [100],
            'learning_rate': [0.02, 0.01],
            'max_depth': [6, 4],
            'min_samples_leaf': [3, 5, 9],
            'max_features': [1.0, 0.3, 0.1],
        }],
        *data,
        "GradientBoostingRegressor",
    )

    # VotingRegressor fits clones of the estimators it is given. Passing the
    # GridSearchCV wrappers would therefore repeat every grid search from
    # scratch; passing the selected estimators refits each one once with the
    # parameters reported above.
    estimators = [
        ('rfr', random_forest_regressor.best_estimator_),
        ('abr', adaboost_regressor.best_estimator_),
        ('etr', extra_trees_regressor.best_estimator_),
        ('gbr', gradient_boosting_regressor.best_estimator_),
    ]

    # The parameter grid is ignored by grid_search for the voting ensemble.
    voting_regressor = grid_search(VotingRegressor(estimators), [{}], *data, "VotingRegressor")

    print("Saving the weights of the model")
    # Context manager so the file is flushed and closed even if pickling fails.
    with open("voting_regressor_az.sav", 'wb') as model_file:
        pickle.dump(voting_regressor, model_file)

In [21]:
# Tune the base regressors for the az target, fit the voting ensemble on them,
# and write it to voting_regressor_az.sav; the per-model reports print below.
train_az()

Model is RandomForestRegressor

Best parameters set found on development set:

{'max_depth': 80, 'max_features': 3, 'n_estimators': 100}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.10907147420254557
Mean Squared Error: 0.038867682640267784
Root Mean Squared Error: 0.19714888445098486
Model is AdaBoostRegressor

Best parameters set found on development set:

{'learning_rate': 0.1, 'loss': 'exponential', 'n_estimators': 50}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.11993783749819377
Mean Squared Error: 0.038080729281410125
Root Mean Squared Error: 0.19514284327489473
Model is ExtraTreesRegressor

Best parameters set found on development set:

{'max_features': 10, 'n_estimators': 500}

The model is trained on the full development set.
The scores are computed on the full evaluation set.

Mean Absolute Error: 0.10459767745746