# NNSW Example
An example application of our framework to the NNSW dataset: load and scale the data, train the regression models, and compare their prediction scores.

## Import Packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

import framework
import framework.regression.models
import framework.data.management

## Configuration (Random Seed and Pipeline Options)

In [2]:
# Fixed seed, passed as random_state to split_data() so the train/test
# partition is reproducible.
rand = 42

# Optional pipeline stages, forwarded to train_models():
with_pca = False  # -> generate_pca
with_som = False  # -> generate_som_clusters
only_som = False  # -> som_only

## Load Data

In [3]:
# Columns handed to load_data() as the second argument (the model inputs).
feature_columns = [
    'class_id',
    'start_latitude',
    'start_longitude',
    'end_latitude',
    'end_longitude',
    'length',
    'region_id',
    'sinuosity',
    'slope_1',
    'slope_2',
    'support_points_km',
    'surface_id',
]
# Third argument of load_data(); presumably the regression target -- confirm
# against the framework's signature.
target_column = 'fuzzy_velocity'

# imputer=None: no imputer is supplied to the loader.
framework.data.management.load_data(
    'data/nnsw.pkl',
    feature_columns,
    target_column,
    imputer=None,
)

Load Data...
# Drop entries containing NaN
#  0 entries droped
# Finished loading dataset from "data/nnsw.pkl" with shape (16711, 13)



## Scale and Split Data

In [4]:
# Columns to standardise: the same twelve columns that are passed to load_data().
columns_to_scale = [
    'class_id',
    'start_latitude',
    'start_longitude',
    'end_latitude',
    'end_longitude',
    'length',
    'region_id',
    'sinuosity',
    'slope_1',
    'slope_2',
    'support_points_km',
    'surface_id',
]

feature_scaler = StandardScaler()

# NOTE(review): scaling is invoked before the split. If scale_data() fits the
# scaler on the full dataset, test-set statistics leak into training --
# confirm how the framework fits the scaler.
framework.data.management.scale_data(feature_scaler, columns_to_scale)

# Shuffled 70/30 train/test split, seeded with `rand` for reproducibility.
framework.data.management.split_data(
    test_size=0.3,
    random_state=rand,
    shuffle=True,
)

Scale Data...

Split Data...
# X_train_scaled shape: (11697, 12)
# y_train shape: (11697,)
# X_test_scaled shape: (5014, 12)
# y_test shape: (5014,)



## Train Models

In [5]:
# Start from an empty frame; train_models() returns the frame that the
# results cell then passes on to test_models().
results = pd.DataFrame()
results = framework.regression.models.train_models(
    results,
    n_esti=2000,  # presumably the estimator count for the ensemble models -- confirm
    generate_som_clusters=with_som,
    som_only=only_som,
    generate_pca=with_pca,
    pca_components=2,
)

## Results

In [6]:
# Evaluate the trained models; savepred=False is passed through to test_models().
# NOTE: this reassigns `results`, so re-running the cell feeds the already
# evaluated frame back into test_models().
results = framework.regression.models.test_models(results, savepred=False)

# Order the table by the 'prediction R2' column, highest value first.
ranked_results = results.sort_values(by=['prediction R2'], ascending=False)

print("")
print(ranked_results)


  Method  Prediction Time feature importance  prediction R2  prediction RMSE  \
2     et         0.609682                 {}       0.960231         4.707877   
5     rf         0.524224                 {}       0.955557         4.976847   
1     br         1.536322                 {}       0.955539         4.977863   
8    svr         0.353375                 {}       0.953056         5.114954   
3     gb         0.102415                 {}       0.951521         5.197915   
7    som         0.229511                 {}       0.919246         6.708635   
6  ridge         0.000291                 {}       0.886828         7.941832   
4     lr         0.003927                 {}       0.886827         7.941875   
0     ab         1.249499                 {}       0.851619         9.093701   

                                              scores  
2  {'explained_variance': 0.9602748287789373, 'ma...  
5  {'explained_variance': 0.9555865309859402, 'ma...  
1  {'explained_variance': 0.95557