# Biomaule Example
Example Application of our Framework on the Biomaule Dataset

## Import Packages

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

import framework
import framework.regression.models
import framework.data.management

## Some Definitions
You can change if additional features are generated and used here

In [2]:
rand = 42           # To ensure reproducible results
with_som = True     # True if you want to use the unsupervised SOM to generate additional features
only_som = False    # True if only those SOM generated features should be used as features
with_pca = False    # True if PCA generated features should be used

## Load Data

In [3]:
framework.data.management.load_data('data/biomaule.pkl',['class_id', 'start_latitude', 'start_longitude', 'end_latitude', 'end_longitude', 'length', 'region_id', 'sinuosity',
 'slope_1', 'slope_2', 'support_points_km', 'surface_id'], 'fuzzy_velocity', imputer=None)

Load Data...
# Drop entries containing NaN
#  12 entries droped
# Finished loading dataset from "data/biomaule.pkl" with shape (12679, 13)



## Scale and Split Data

In [4]:
feature_scaler = StandardScaler()
framework.data.management.scale_data(feature_scaler, ['class_id', 'start_latitude', 'start_longitude', 'end_latitude', 'end_longitude', 'length', 'region_id', 'sinuosity',
 'slope_1', 'slope_2', 'support_points_km', 'surface_id'])

framework.data.management.split_data(test_size=0.3, random_state=rand, shuffle=True)

Scale Data...

Split Data...
# X_train_scaled shape: (8875, 12)
# y_train shape: (8875,)
# X_test_scaled shape: (3804, 12)
# y_test shape: (3804,)



## Train Models

In [5]:
results = pd.DataFrame()
results = framework.regression.models.train_models(results, n_esti=200, generate_som_clusters=with_som, som_only=only_som, generate_pca=with_pca, pca_components=2)

Start training of Models...
# Training: som
# Training: lr
# Training: ab
# Training: et
# Training: br
# Training: gb
# Training: rf
# Training: ridge
# Training: svr


## Results

In [7]:
results = framework.regression.models.test_models(results, savepred=False)
print("")
print(results.sort_values(by=['prediction R2'], ascending=False))


   Method  Prediction Time                                 feature importance  \
10     br         2.121131                                                 {}   
1      br         1.975742                                                 {}   
14     rf         0.967069  {'class_id': 0.8105018037942056, 'start_latitu...   
5      rf         0.674866  {'class_id': 0.8105018037942056, 'start_latitu...   
2      et         0.808253  {'class_id': 0.6614093010414074, 'start_latitu...   
11     et         1.012052  {'class_id': 0.6614093010414074, 'start_latitu...   
3      gb         0.079676  {'class_id': 0.710349067693097, 'start_latitud...   
12     gb         0.075575  {'class_id': 0.710349067693097, 'start_latitud...   
17    svr         0.982088                                                 {}   
8     svr         0.617841                                                 {}   
0      ab         1.151067  {'class_id': 0.6886096776758782, 'start_latitu...   
9      ab         1.128674 