# TEDA Regressor: *An Online Processing of Vehicular Data on the Edge Through an Unsupervised TinyML Regression Technique*

## ✍🏾Authors:  Pedro Andrade, Ivanovitch Silva, Marianne Diniz, Thommas Flores, Daniel G. Costa, and Eduardo Soares.

### First, we have to install some dependencies:

In [None]:
!pip3 install wandb -qU

In [None]:
!pip3 install imutils

In [None]:
!pip3 install codecarbon

### Importing libraries:

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.lines import Line2D
import statsmodels.api as sm
import padasip as pa

from sklearn.metrics import mean_squared_error

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

import datetime

import wandb
import logging
import xlsxwriter

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, StratifiedKFold,TimeSeriesSplit
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from IPython.display import Image
from sklearn.linear_model import LinearRegression
from sklearn import tree
from xgboost import XGBClassifier, XGBRegressor
import xgboost as xgb
%config IPCompleter.greedy=True

### Code Carbon

In [3]:
from codecarbon import EmissionsTracker

### Implemented modules and Padasip module

In [4]:
from padasip.filters.base_filter import AdaptiveFilter
from TedaRegressor import DataCloud, TEDARegressor

## Data Acquisition:

In [5]:
# Load the dataset from the Excel workbook into a DataFrame
df = pd.read_excel("full_data.xlsx")

#### Looking at the DataFrame:

In [6]:
df.head()

Unnamed: 0,Trip Time,fuel,Speed,Temperature,Pressure,fuel diff,Trip Distance,Distance Diff,fuel_mL
0,0,0.014118,0,317.15,37.000015,0.0,8.7e-05,0.0,0.0
1,1,0.014377,1,317.15,48.00002,0.00026,0.000234,0.000147,0.25956
2,2,0.01473,4,317.15,54.000022,0.000353,0.00092,0.000686,0.3529
3,3,0.0151,5,317.15,53.000022,0.000369,0.002066,0.001146,0.36931
4,4,0.015669,8,317.15,47.00002,0.000569,0.004019,0.001953,0.56914


#### Function to treat the dataset:

In [7]:
def treating_dataset(name_series, window, N_splits):
    """Build shifted-window samples from a series and return the last split.

    A frame is created with column ``t`` holding the series values and
    columns ``t + 1`` ... ``t + window`` holding the same values shifted
    1 ... ``window`` steps ahead. Rows containing any missing value are
    dropped. Every column except the last is used as input; the last column
    is the target.

    Parameters
    ----------
    name_series : iterable
        Values of the series, in order.
    window : int
        Number of shifted columns appended after ``t``.
    N_splits : int
        Passed as ``n_splits`` to ``TimeSeriesSplit``.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` taken from the final split
        yielded by ``TimeSeriesSplit``.
    """
    lagged = pd.DataFrame()
    lagged['t'] = list(name_series)
    for step in range(1, window + 1):
        lagged['t + ' + str(step)] = lagged['t'].shift(-step)

    lagged.dropna(axis=0, inplace=True)

    inputs = lagged.iloc[:, :-1].values
    targets = lagged.iloc[:, -1].values

    # Only the final fold is kept; the earlier folds are discarded.
    splitter = TimeSeriesSplit(n_splits=N_splits)
    *_, (train_idx, test_idx) = splitter.split(inputs)

    return inputs[train_idx], targets[train_idx], inputs[test_idx], targets[test_idx]

## W&B login and setup:

In [10]:
# Authenticate with W&B without embedding the API key in the notebook.
# wandb.login() uses the WANDB_API_KEY environment variable or stored
# credentials when available, and otherwise prompts for the key.
# NOTE(review): the key that was previously written on this line has been
# exposed and should be revoked in the W&B account settings.
wandb.login()

wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\Samsung/.netrc


In [11]:
# Initialize a new W&B run in the "TEDARegressor" project to track this job;
# the run is tagged with job_type "dataset-creation"
run = wandb.init(project="TEDARegressor", job_type="dataset-creation")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: pedrohmeira (embarca-ai). Use `wandb login --relogin` to force relogin


In [12]:
# Register the Excel workbook as a W&B artifact attached to the current run

# Create a new artifact named 'full_data' of type 'dataset'
dataset = wandb.Artifact('full_data', type='dataset')
# Add the data file (the Excel workbook) to the artifact
dataset.add_file('full_data.xlsx')
# Log the artifact to save it as an output of this run
run.log_artifact(dataset)

<wandb.sdk.wandb_artifacts.Artifact at 0x28cc5b9bdf0>

In [13]:
# Logging setup: INFO level, each message prefixed with a
# day-month-year hour:minute:second timestamp
logging.basicConfig(
    format="%(asctime)s %(message)s",
    datefmt='%d-%m-%Y %H:%M:%S',
    level=logging.INFO,
)

# Handle on the root logger
logger = logging.getLogger()

## Sweep Variables Definition

#### Metric dictionary:

In [14]:
# Sweep objective: minimize the value reported under 'mse_TEDA'
metric_dict = dict(name='mse_TEDA', goal='minimize')

#### Hyperparameter variation dictionary:

In [15]:
# Hyperparameter grid for the sweep: each name maps to {'values': [...]}
# listing the candidate values to try.
parameters_dict = {
    name: {'values': candidates}
    for name, candidates in (
        ('Window', [2, 3, 4, 5, 6]),
        ('Factor', [0.000001, 0.1, 0.25, 0.5, 0.75, 0.9, 1]),
        ('Threshold', [1.5, 1.75, 2, 2.25, 3]),
        ('Init', ["relu", "tanh1", "tanh2", "zero"]),
    )
}

#### Creating the Sweep:

In [16]:
# NOTE(review): seed is not referenced by the sweep configuration below
seed = 41

# Grid sweep over every combination in parameters_dict, tracked against
# the objective described by metric_dict
sweep_config = dict(
    method="grid",
    metric=metric_dict,
    parameters=parameters_dict,
)

# Register the sweep in the "TEDARegressor" project and keep its id
sweep_id = wandb.sweep(sweep_config, project="TEDARegressor")

Create sweep with ID: wza6vy2g
Sweep URL: https://wandb.ai/embarca-ai/TEDARegressor/sweeps/wza6vy2g


#### We are going to choose the ```fuel_mL``` column:

In [17]:
# TEDA Regressor inputs and results
# Column of the DataFrame used as the series to process
feature = df['fuel_mL']
# Collects the MSE computed by each call to train()
MSE_TEDA = list()

#### Defining our training procedure:

In [18]:
def train():
    """Run one sweep trial of the TEDA regressor and report its metrics.

    Opens a W&B run, reads ``Window``, ``Factor``, ``Threshold`` and ``Init``
    from ``wandb.config``, builds the windowed dataset from the module-level
    ``feature`` series, and feeds every test sample to a ``TEDARegressor``
    while a codecarbon ``EmissionsTracker`` is active. The resulting MSE is
    appended to the module-level ``MSE_TEDA`` list, logged to W&B, and stored
    in the run summary together with the tracker's energy and CO2 figures.
    """
    with wandb.init() as run:
        config = wandb.config

        # create codecarbon tracker
        # codecarbon is very verbose; raise the threshold so only critical
        # messages are emitted (lower it for more detail)
        tracker = EmissionsTracker(log_level="critical")
        tracker.start()
        try:
            # Only the test part of the split is used below.
            _, _, X_test, y_test = treating_dataset(
                name_series=feature, window=config.Window, N_splits=5
            )

            tedaRegressor = TEDARegressor(
                m=config.Window,
                mu=config.Factor,
                threshold=config.Threshold,
                activation_function=config.Init,
            )

            for t in X_test:
                # TEDARegressor processes one sample at a time
                tedaRegressor.run(np.array(t))
        finally:
            # Stop the tracker even when the trial fails, so it is never left
            # running. Its figures are read from final_emissions_data below.
            # NOTE(review): the original author flagged the placement of this
            # stop call as something to double-check.
            tracker.stop()

        # MSE TEDARegressor
        # NOTE(review): predictions are read from the TEDARegressor class, not
        # from the tedaRegressor instance, and y_test is trimmed by one element
        # at each end; confirm both against the TedaRegressor module.
        mse_TEDA = mean_squared_error(y_test[1:-1], TEDARegressor.Ypred)
        MSE_TEDA.append(mse_TEDA)

        run.summary['mse_TEDA'] = mse_TEDA
        wandb.log({"mse_TEDA": mse_TEDA})

        # energy unit is kWh
        emissions_data = tracker.final_emissions_data
        run.summary["Energy_Consumed"] = emissions_data.energy_consumed
        run.summary["Energy_RAM"] = emissions_data.ram_energy
        run.summary["Energy_GPU"] = emissions_data.gpu_energy
        run.summary["Energy_CPU"] = emissions_data.cpu_energy
        # kg
        run.summary["CO2_Emissions"] = emissions_data.emissions


#### So, now we are ready to start sweeping! 🧹🧹🧹

In [19]:
# Start a sweep agent for sweep_id, using train as the function it runs
wandb.agent(sweep_id, train)

wandb: Agent Starting Run: woliz7kh with config:
wandb: 	Factor: 1e-06
wandb: 	Init: relu
wandb: 	Threshold: 1.5
wandb: 	Window: 2
Exception in thread NetStatThr:
Traceback (most recent call last):
  File "c:\Users\Samsung\anaconda3\lib\threading.py", line 932, in _bootstrap_inner
Exception in thread ChkStopThr:
Traceback (most recent call last):
  File "c:\Users\Samsung\anaconda3\lib\threading.py", line 932, in _bootstrap_inner
    self.run()
  File "c:\Users\Samsung\anaconda3\lib\threading.py", line 870, in run
        self._target(*self._args, **self._kwargs)
  File "c:\Users\Samsung\anaconda3\lib\site-packages\wandb\sdk\wandb_run.py", line 276, in check_stop_status
self.run()
  File "c:\Users\Samsung\anaconda3\lib\threading.py", line 870, in run
    self._loop_check_status(
  File "c:\Users\Samsung\anaconda3\lib\site-packages\wandb\sdk\wandb_run.py", line 214, in _loop_check_status
        self._target(*self._args, **self._kwargs)local_handle = request()
  File "c:\Users\Samsung\an

15-03-2023 21:37:53 NumExpr defaulting to 8 threads.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
mse_TEDA,▁

0,1
CO2_Emissions,0.0
Energy_CPU,0.0
Energy_Consumed,0.0
Energy_GPU,0.0
Energy_RAM,0.0
mse_TEDA,0.58043


wandb: Agent Starting Run: wxkyav8i with config:
wandb: 	Factor: 1e-06
wandb: 	Init: relu
wandb: 	Threshold: 1.5
wandb: 	Window: 3
