# 1. Import packages

In [1]:
import sys
sys.path.append('..')
from modules import preprocess, benchmarking, graphics

Invoking __init__.py for modules


# 2. Load dataset

In [2]:
# Location of the raw occupancy CSV, relative to this notebook.
DATASET_PATH = '../dataset/Occupancy_Estimation.csv'
df = preprocess.load_csv_data(DATASET_PATH)

# 3. Data preprocessing
- Step 1: Transform features
  - Recode Room_Occupancy_Count to a binary value
  - Drop Date and Time features
- Step 2: Clean data
  - Drop duplicated rows

In [3]:
# Step 1a: Recode the Room_Occupancy_Count column. The helper is called without
# reassignment, so df itself is modified.
preprocess.recode_dataset_output(df)

# Step 1b: Remove the Date and Time columns (again applied to df directly).
preprocess.remove_time_columns(df)

# NOTE(review): the section notes list "drop duplicated rows" as Step 2, but no
# such call is visible in this cell — confirm it happens inside a preprocess helper.

# Preview the first rows of the transformed frame.
df.head()

Unnamed: 0,S1_Temp,S2_Temp,S3_Temp,S4_Temp,S1_Light,S2_Light,S3_Light,S4_Light,S1_Sound,S2_Sound,S3_Sound,S4_Sound,S5_CO2,S5_CO2_Slope,S6_PIR,S7_PIR,Room_Occupancy_Count
0,24.94,24.75,24.56,25.38,121,34,53,40,0.08,0.19,0.06,0.06,390,0.769231,0,0,1
1,24.94,24.75,24.56,25.44,121,33,53,40,0.93,0.05,0.06,0.06,390,0.646154,0,0,1
2,25.0,24.75,24.5,25.44,121,34,53,40,0.43,0.11,0.08,0.06,390,0.519231,0,0,1
3,25.0,24.75,24.56,25.44,121,34,53,40,0.41,0.1,0.1,0.09,390,0.388462,0,0,1
4,25.0,24.75,24.56,25.44,121,34,54,40,0.18,0.06,0.06,0.06,390,0.253846,0,0,1


# 4. Benchmarking

In [4]:
# Separate the feature columns (X) from the output column (y).
X, y = preprocess.get_features(df), preprocess.get_output(df)

# Partition both into training and test portions; the helper's result is
# unpacked in the order train/test features, then train/test output.
(X_train, X_test,
 y_train, y_test) = preprocess.split_dataset(X, y)

## Create the benchmarking DataFrame
- Create the structure of the benchmarking DataFrame.
- Track model training with CodeCarbon and Eco2AI.
- Store the tracking results in the benchmarking DataFrame.
- Store the evaluation metrics in the benchmarking DataFrame.

In [None]:
# Build the benchmarking DataFrame from the train/test split.
# The logged output shows several cross-validated searches plus CodeCarbon
# tracking, so expect this cell to run for a few minutes.
df_benchmarking = benchmarking.create_benchmarking(
    X_train,
    y_train,
    X_test,
    y_test,
)

[codecarbon INFO @ 21:25:16] [setup] RAM Tracking...
[codecarbon INFO @ 21:25:16] [setup] GPU Tracking...
[codecarbon INFO @ 21:25:16] No GPU found.
[codecarbon INFO @ 21:25:16] [setup] CPU Tracking...
[codecarbon INFO @ 21:25:18] CPU Model on constant consumption mode: Intel(R) Core(TM) i5-9400F CPU @ 2.90GHz
[codecarbon INFO @ 21:25:18] >>> Tracker's metadata:
[codecarbon INFO @ 21:25:18]   Platform system: Linux-5.19.0-45-generic-x86_64-with-glibc2.35
[codecarbon INFO @ 21:25:18]   Python version: 3.10.8
[codecarbon INFO @ 21:25:18]   Available RAM : 4.935 GB
[codecarbon INFO @ 21:25:18]   CPU count: 4
[codecarbon INFO @ 21:25:18]   CPU model: Intel(R) Core(TM) i5-9400F CPU @ 2.90GHz
[codecarbon INFO @ 21:25:18]   GPU count: None
[codecarbon INFO @ 21:25:18]   GPU model: None


Fitting 5 folds for each of 120 candidates, totalling 600 fits


[codecarbon INFO @ 21:25:36] Energy consumed for RAM : 0.000000 kWh. RAM Power : 0.08306407928466797 W
[codecarbon INFO @ 21:25:36] Energy consumed for all CPUs : 0.000136 kWh. All CPUs Power : 32.5 W
[codecarbon INFO @ 21:25:36] 0.000136 kWh of electricity used since the begining.
[codecarbon INFO @ 21:25:52] Energy consumed for RAM : 0.000001 kWh. RAM Power : 0.08323287963867188 W
[codecarbon INFO @ 21:25:52] Energy consumed for all CPUs : 0.000278 kWh. All CPUs Power : 32.5 W
[codecarbon INFO @ 21:25:52] 0.000279 kWh of electricity used since the begining.
[codecarbon INFO @ 21:26:00] Energy consumed for RAM : 0.000001 kWh. RAM Power : 0.08331298828125 W
[codecarbon INFO @ 21:26:00] Energy consumed for all CPUs : 0.000355 kWh. All CPUs Power : 32.5 W
[codecarbon INFO @ 21:26:00] 0.000355 kWh of electricity used since the begining.
[codecarbon INFO @ 21:26:01] [setup] RAM Tracking...
[codecarbon INFO @ 21:26:01] [setup] GPU Tracking...
[codecarbon INFO @ 21:26:01] No GPU found.
[code

Best estimator LR:  Pipeline(steps=[('scaler', RobustScaler()),
                ('estimator',
                 LogisticRegression(C=78.47599703514607, max_iter=500,
                                    penalty='l1', solver='liblinear'))])
Precision:  1.0
Recall:  0.9982993197278912
F1:  0.9991489361702128
<class 'sklearn.model_selection._search.RandomizedSearchCV'>


[codecarbon INFO @ 21:26:03] CPU Model on constant consumption mode: Intel(R) Core(TM) i5-9400F CPU @ 2.90GHz
[codecarbon INFO @ 21:26:03] >>> Tracker's metadata:
[codecarbon INFO @ 21:26:03]   Platform system: Linux-5.19.0-45-generic-x86_64-with-glibc2.35
[codecarbon INFO @ 21:26:03]   Python version: 3.10.8
[codecarbon INFO @ 21:26:03]   Available RAM : 4.935 GB
[codecarbon INFO @ 21:26:03]   CPU count: 4
[codecarbon INFO @ 21:26:03]   CPU model: Intel(R) Core(TM) i5-9400F CPU @ 2.90GHz
[codecarbon INFO @ 21:26:03]   GPU count: None
[codecarbon INFO @ 21:26:03]   GPU model: None


Fitting 5 folds for each of 10 candidates, totalling 50 fits


[codecarbon INFO @ 21:26:22] Energy consumed for RAM : 0.000000 kWh. RAM Power : 0.08400964736938477 W
[codecarbon INFO @ 21:26:22] Energy consumed for all CPUs : 0.000136 kWh. All CPUs Power : 32.5 W
[codecarbon INFO @ 21:26:22] 0.000136 kWh of electricity used since the begining.
[codecarbon INFO @ 21:26:37] Energy consumed for RAM : 0.000001 kWh. RAM Power : 0.0841670036315918 W
[codecarbon INFO @ 21:26:37] Energy consumed for all CPUs : 0.000271 kWh. All CPUs Power : 32.5 W
[codecarbon INFO @ 21:26:37] 0.000272 kWh of electricity used since the begining.
[codecarbon INFO @ 21:26:52] Energy consumed for RAM : 0.000001 kWh. RAM Power : 0.08432149887084961 W
[codecarbon INFO @ 21:26:52] Energy consumed for all CPUs : 0.000408 kWh. All CPUs Power : 32.5 W
[codecarbon INFO @ 21:26:52] 0.000409 kWh of electricity used since the begining.
[codecarbon INFO @ 21:27:07] Energy consumed for RAM : 0.000001 kWh. RAM Power : 0.0843343734741211 W
[codecarbon INFO @ 21:27:07] Energy consumed for a

Best estimator RF:  RandomForestClassifier(bootstrap=False, max_depth=90, max_features=3,
                       min_samples_leaf=4, min_samples_split=8)
Precision:  1.0
Recall:  0.9982993197278912
F1:  0.9991489361702128


[codecarbon INFO @ 21:27:16] CPU Model on constant consumption mode: Intel(R) Core(TM) i5-9400F CPU @ 2.90GHz
[codecarbon INFO @ 21:27:16] >>> Tracker's metadata:
[codecarbon INFO @ 21:27:16]   Platform system: Linux-5.19.0-45-generic-x86_64-with-glibc2.35
[codecarbon INFO @ 21:27:16]   Python version: 3.10.8
[codecarbon INFO @ 21:27:16]   Available RAM : 4.935 GB
[codecarbon INFO @ 21:27:16]   CPU count: 4
[codecarbon INFO @ 21:27:16]   CPU model: Intel(R) Core(TM) i5-9400F CPU @ 2.90GHz
[codecarbon INFO @ 21:27:16]   GPU count: None
[codecarbon INFO @ 21:27:16]   GPU model: None


Fitting 5 folds for each of 25 candidates, totalling 125 fits


[codecarbon INFO @ 21:27:34] Energy consumed for RAM : 0.000000 kWh. RAM Power : 0.0848093032836914 W
[codecarbon INFO @ 21:27:34] Energy consumed for all CPUs : 0.000136 kWh. All CPUs Power : 32.5 W
[codecarbon INFO @ 21:27:34] 0.000136 kWh of electricity used since the begining.


In [None]:
# Show the benchmarking results as the cell output.
df_benchmarking

## Save the benchmarking DataFrame to a CSV file

In [None]:
# File name under which the benchmarking results are stored.
BENCHMARKING_CSV = 'benchmarking.csv'
preprocess.save_in_csv_file(df_benchmarking, BENCHMARKING_CSV)

## Graphics

In [None]:
# Energy-consumption chart built from the benchmarking results
# (behaviour inferred from the helper's name — see modules.graphics to confirm).
graphics.plot_models_energy_consumed(df_benchmarking)

In [None]:
# Evaluation-metrics chart built from the benchmarking results
# (behaviour inferred from the helper's name — see modules.graphics to confirm).
graphics.plot_models_evaluation_metrics(df_benchmarking)