In [1]:
import pandas as pd
import numpy as np

import mlflow

In [2]:
filepath = '../data/power-plant/power_plant.csv'
df_master = pd.read_csv(filepath, index_col=None)

In [3]:
df_master.head()

Unnamed: 0,AT,V,AP,RH,PE
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,59.08,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,96.62,473.9


In [4]:
df = df_master.copy()
df.shape

(47840, 5)

## Add column "device_id" for an IoT device Id

In [5]:
df['device_id'] = [np.random.randint(1, 5) for _ in range(df.shape[0])]

In [6]:
df.head()

Unnamed: 0,AT,V,AP,RH,PE,device_id
0,14.96,41.76,1024.07,73.17,463.26,3
1,25.18,62.96,1020.04,59.08,444.37,1
2,5.11,39.4,1012.16,92.14,488.56,1
3,20.86,57.32,1010.24,76.64,446.48,4
4,10.82,37.5,1009.23,96.62,473.9,4


## Model training

In [7]:
mlflow.set_experiment('power_plant_experiment')
mlflow.autolog()

INFO: 'power_plant_experiment' does not exist. Creating a new experiment


In [15]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [18]:
def train_model(device_df: pd.DataFrame) -> float:
    y = device_df['PE']
    X = device_df[['AT', 'V', 'AP', 'RH']]
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = RandomForestRegressor(max_depth=5, random_state=0)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    metrics = {'mse': rmse, 'mae': mae, 'r2': r2}
    
    return metrics

In [19]:
for i in range(1, 5):
    print(train_model(df[df['device_id']==i]))

{'mse': 4.159620592572173, 'mae': 3.1735338032940072, 'r2': 0.9420741626871578}
{'mse': 4.227831010426759, 'mae': 3.180174037125359, 'r2': 0.9381556797044399}
{'mse': 4.287467717865279, 'mae': 3.2632089788029144, 'r2': 0.9372554958338691}
{'mse': 4.310140215140643, 'mae': 3.2235743185378807, 'r2': 0.9323840835926536}
