In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import sklearn.metrics as sm
from sklearn.linear_model import ElasticNet
import joblib

import random
import datetime


In [2]:
# Load the energy-consumption records that every later cell reads from `df`.
df = pd.read_csv(
    '../datasets/energy_consumption.csv',
)


In [3]:
# Distinct machine identifiers, in order of first appearance in the data.
pd.unique(df['machine_id'])

array(['M3', 'M1', 'M2', 'M4', 'M5'], dtype=object)

In [8]:
# Fit one ElasticNet energy regressor per machine and keep it together with
# its R^2 score on the held-out test split.
dict_model = {}

for machine in df['machine_id'].unique():
    # Rows belonging to this machine only.
    df_tmp = df.loc[df['machine_id'] == machine].copy()

    # Target is `energy`; every remaining column except the `condition` and
    # `machine_id` labels is used as a feature.
    y = df_tmp['energy'].copy()
    X = df_tmp.drop(columns=['energy', 'condition', 'machine_id']).copy()

    # A fixed random_state keeps the 67/33 split identical on every run.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1
    )

    model = ElasticNet().fit(X_train, y_train)
    y_pred = model.predict(X_test)
    score = sm.r2_score(y_test, y_pred)

    dict_model[machine] = {'model': model, 'r2': score}


In [9]:
# Persist the per-machine {'model', 'r2'} dictionary (despite the "list" in the
# file name, the stored object is a dict keyed by machine id).
with open('../list_models.pkl', 'wb') as model_file:
    joblib.dump(dict_model, model_file)

### Test model on dummy data

In [10]:
# Anchor the dummy schedule at the current local time rounded to the nearest
# whole hour. The lowercase 'h' alias is used because the uppercase 'H'
# frequency alias is deprecated since pandas 2.2.
starting_date = pd.Timestamp(datetime.datetime.now()).round('h')

In [11]:
# Hourly timestamps from starting_date through starting_date + 7 days.
# date_range includes both endpoints by default, giving 7 * 24 + 1 = 169 slots.
# The lowercase 'h' alias replaces 'H', which is deprecated since pandas 2.2.
dates = pd.date_range(
    start=starting_date,
    end=starting_date + pd.Timedelta(days=7),
    freq='1h',
)

In [12]:
# Build a random dummy plan per machine: draw len(dates) historical rows for
# the machine, then keep a random subset of them (each row kept with p = 0.5).
# A single seeded generator drives both random steps so the plan is identical
# on every Restart & Run All (the original used unseeded random.choice/sample).
plan_rng = np.random.default_rng(1)

dict_plan = {}
for m in df['machine_id'].unique():
    # One keep/skip flag per time slot in `dates`, as a plain list of bools.
    select_sample = (plan_rng.random(len(dates)) < 0.5).tolist()
    # NOTE: sampling without replacement raises ValueError if a machine has
    # fewer than len(dates) rows.
    dict_plan[m] = (
        df.query(f'machine_id=="{m}"')
        .sample(len(dates), random_state=plan_rng)
        .iloc[select_sample, :]
    )

In [13]:
# `m` is the loop variable left over from the plan-building loop, so this
# displays the plan of the last machine that loop iterated over.
dict_plan[m]

Unnamed: 0,energy,condition,volume,weight,machine_id,speed
3404,820.7,C000Z,89.0,14.0,M5,12.2
1409,830.1,C000O,90.0,15.0,M5,12.0
3749,741.3,C000E,81.0,15.0,M5,13.4
3256,1304.8,C000F,137.0,19.0,M5,7.7
487,890.2,C000Z,96.0,16.0,M5,11.3
...,...,...,...,...,...,...
451,1177.9,C000W,125.0,22.0,M5,8.5
300,784.8,C000Z,87.0,14.0,M5,12.8
2432,1179.8,C000X,127.0,20.0,M5,8.5
3317,921.2,C000Y,98.0,15.0,M5,10.8
