In [151]:
import configparser
import pickle

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer

In [152]:
# Load the raw smart-grid CSV (path is relative to the notebook's working
# directory) and preview the first five rows.
dataset = pd.read_csv(filepath_or_buffer='../data/raw/smartgrid.csv')
dataset.head(n=5)

Unnamed: 0,tau1,tau2,tau3,tau4,p1,p2,p3,p4,g1,g2,g3,g4,stab,stabf
0,2.95906,3.079885,8.381025,9.780754,3.763085,-0.782604,-1.257395,-1.723086,0.650456,0.859578,0.887445,0.958034,0.055347,unstable
1,9.304097,4.902524,3.047541,1.369357,5.067812,-1.940058,-1.872742,-1.255012,0.413441,0.862414,0.562139,0.78176,-0.005957,stable
2,8.971707,8.848428,3.046479,1.214518,3.405158,-1.207456,-1.27721,-0.920492,0.163041,0.766689,0.839444,0.109853,0.003471,unstable
3,0.716415,7.6696,4.486641,2.340563,3.963791,-1.027473,-1.938944,-0.997374,0.446209,0.976744,0.929381,0.362718,0.028871,unstable
4,3.134112,7.608772,4.943759,9.857573,3.525811,-1.125531,-1.845975,-0.554305,0.79711,0.45545,0.656947,0.820923,0.04986,unstable


In [153]:
# Parse the pipeline settings. `read` returns the list of files it managed to
# load, so the cell output shows whether the config file was found.
config = configparser.ConfigParser()
config.read(filenames='../pipeline.cfg')

['../pipeline.cfg']

In [154]:
# Columns to exclude from the feature matrix, stored in pipeline.cfg as a
# comma-separated list. Split on ',' and strip each name so that "a, b",
# "a,b" and a trailing comma are all parsed the same way.
vars_to_drop = [
    name.strip()
    for name in config.get('GENERAL', 'VARS_TO_DROP').split(',')
    if name.strip()
]

# Feature matrix: every column except the configured ones.
x_features = dataset.drop(columns=vars_to_drop)

# Target: the single column named by GENERAL/TARGET in the config.
y_target = dataset[config.get('GENERAL', 'TARGET')]

In [155]:
# Hold out 20% of the rows for testing; the fixed seed makes the shuffled
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_features,
    y_target,
    test_size=0.2,
    shuffle=True,
    random_state=2025,
)

In [156]:
def transform_stabf(y):
    """Encode stability labels as integers.

    Elements of ``y`` equal to the string ``'stable'`` become 1; every other
    value becomes 0.

    Parameters
    ----------
    y : array-like (e.g. numpy array, pandas Series or DataFrame)
        Labels to encode; compared element-wise against ``'stable'``.

    Returns
    -------
    numpy.ndarray
        Array of 1/0 values with the same shape as the comparison result.
    """
    is_stable = y == 'stable'
    return np.where(is_stable, 1, 0)

In [157]:
# Wrap the label-encoding function so it can be used as a pipeline step.
# validate=False passes the input to the function without sklearn's input
# validation.
stabf_transformer = FunctionTransformer(
    func=transform_stabf,
    validate=False,
)

In [158]:
# Turn the target Series into single-column DataFrames so the pipeline
# receives 2-D input.
y_train_df, y_test_df = y_train.to_frame(), y_test.to_frame()

In [159]:
# Target-processing pipeline: encode the 'stabf' labels as 1/0, then
# standardize the encoded values.
# NOTE(review): StandardScaler turns the 0/1 labels into continuous z-scores
# (see the fit_transform output in this notebook). Confirm this is intended
# if 'stabf' is later used as a class label.
smartgrid_pipeline = Pipeline(
    steps=[
        ('stabf_transform', stabf_transformer),
        ('scaler', StandardScaler()),
    ]
)

In [160]:
# Fit the pipeline on the training target and display the transformed values
# as a quick visual check of the encoding + scaling result.
smartgrid_pipeline.fit_transform(y_train_df)

array([[-0.75437984],
       [-0.75437984],
       [ 1.32559215],
       ...,
       [-0.75437984],
       [ 1.32559215],
       [-0.75437984]])

In [161]:
# Fit the target pipeline on the training labels and transform them.
y_train_transformed = smartgrid_pipeline.fit_transform(y_train_df)

# Build the training export: the features plus the transformed target as the
# 'stabf' column. `assign` returns a new frame, so x_train itself is not
# modified; the (n, 1) pipeline output is flattened to a 1-D column.
df_features_processd = x_train.assign(stabf=np.ravel(y_train_transformed))

# NOTE(review): the file name spelling ('feautures') is kept as-is because
# other code may read this exact path — confirm before renaming.
df_features_processd.to_csv('../data/processed/feautures_for_models.csv', index=False)

In [162]:
# Apply the pipeline to the test labels with `transform` only (no refit), so
# the test target is processed with whatever was learned during fitting.
y_test_transformed = smartgrid_pipeline.transform(y_test_df)

# Build the test export: the features plus the transformed target as the
# 'stabf' column. `assign` returns a new frame, so x_test itself is not
# modified; the (n, 1) pipeline output is flattened to a 1-D column.
df_features_processd_test = x_test.assign(stabf=np.ravel(y_test_transformed))
df_features_processd_test.to_csv('../data/processed/test_dataset.csv', index=False)

In [163]:
# Persist the fitted target pipeline so it can be reloaded outside this
# notebook. (`pickle` is imported with the other imports at the top.)
# NOTE: pickle stores functions by reference, not by value, so whatever loads
# this file must be able to resolve `transform_stabf` under the same
# module/name it had when the pipeline was dumped.
with open('../artifacts/pipeline.pkl', 'wb') as f:
    pickle.dump(smartgrid_pipeline, f)