# DE pipeline

In [None]:
# Create the table in the DB to store the data. This process will be done by the DE team.

import sqlalchemy
from loguru import logger
import pandas as pd
from sklearn import datasets
import numpy as np
# Engine bound to the local SQLite file used throughout this notebook.
disk_engine = sqlalchemy.create_engine('sqlite:///data_db.db', echo=False)

# Smoke-test the connection before anything is written.
try:
    with disk_engine.connect() as con:
        # Raw SQL must be wrapped in text(): SQLAlchemy 2.0 no longer accepts
        # plain strings in Connection.execute(), and the resulting exception
        # would be misreported below as an invalid engine.
        con.execute(sqlalchemy.text("SELECT 1"))
    logger.info('Engine is valid')
except Exception as e:
    # Best-effort check: the notebook keeps running, but the failure is
    # reported at error level so it is not lost among informational logs.
    logger.error(f'Engine invalid: {str(e)}')

In [None]:
# Load the bundled iris dataset and assemble features + label into one frame.
iris = datasets.load_iris()
iris_column_names = iris.feature_names + ['target']
# np.c_ appends the 1-D target vector as an extra trailing column.
iris_matrix = np.c_[iris.data, iris.target]
df_iris = pd.DataFrame(data=iris_matrix, columns=iris_column_names)
df_iris.head()

In [None]:
# Normalise column names: spaces become underscores and the "_(cm)" unit
# suffix is dropped. Both replacements are literal (regex=False), which avoids
# the invalid "\(" escape sequences of the previous non-raw pattern string and
# does not depend on pandas' version-specific default for `regex`.
df_iris.columns = (
    df_iris.columns
    .str.replace(" ", "_", regex=False)
    .str.replace("_(cm)", "", regex=False)
)
# np.c_ produced an all-float matrix, so the label is cast back to integers.
df_iris['target'] = df_iris['target'].astype(int)
df_iris.head()

In [None]:
# Show (rows, columns) of the cleaned frame as a quick sanity check.
df_iris.shape

In [None]:
# Column types for the `iris` table: the four measurements are stored as
# floats, the label as an integer.
measurement_columns = ('sepal_length', 'sepal_width', 'petal_length', 'petal_width')
iris_sql_dtypes = {
    column: sqlalchemy.types.Float(precision=4, asdecimal=True)
    for column in measurement_columns
}
iris_sql_dtypes['target'] = sqlalchemy.types.INTEGER()

# Replace any existing `iris` table, inserting in multi-row batches of 100.
df_iris.to_sql(
    name='iris',
    con=disk_engine,
    if_exists='replace',
    index=False,
    dtype=iris_sql_dtypes,
    chunksize=100,
    method="multi",
)

# ML Pipeline

In [None]:
# This is the modelling process, which we will run in a pipeline whenever a data refresh happens or on a manual trigger.

import sqlalchemy
from loguru import logger
import pandas as pd
import json
import uuid
from sklearn.linear_model import LogisticRegression

def logistic_regression_to_json(lrmodel, file=None):
    """Serialize a fitted logistic-regression model to JSON.

    The payload has two keys: ``init_params`` (the result of
    ``lrmodel.get_params()``) and ``model_params`` (the fitted attributes
    ``coef_``, ``intercept_``, ``classes_`` and ``n_iter_``, each converted
    with ``.tolist()``).

    Parameters
    ----------
    lrmodel : object
        Fitted estimator exposing ``get_params()`` and the four fitted
        attributes listed above.
    file : file-like, optional
        When given, the JSON is written to it with ``json.dump``.

    Returns
    -------
    str or None
        The JSON string when ``file`` is None; otherwise None (the return
        value of ``json.dump``).
    """
    fitted_attributes = ('coef_', 'intercept_', 'classes_', 'n_iter_')
    payload = {
        'init_params': lrmodel.get_params(),
        'model_params': {
            attribute: getattr(lrmodel, attribute).tolist()
            for attribute in fitted_attributes
        },
    }
    if file is None:
        return json.dumps(payload)
    return json.dump(payload, file)


# Engine bound to the SQLite file that holds the `iris` table read below.
disk_engine = sqlalchemy.create_engine('sqlite:///data_db.db', echo=False)

# Smoke-test the connection before querying.
try:
    with disk_engine.connect() as con:
        # Raw SQL must be wrapped in text(): SQLAlchemy 2.0 no longer accepts
        # plain strings in Connection.execute(), and the resulting exception
        # would be misreported below as an invalid engine.
        con.execute(sqlalchemy.text("SELECT 1"))
    logger.info('engine is valid')
except Exception as e:
    # Best-effort check: execution continues, but the failure is logged at
    # error level so it stands out from informational messages.
    logger.error(f'Engine invalid: {str(e)}')


# Pull the full training table; only a preview is displayed.
iris_df_from_db = pd.read_sql_query('SELECT * FROM iris', disk_engine)
iris_df_from_db.head()

In [None]:
# Features: every column except the label.
X = iris_df_from_db.drop(columns=['target'])
# Label as a 1-D Series. A single-column DataFrame is a column vector, for
# which scikit-learn's fit() expects a 1-D array and emits a warning.
y = iris_df_from_db['target']

In [None]:
# Preview the feature matrix instead of dumping every row.
X.head()

In [None]:
# Preview the labels instead of dumping every row.
y.head()

In [None]:
# Fit a default-configured classifier; fit() returns the estimator itself.
lr_model = LogisticRegression().fit(X, y)

# Package the fitted model as JSON under a freshly generated identifier.
model_response = logistic_regression_to_json(lr_model)
model_id = str(uuid.uuid4())
logger.info(f"Storing results for : {model_id}.")

# One-row frame: the identifier plus the serialized model.
parameters_df = pd.DataFrame([{'model_id': model_id, "response": model_response}])
parameters_df

In [None]:
# Make sure the identifier column is stored as plain strings, then inspect
# the resulting column dtypes.
parameters_df = parameters_df.astype({'model_id': str})
parameters_df.dtypes

In [None]:
# Column types for the `model_parameters` table.
model_parameters_sql_dtypes = {
    'response': sqlalchemy.types.JSON(),
    'model_id': sqlalchemy.types.String(),
}

# Replace any existing `model_parameters` table with the current row(s).
parameters_df.to_sql(
    name='model_parameters',
    con=disk_engine,
    if_exists='replace',
    index=False,
    dtype=model_parameters_sql_dtypes,
    chunksize=100,
    method="multi",
)

# Model serving

In [None]:
import sqlalchemy
from loguru import logger
import pandas as pd
import json
import uuid
from sklearn.linear_model import LogisticRegression

def logistic_regression_from_json(jstring):
    """Rebuild a LogisticRegression estimator from its JSON representation.

    Parameters
    ----------
    jstring : str
        JSON document with an ``init_params`` object (passed as keyword
        arguments to the ``LogisticRegression`` constructor) and a
        ``model_params`` object (attribute name -> nested list of values).

    Returns
    -------
    LogisticRegression
        Estimator with every ``model_params`` entry set as a numpy array
        attribute.

    Raises
    ------
    json.JSONDecodeError
        If ``jstring`` is not valid JSON.
    KeyError
        If ``init_params`` or ``model_params`` is missing.
    """
    # Imported here because this cell's import block does not bring numpy
    # into scope; without it the function raises NameError on a fresh kernel.
    import numpy as np

    data = json.loads(jstring)
    model = LogisticRegression(**data['init_params'])
    for name, p in data['model_params'].items():
        # Values were stored as plain lists; restore them as arrays.
        setattr(model, name, np.array(p))
    return model

# Engine bound to the SQLite file that holds the `iris` table read below.
disk_engine = sqlalchemy.create_engine('sqlite:///data_db.db', echo=False)

# Smoke-test the connection before querying.
try:
    with disk_engine.connect() as con:
        # Raw SQL must be wrapped in text(): SQLAlchemy 2.0 no longer accepts
        # plain strings in Connection.execute(), and the resulting exception
        # would be misreported below as an invalid engine.
        con.execute(sqlalchemy.text("SELECT 1"))
    logger.info('Engine is valid.')
except Exception as e:
    # Best-effort check: execution continues, but the failure is logged at
    # error level so it stands out from informational messages.
    logger.error(f'Engine invalid: {str(e)}')

# Rows to score: every column of the stored table except the label.
iris_df_from_db = pd.read_sql_query('SELECT * FROM iris', disk_engine)
X = iris_df_from_db.drop(columns = ['target'])
# Preview only, rather than dumping every row.
X.head()


In [None]:
# NOTE(review): `model_id` is not defined in the serving cells; it has to be
# set beforehand (e.g. left in the kernel by the training step) - confirm how
# serving is supposed to obtain it.
model_parameters_from_db = pd.read_sql_query('SELECT * FROM model_parameters', disk_engine)

# Pick the stored response for the requested model. Positional .iloc[0] is
# used because the filtered Series keeps its original index labels, so a
# label lookup with [0] only works when the match happens to sit in row 0.
matching_responses = model_parameters_from_db.loc[
    model_parameters_from_db['model_id'] == model_id, 'response'
]
if matching_responses.empty:
    raise KeyError(f"No stored parameters found for model_id {model_id!r}")
raw_response = matching_responses.iloc[0]

# The response was a JSON string written into a JSON column, so it may come
# back JSON-encoded a second time. Decode it with json.loads rather than
# eval(), which would execute whatever text the database holds. If decoding
# yields a string, that string is the model JSON; otherwise the raw value
# already is.
decoded_response = json.loads(raw_response)
model_parameters_json = decoded_response if isinstance(decoded_response, str) else raw_response
model_parameters_json

In [None]:
# Rebuild the estimator from its stored JSON, then predict for every row of X.
model_object = logistic_regression_from_json(jstring=model_parameters_json)
model_object.predict(X)