## ORM

In [1]:
from sqlalchemy import create_engine
from sqlalchemy.engine.base import Engine
from sqlalchemy_utils import database_exists, create_database

from sqlalchemy.orm import declarative_base, Session, sessionmaker
from sqlalchemy import Column, Integer, String, TypeDecorator
# from sqlalchemy.types import Column, String, TypeDecorator
# from sqlalchemy.ext.declarative import declarative_base


class HexByteString(TypeDecorator):
    """  
        Class to store model weights in postgress
    """

    impl = String

    def process_bind_param(self, value, dialect):
        if not isinstance(value, bytes):
            raise TypeError("HexByteString columns support only bytes values.")
        return value.hex()

    def process_result_value(self, value, dialect):
        return bytes.fromhex(value) if value else None


# Base = declarative_base()
# class MyModel(Base):
#     data = Column(HexByteString)

import json

from typing import Dict, Any, List
from functools import lru_cache

In [2]:
## Connect to db
def connec_to_db() -> Engine:
    """
        Connect to postgress database
    """
    #     # ToDo: Add reading variables from linux 
    #     import os
    #     print(os.environ["test1"])
    
    postgress_url = "172.19.0.2"
    postgress_password = "password"
    postgress_user = "postgres"
    postgress_db = "test"
    
    engine = create_engine(f'postgresql+psycopg2://{postgress_user}:'+\
                           f'{postgress_password}@{postgress_url}/'+
                           f'{postgress_db}'
                          )
    if not database_exists(engine.url):
        create_database(engine.url)

    #print(engine.url)
    return engine

Base = declarative_base()
class ModelSignature(Base):
    """
        ORM for model's signatures
    """
    __tablename__ = "model_signature"

    model_type = Column(String, primary_key=True)
    fit_params_json = Column(String)
    python_library_path = Column(String)
    
    def __repr__(self):
        return "{model_type=%s, fit_params_json=%s, python_library_path=%s}"%(self.model_type, 
                                                      self.fit_params_json, self.python_library_path
                                                     )
    
    def _to_json(self):
        return {"model_type": self.model_type,
                "fit_params_json": json.loads(self.fit_params_json),
                "python_library_path": self.python_library_path
               }

def load_model_signature_to_db():
    engine = connec_to_db()
    Base.metadata.create_all(engine)
    
    Session = sessionmaker(bind=engine)
    session = Session()

    fn_json = "model_signature.json"
    model_signatures_json = json.load(open(fn_json, 'r'))
    for dct in model_signatures_json:
        model_type = dct['model_type']
        fit_params_json = json.dumps(dct['fit_params_json'])
        python_library_path = dct['python_library_path']
        model_signature = ModelSignature(model_type = model_type, 
                                         fit_params_json = fit_params_json,
                                         python_library_path = python_library_path
                                        )
        session.add(model_signature)
    
    session.commit()
    return



class WrongModelType(Exception):
    pass

def get_model_params(model_type: str, session: Session) -> Dict[str, str]:
    """
        Get avaliable params for model_type
    """
    row = session.query(ModelSignature).get(model_type)
    
    if hasattr(row, "fit_params_json"):
        return json.loads(row.fit_params_json)
    else:
        raise WrongModelType(f"There are no model_type={model_type}")

@lru_cache(None)
def get_list_model_signatures(session: Session) -> List[Dict[str, str]]:
    """
        Get list and all avaliables models types with their fit_params_json
    """
    resp = session.query(ModelSignature).all()
    json_shema_list = [line._to_json() for line in resp]
    return json_shema_list


def _change_json_schema(json_shema_list: List[Dict[str, str]]) -> Dict[str, str]:
    """
        Change one json format to more usefull    
    """
    return {line['model_type']: line['fit_params_json'] for line in json_shema_list}

class WrongModelType(Exception):
    pass


def get_model_signature(session: Session, model_type: str) -> Dict[str, str]:
    """
        Get single model_signature
    """
    resp = session.query(ModelSignature).filter_by(model_type=model_type).all()
    if len(resp) == 0:
        raise WrongModelType(f"There is no model_type={model_type}")
    elif len(resp) > 1:
        raise WrongModelType(f"There are several model with these model_type={model_type}")
    else:
        return resp[0]
    
class WrongFitParams(Exception):
    pass

from pydoc import locate
def check_fit_params_json(model_signature: ModelSignature,
                          input_json: Dict[str, Any]) -> None:
    """
        Check that input types are valid
    """
        
    json_shema = model_signature._to_json()['fit_params_json']
    
    for key, value in input_json.items():
        if key not in json_shema:
            raise WrongFitParams(f"Unknown params {key}")

        valid_type = json_shema[key]
        if not isinstance(value, locate(valid_type)):
            raise WrongFitParams(f"{key} = {value} that is not valid type " + \
                                 f"(valid_type is {valid_type}, these type is {str(type(value))}")
    return

In [3]:
# get_model_signature(session, "RandomForestClassifier")

In [4]:
# from sklearn.linear_model.LogisticRegression

In [5]:
## Удалить таблицу
engine = connec_to_db()
ModelSignature.__table__.drop(engine)

## Записываем все в базу
engine = load_model_signature_to_db()

In [6]:
# Создаем сессию
engine = connec_to_db()
    
Session = sessionmaker(bind=engine)
session = Session()

In [7]:
# Тестируем проверку типов
input_json = {"min_samples_leaf": 1}
model_signature = get_model_signature(session = session, 
                                  model_type = "RandomForestClassifier")
    
check_fit_params_json(model_signature = model_signature,
                      input_json = input_json
                     )

In [8]:
# model_signature._to_json()

In [9]:
model_signature = get_model_signature(session = session, 
                                  model_type = "RandomForestClassifier")
    
check_fit_params_json(model_signature = model_signature,
                      input_json = {}
                     )

In [10]:

# elif len(resp)
# resp._to_json()

In [11]:
# session.query(ModelSignature).filter_by(model_type="RandomForestClassifier").first()._to_json()['python_library_path']

### FastApi

In [12]:
#exceptions.py
class WrongInputData(Exception):
    """
        Error at input_data_json
    """
    pass

# utils.py
def __json_to_dataframe(json_input):
    """
        Change input json to 
    """
    try:
        data = pd.read_json(json_input)
        #ToDo: Check that types are not objects or string
        # These will cause error with hight probability
        return data
    except Exception as e:
        raise WrongInputData(e)

def import_sklearn_model_class(python_library_path):
    """
        Интроспекция для загрузки модуля sklearn
    """
    from_str = '.'.join(python_library_path.split('.')[:2])
    res = __import__(from_str)
    res = getattr(res, python_library_path.split('.')[1])
    res = getattr(res, python_library_path.split('.')[2])
    return res

import pandas as pd
def fit_model(model_name: str, 
              model_type: str,
              fit_params_json: str, 
              input_data_json: str,
              target_column: str = "y",
              session: Session = session
             ) -> None:
    """
        fit model
    """
    data  = __json_to_dataframe(input_data_json)
    if not target_column in data:
        raise WrongInputData(f'There are no "{target_column}" at input_data_json')
    
    model_signature = get_model_signature(session = session, 
                                      model_type = model_type
                                     )
    python_library_path   = model_signature.python_library_path

    check_fit_params_json(model_signature = model_signature,
                          input_json = fit_params_json
                         )
    
#     print(python_library_path)
#     print(import_sklearn_model_class(python_library_path))
    model_class = import_sklearn_model_class(python_library_path)
    model = model_class(**fit_params_json)
    
    features = list(data.keys())
    features.remove(target_column)

    model.fit(X = data[features], 
              y = data[target_column]
             )
    
    return model
# 

In [13]:
# Тестируем создание модели
# ToDO: Переписать сигнатуры
model = fit_model(model_name = "test123",
          model_type = "RandomForestClassifier",
          fit_params_json = {},
          input_data_json = pd.read_csv("fastapi_microservice/datasets/iris/data.csv").to_json()
         )

In [14]:
# model_class(n_estimators=0.0, )

In [15]:
# model_type
# model_library
# model_bin
# model_

# model_type = ""
# model_name
# fit_params_json
# python_library_path

In [25]:
import pickle
from io import BytesIO

Base = declarative_base()
class ModelInstance(Base):
    """
        ORM for model's instances
    """
    __tablename__ = "model_instance"

    model_name = Column(String, primary_key=True)
    model_type = Column(String)
    fit_params_json = Column(String)
    python_library_path = Column(String)
    model_bin = Column(HexByteString)
    features  = Column(String)
    target_column = Column(String)
    
    def __repr__(self):
        return f"model_name={self.model_name}\n" + \
               f"model_type={self.model_type}" + \
               f"fit_params_json={self.fit_params_json}" + \
               f"python_library_path={self.python_library_path}"
    
    def _get_features(self):
        return json.loads(self.features)
    
    def fit(self, 
            data: pd.DataFrame, 
            target_column: str = 'y'
           ) -> None:
        model_class = self._import_sklearn_model_class()
        fit_params = json.loads(self.fit_params_json)
        model = model_class(**fit_params)

        self.features = list(data.keys())
        self.features.remove(target_column)
        self.features = json.dumps(self.features)
        self.target_column = target_column
        
        model.fit(X = data[self._get_features()], 
                  y = data[self.target_column]
                 )
        
        self.model_bin = ModelInstance._model_to_buff(model)
        
    def predict(self, data: pd.DataFrame) -> Dict[Any, Any]:
        model = ModelInstance._buff_to_model(model.bin)
#         print(self._get_features())
#         data['predict'] = model.predict(X = data[self._get_features()])
#         return data['predict'].to_dict()
    
    def _import_sklearn_model_class(self):
        """
            Интроспекция для загрузки модуля sklearn
        """
        from_str = '.'.join(self.python_library_path.split('.')[:2])
        res = __import__(from_str)
        res = getattr(res, self.python_library_path.split('.')[1])
        res = getattr(res, self.python_library_path.split('.')[2])
        return res
    
    @classmethod
    def _model_to_buff(cls, model_python) -> bytes:
        buffer = BytesIO()
        pickle.dump(model_python, buffer)
        buffer.seek(0)
        return buffer.read()
    
    @classmethod
    def _buff_to_model(csl, model_bin) -> Any:
        model_python = pickle.loads(model_bin)
        return model_python

    def get_model(self) -> Any:
        return ModelInstance._buff_to_model(self.model_bin)

In [None]:
## Удалить таблицу
engine = connec_to_db()
ModelInstance.__table__.drop(engine)

In [None]:
Base.metadata.create_all(engine)

In [22]:
Session = sessionmaker(bind=engine)
session = Session()

In [23]:
model_instance = ModelInstance(model_name  = "test_real",
                               model_type  = "RandomForestClassifier",
                               fit_params_json = "{}",
                               python_library_path = "sklearn.ensemble.RandomForestClassifier",
#                                model_bin = model_to_buff(model)
                              )

data = pd.read_csv('fastapi_microservice/datasets/iris/data.csv')
model_instance.fit(data = data, target_column = "y")
session.add(model_instance)
session.commit()

In [24]:
# o = session.query(ModelInstance).get("test_real")
session.query(ModelInstance).get("test_real").get_model().predict(data)
# predict(
#     data[o._get_features()]
# )

Feature names unseen at fit time:
- y
Feature names must be in the same order as they were in fit.



ValueError: X has 5 features, but RandomForestClassifier is expecting 4 features as input.

In [44]:
session.query(ModelInstance).get("test_real0").get_model().predict(data)

Feature names unseen at fit time:
- y
Feature names must be in the same order as they were in fit.



ValueError: X has 5 features, but RandomForestClassifier is expecting 4 features as input.

In [102]:
# data = pd.read_csv("fastapi_microservice/datasets/iris/data.csv")

In [103]:
# data['p'] = 0

In [104]:
# 0

In [79]:
# import pickle
# from io import BytesIO
# def model_to_buff(model_python):
#     buffer = BytesIO()
#     pickle.dump(model_python, buffer)
#     buffer.seek(0)
#     return buffer.read()

# def buff_to_model(model_bin):
#     model_python = pickle.loads(model_bin)
#     return model_python

In [112]:
model_instance = ModelInstance(model_name  = "test",
              model_type  = "test",
              fit_params_json = "test",
              python_library_path = "test",
              model_bin = model_to_buff(model)
             )

session.add(model_instance)
session.commit()

In [114]:
data = pd.read_csv('fastapi_microservice/datasets/iris/data.csv')

In [119]:
model_instance.python_library_path

'sklearn.ensemble.RandomForestClassifier'

In [124]:
model_instance = ModelInstance(model_name  = "test_real",
                               model_type  = "RandomForestClassifier",
                               fit_params_json = "{}",
                               python_library_path = "sklearn.ensemble.RandomForestClassifier",
#                                model_bin = model_to_buff(model)
                              )

data = pd.read_csv('fastapi_microservice/datasets/iris/data.csv')
model_instance.fit(data = data, target_column = "y")
# session.add(model_instance)
# session.commit()

KeyError: '["0", "1", "2", "3"]'

In [2]:
session.query(ModelInstance).

NameError: name 'session' is not defined

In [117]:
session.query(ModelInstance).get("test").get_model()

RandomForestClassifier()

In [90]:
buff_to_model(session.query(ModelInstance).get("test").model_bin).predict(pd.read_csv("fastapi_microservice/datasets/iris/data.csv").drop("y", axis = 1))

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [None]:
model_instance

In [66]:
# model.estimators_

In [67]:
# buff_to_model(model_to_buff(model)).estimators_

In [None]:
model_bin = get_model_bin(model)

In [46]:
pickle.loads(model_bin.read()).predict(pd.read_csv("fastapi_microservice/datasets/iris/data.csv").drop("y", axis = 1))

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [47]:
0

0

In [45]:
model_bin = get_model_bin(model)

In [36]:
model_bin

<_io.BytesIO at 0x7f3bc4192090>

model_name=test
model_type=testfit_params_json=testpython_library_path=test

In [101]:
# model.predict(data[features])

In [81]:
model.fit(data, data['y'], n_estimators = 100)

TypeError: fit() got an unexpected keyword argument 'n_estimators'

In [73]:
fit_model(model_name = "test123",
          model_type = "RandomForestClassifier",
          fit_params_json = {},
          input_data_json = pd.read_csv("fastapi_microservice/datasets/iris/data.csv").to_json()
         )

TypeError: fit() got an unexpected keyword argument 'n_estimators'

In [54]:
data = pd.read_csv("fastapi_microservice/datasets/iris/data.csv")
# rename(columns = {"y": "target"})
# to_csv("data.csv")

In [62]:
data

Unnamed: 0,0,1,2,3,y
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [12]:
get_model_signature(session = session, 
                                     model_type = "RandomForestClassifier")

{'n_estimators': 'int',
 'max_depth': 'int',
 'random_state': 'int',
 'min_samples_leaf': 'int',
 'learning_rate': 'int'}

In [10]:
get_list_model_signatures(session)

[{'model_type': 'RandomForestClassifier',
  'fit_params_json': {'n_estimators': 'int',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int',
   'learning_rate': 'int'},
  'python_library_path': 'sklearn.ensemble.RandomForestClassifier'},
 {'model_type': 'LogisticRegression',
  'fit_params_json': {'n_estimators': 'int',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int',
   'learning_rate': 'int'},
  'python_library_path': 'sklearn.linear_model.LogisticRegression'},
 {'model_type': 'GradientBoostingClassifier',
  'fit_params_json': {'penalty': 'str',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int'},
  'python_library_path': 'sklearn.ensemble.GradientBoostingClassifier'}]

In [13]:
def import_sklearn_model_class(session, model_type):
    """
        Интроспекция для загрузки модуля sklearn
    """
    from_str = '.'.join(model_type.split('.')[:2])
    res = __import__(from_str)
    res = getattr(res, model_type.split('.')[1])
    res = getattr(res, model_type.split('.')[2])
    return res

In [15]:
model_a = import_sklearn_model_class("sklearn.ensemble.RandomForestClassifier")

In [None]:
model_a()

In [4]:
from sklearn.ensemble import RandomForestClassifier

In [5]:
# type(input_json['learning_rate'])

In [47]:
json_shema = get_model_signatures(session)

In [13]:
input_json = {"learning_rate": 1}

In [55]:
json_shema

[{'model_type': 'RandomForestClassifier',
  'fit_params_json': {'n_estimators': 'int',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int',
   'learning_rate': 'float'}},
 {'model_type': 'LogisticRegression',
  'fit_params_json': {'n_estimators': 'int',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int',
   'learning_rate': 'float'}},
 {'model_type': 'GradientBoostingClassifier',
  'fit_params_json': {'penalty': 'str',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int'}}]

In [45]:
json_shema[0]['fit_params_json']

'{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'

{'RandomForestClassifier': {'n_estimators': 'int',
  'max_depth': 'int',
  'random_state': 'int',
  'min_samples_leaf': 'int',
  'learning_rate': 'float'},
 'LogisticRegression': {'n_estimators': 'int',
  'max_depth': 'int',
  'random_state': 'int',
  'min_samples_leaf': 'int',
  'learning_rate': 'float'},
 'GradientBoostingClassifier': {'penalty': 'str',
  'max_depth': 'int',
  'random_state': 'int',
  'min_samples_leaf': 'int'}}

In [None]:
 res = {}
    for line in resp:
        dct = line._to_json()
        res[dct['model_type']] = dct['fit_params_json']
    
    return res


In [41]:
class WrongFitParams(Exception):
    pass

from pydoc import locate
def check_fit_params_json(session: Session,
                          model_type: str, 
                          input_json: Dict[str, Any]) -> None:
    """
        Check that input types are valid
    """
    
    all_json_shema = get_model_signatures(session)
    
    for key, value in input_json.items():
        if key not in json_shema:
            raise WrongFitParams(f"Unknown params {key}")

        valid_type = json_shema[key]
        if not isinstance(value, locate(valid_type)):
            raise WrongFitParams(f"{key} = {value} that is not valid type {valid_type}")
    return

check_fit_params_json(json_shema = json_shema['RandomForestClassifier'],
                      input_json = input_json
                     )

{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}


TypeError: string indices must be integers

In [21]:
json_shema['RandomForestClassifier']

TypeError: list indices must be integers or slices, not str

In [6]:
get_model_params("RandomForestClassifier", session)

{'n_estimators': 'int',
 'max_depth': 'int',
 'random_state': 'int',
 'min_samples_leaf': 'int',
 'learning_rate': 'float'}

In [7]:
get_model_params("RandomForestClassifier097", session)

WrongModelType: There are no model_type=RandomForestClassifier097

#### PyTest

In [10]:
import pytest


@pytest.fixture(scope="class")
def db_class(request):
    class DummyDB:
        pass

    # set a class attribute on the invoking test context
    request.cls.db = DummyDB()
    
# content of test_unittest_db.py

import unittest
import pytest


@pytest.mark.usefixtures("db_class")
class MyTest(unittest.TestCase):
    def test_method1(self):
        assert hasattr(self, "db")
        assert 0, self.db  # fail for demo purposes

    def test_method2(self):
        assert 0, self.db  # fail for demo purposes



In [None]:
# ModelSignature.__table__.drop(engine)

In [65]:
engine = connec_to_db()
Base.metadata.create_all(engine)

postgresql+psycopg2://postgres:password@172.19.0.2/test


In [66]:
def load_model_signature_to_db():
    engine = connec_to_db()
    Base.metadata.create_all(engine)
    
    Session = sessionmaker(bind=engine)
    session = Session()

    fn_json = "model_signature.json"
    model_signatures_json = json.load(open(fn_json, 'r'))
    for dct in model_signatures_json:
        model_type = dct['model_type']
        fit_params_json = json.dumps(dct['fit_params_json'])
        model_signature = ModelSignature(model_type = model_type, 
                                         fit_params_json = fit_params_json
                                        )
        session.add(model_signature)
    
    session.commit()
    res = session.query(ModelSignature).filter_by(model_type="RandomForestClassifier").first()
    return res
#     session.commit()

In [67]:
    engine = connec_to_db()
    Base.metadata.create_all(engine)
    
    Session = sessionmaker(bind=engine)
    session = Session()

    fn_json = "model_signature.json"
    model_signatures_json = json.load(open(fn_json, 'r'))
    for dct in model_signatures_json:
        model_type = dct['model_type']
        fit_params_json = json.dumps(dct['fit_params_json'])
        model_signature = ModelSignature(model_type = model_type, 
                                         fit_params_json = fit_params_json
                                        )
        session.add(model_signature)
    
    session.commit()
    res = session.query(ModelSignature).filter_by(model_type="RandomForestClassifier").first()
    res

postgresql+psycopg2://postgres:password@172.19.0.2/test


IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "ModelSignature_pkey"
DETAIL:  Key (model_type)=(RandomForestClassifier) already exists.

[SQL: INSERT INTO "ModelSignature" (model_type, fit_params_json) VALUES (%(model_type)s, %(fit_params_json)s)]
[parameters: ({'model_type': 'RandomForestClassifier', 'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'}, {'model_type': 'LogisticRegression', 'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'}, {'model_type': 'GradientBoostingClassifier', 'fit_params_json': '{"penalty": "str", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int"}'})]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [68]:
load_model_signature_to_db()

postgresql+psycopg2://postgres:password@172.19.0.2/test


IntegrityError: (psycopg2.errors.UniqueViolation) duplicate key value violates unique constraint "ModelSignature_pkey"
DETAIL:  Key (model_type)=(RandomForestClassifier) already exists.

[SQL: INSERT INTO "ModelSignature" (model_type, fit_params_json) VALUES (%(model_type)s, %(fit_params_json)s)]
[parameters: ({'model_type': 'RandomForestClassifier', 'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'}, {'model_type': 'LogisticRegression', 'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'}, {'model_type': 'GradientBoostingClassifier', 'fit_params_json': '{"penalty": "str", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int"}'})]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [69]:
engine = connec_to_db()
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()

res = session.query(ModelSignature).filter_by(model_type="RandomForestClassifier").first()
res

postgresql+psycopg2://postgres:password@172.19.0.2/test


{model_type=RandomForestClassifier, fit_params_json={"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}}

In [70]:
# type(session)
# Session

In [89]:
from typing import Dict, Any, List
from functools import lru_cache

class WrongModelType(Exception):
    pass

def get_model_params(model_type: str, session: Session) -> Dict[str, str]:
    """
        Get avaliable params for model_type
    """
    row = session.query(ModelSignature).get(model_type)
    
    if hasattr(row, "fit_params_json"):
        return json.loads(fit_params_json)
    else:
        raise WrongModelType(f"There are no model_type={model_type}")

@lru_cache(None)
def get_model_signatures(session: Session) -> List[Dict[str, Any]]:
    """
        Get list and all avaliables models params
    """
    resp = session.query(ModelSignature).all()
    return [line._to_json() for line in resp]

In [90]:
get_model_params("RandomForestClassifier", session)

{'penalty': 'str',
 'max_depth': 'int',
 'random_state': 'int',
 'min_samples_leaf': 'int'}

In [91]:
session.query(ModelSignature).all()[0]

{model_type=RandomForestClassifier, fit_params_json={"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}}

In [92]:
get_model_signatures(session)

[{'model_type': 'RandomForestClassifier',
  'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'},
 {'model_type': 'LogisticRegression',
  'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'},
 {'model_type': 'GradientBoostingClassifier',
  'fit_params_json': '{"penalty": "str", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int"}'}]

In [32]:
session.query(ModelSignature).get("RandomForestClassifier")

['_sa_instance_state',
 'model_type',
 'fit_params_json',
 '__module__',
 '__doc__',
 '__tablename__',
 '__repr__',
 '_sa_class_manager',
 '__table__',
 '__init__',
 '__mapper__',
 'registry',
 'metadata',
 '__abstract__',
 '__dict__',
 '__weakref__',
 '_sa_registry',
 '__hash__',
 '__str__',
 '__getattribute__',
 '__setattr__',
 '__delattr__',
 '__lt__',
 '__le__',
 '__eq__',
 '__ne__',
 '__gt__',
 '__ge__',
 '__new__',
 '__reduce_ex__',
 '__reduce__',
 '__subclasshook__',
 '__init_subclass__',
 '__format__',
 '__sizeof__',
 '__dir__',
 '__class__']

In [20]:
res

In [None]:
 our_user = (session.query(ModelSignature).filter_by(model_type="ed").first()) 
our_user

In [None]:
session.query(ModelSignature)

In [25]:
from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind=engine)
session = Session()

model_signature = ModelSignature(model_type="ed", fit_params_json="Ed Jones")
session.add(model_signature)

our_user = (session.query(ModelSignature).filter_by(model_type="ed").first()) 
our_user

<User(model_type='ed', fit_params_json='Ed Jones')>

In [37]:
from typing import Union
import json
possible_models = [{"model_type": "RandomForestClassifier",
                    "fit_params_json": {"n_estimators": "int",
                                        "max_depth": "int",
                                        "random_state": "int",
                                        "min_samples_leaf": "int",
                                        "learning_rate": "float"
                                       }
                   },
                   {"model_type": "LogisticRegression",
                    "fit_params_json": {"n_estimators": "int",
                                        "max_depth": "int",
                                        "random_state": "int",
                                        "min_samples_leaf": "int",
                                        "learning_rate": "float"
                                       }
                   },
                   {"model_type": "GradientBoostingClassifier",
                   "fit_params_json": {"penalty": "str",
                                       "max_depth": "int",
                                       "random_state": "int",
                                       "min_samples_leaf": "int"
                                       }
                   }
                  ]




In [38]:
json.dump(possible_models, 
          open("model_signature.json", 'w+'))


In [39]:
json.load(open("model_signature.json", 'r'))

[{'model_type': 'RandomForestClassifier',
  'fit_params_json': {'n_estimators': 'int',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int',
   'learning_rate': 'float'}},
 {'model_type': 'LogisticRegression',
  'fit_params_json': {'n_estimators': 'int',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int',
   'learning_rate': 'float'}},
 {'model_type': 'GradientBoostingClassifier',
  'fit_params_json': {'penalty': 'str',
   'max_depth': 'int',
   'random_state': 'int',
   'min_samples_leaf': 'int'}}]

In [30]:
json.load(open("model_signature.json", 'r'))

[{'model_type': 'RandomForestClassifier',
  'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'},
 {'model_type': 'LogisticRegression',
  'fit_params_json': '{"n_estimators": "int", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int", "learning_rate": "float"}'},
 {'model_type': 'GradientBoostingClassifier',
  'fit_params_json': '{"penalty": "str", "max_depth": "int", "random_state": "int", "min_samples_leaf": "int"}'}]

postgresql+psycopg2://postgres:password@172.19.0.2/test


In [14]:
type(engine)

sqlalchemy.engine.base.Engine