In [1]:
from typing import Union, List, Type, TypeVar, Generic
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from fastapi.encoders import jsonable_encoder
from schemas import FpmkCreate
import schemas
import pandas as pd
import json
import numpy as np

In [2]:
import sys

In [3]:
# Type variable used to parameterise Generic classes; bounded to the three
# scikit-learn scaler classes imported at the top of the notebook.
ScalerType = TypeVar("ScalerType", bound=Union[MinMaxScaler, RobustScaler, StandardScaler])

In [24]:
class CustomisedScaler(Generic[ScalerType]):
    """Scale the ``fpmk`` field of ``FpmkCreate`` records with a scikit-learn scaler.

    The scaler is re-fitted on ``fpmk_train`` on every call, so ``transform``
    and ``inverse_transform`` only undo each other when both are given the
    same training records.
    """

    def __init__(self, scaler_name: str):
        """Instantiate the scaler class named ``scaler_name``.

        The name is looked up in this module's namespace, so the class
        (e.g. ``MinMaxScaler``) must already be imported here.

        Raises:
            AttributeError: if the module has no attribute called ``scaler_name``.
        """
        # NOTE(review): any module-level callable can be instantiated through this
        # lookup; consider an explicit allow-list if the name can come from users.
        self.scaler = getattr(sys.modules[__name__], scaler_name)()

    @staticmethod
    def _fpmk_column(frame: pd.DataFrame) -> np.ndarray:
        """Return the ``fpmk`` column as an (n, 1) array, the 2-D layout passed to the scaler."""
        return frame['fpmk'].to_numpy().reshape(-1, 1)

    def _fit_and_apply(self, fpmk_train: List[FpmkCreate], fpmk_in: List[FpmkCreate],
                       inverse: bool) -> List[FpmkCreate]:
        """Fit on the training records, then (inverse-)transform the input records."""
        fpmk_hist = pd.DataFrame(data=jsonable_encoder(fpmk_train))
        # fit() returns the scaler itself; its result must not be written back
        # into the frame (the original code overwrote the 'fpmk' column with it).
        self.scaler.fit(self._fpmk_column(fpmk_hist))

        # Nothing to scale: an empty frame has no 'fpmk' column to index.
        if len(fpmk_in) == 0:
            return []

        fpmk_out = pd.DataFrame(data=jsonable_encoder(fpmk_in))
        apply = self.scaler.inverse_transform if inverse else self.scaler.transform
        # The scaler returns an (n, 1) array; flatten it for single-column assignment.
        fpmk_out['fpmk'] = apply(self._fpmk_column(fpmk_out)).ravel()

        # Round-trip through JSON records so every value is a plain JSON type
        # before being handed back to the schema constructor.
        return [FpmkCreate(**item) for item in json.loads(fpmk_out.to_json(orient='records'))]

    def transform(self, fpmk_train: List[FpmkCreate], fpmk_in: List[FpmkCreate]) -> List[FpmkCreate]:
        """Fit the scaler on ``fpmk_train`` and return ``fpmk_in`` with scaled ``fpmk`` values.

        Args:
            fpmk_train: records whose ``fpmk`` values define the scaling.
            fpmk_in: records to scale; an empty list yields an empty list.

        Returns:
            New ``FpmkCreate`` objects, one per input record, with ``fpmk`` replaced.
        """
        return self._fit_and_apply(fpmk_train, fpmk_in, inverse=False)

    def inverse_transform(self, fpmk_train: List[FpmkCreate], fpmk_in: List[FpmkCreate]) -> List[FpmkCreate]:
        """Fit the scaler on ``fpmk_train`` and map scaled ``fpmk`` values in ``fpmk_in`` back.

        Args:
            fpmk_train: the records the forward scaling was fitted on.
            fpmk_in: records carrying scaled ``fpmk`` values; an empty list yields an empty list.

        Returns:
            New ``FpmkCreate`` objects, one per input record, with ``fpmk`` un-scaled.
        """
        return self._fit_and_apply(fpmk_train, fpmk_in, inverse=True)

In [5]:
# Load the sample records from a CSV path relative to the working directory.
data_in = pd.read_csv('sample/sample.csv')

In [6]:
# Preview the 'fpmk' column reshaped into a single-column 2-D array.
data_in['fpmk'].to_numpy().reshape(-1,1)

array([[7],
       [5],
       [6],
       [7],
       [4],
       [5],
       [3],
       [2]], dtype=int64)

In [7]:
# Accumulator for the FpmkCreate objects built from the CSV rows in the next cell.
res = []

In [8]:
# Turn every CSV row into an FpmkCreate and add it to the existing `res` list.
res.extend(
    FpmkCreate(**row)
    for row in json.loads(data_in.to_json(orient='records'))
)

In [25]:
# Build a scaler wrapper; the string is resolved to a class inside CustomisedScaler.__init__.
cs = CustomisedScaler('MinMaxScaler')

In [26]:
# Show the FpmkCreate records parsed from the CSV.
res

[FpmkCreate(system='RSC', subsystem='DOR', mileage=1000, unit='km', fpmk=7.0, date=datetime.date(2022, 3, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=1500, unit='km', fpmk=5.0, date=datetime.date(2022, 4, 30)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=2000, unit='km', fpmk=6.0, date=datetime.date(2022, 5, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=2300, unit='km', fpmk=7.0, date=datetime.date(2022, 6, 30)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=2700, unit='km', fpmk=4.0, date=datetime.date(2022, 7, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=3200, unit='km', fpmk=5.0, date=datetime.date(2022, 8, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=3400, unit='km', fpmk=3.0, date=datetime.date(2022, 9, 30)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=4000, unit='km', fpmk=2.0, date=datetime.date(2022, 10, 31))]

In [27]:
# Fit on `res` and scale those same records (training and input sets are identical here).
t = cs.transform(fpmk_train=res, fpmk_in=res)

In [28]:
# Round-trip check: undo the scaling of `t` using the same training records for the fit.
cs.inverse_transform(fpmk_train=res, fpmk_in=t)

[FpmkCreate(system='RSC', subsystem='DOR', mileage=1000, unit='km', fpmk=7.0, date=datetime.date(2022, 3, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=1500, unit='km', fpmk=5.0, date=datetime.date(2022, 4, 30)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=2000, unit='km', fpmk=6.0, date=datetime.date(2022, 5, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=2300, unit='km', fpmk=7.0, date=datetime.date(2022, 6, 30)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=2700, unit='km', fpmk=4.0, date=datetime.date(2022, 7, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=3200, unit='km', fpmk=5.0, date=datetime.date(2022, 8, 31)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=3400, unit='km', fpmk=3.0, date=datetime.date(2022, 9, 30)),
 FpmkCreate(system='RSC', subsystem='DOR', mileage=4000, unit='km', fpmk=2.0, date=datetime.date(2022, 10, 31))]

In [24]:
from typing import List, Union

In [27]:
# NOTE(review): the recorded output for this cell is a ValidationError reporting
# 'mileage' and 'unit' as required fields — confirm against the current schemas.Fpmk.
fpmk_1 = schemas.Fpmk(date='2021-03-31', fpmk=12, subsystem='DOR', system='RSC')

ValidationError: 2 validation errors for Fpmk
mileage
  field required (type=value_error.missing)
unit
  field required (type=value_error.missing)

In [None]:
# Second sample record; like fpmk_1 it passes no 'mileage' or 'unit'.
fpmk_2 = schemas.Fpmk(date='2021-04-30', fpmk=13, subsystem='DOR', system='RSC')

In [18]:
# NOTE(review): the recorded output is a NameError for 'schemas', which suggests this
# cell was last run in a kernel where `schemas` had not been imported yet.
fpmk_3 = schemas.Fpmk(date='2021-05-31', fpmk=18, subsystem='DOR', system='RSC')

NameError: name 'schemas' is not defined

In [9]:
# Collect the three sample records; requires fpmk_1..fpmk_3 to have been created successfully.
fpmk_list = [fpmk_1, fpmk_2, fpmk_3]

In [50]:
import pandas as pd
import json

In [51]:
# Encode the schema objects with jsonable_encoder and load the result into a DataFrame.
df = pd.DataFrame(data=jsonable_encoder(fpmk_list))

In [52]:
# Parse the 'date' column into pandas datetimes, overwriting the column.
df['date'] = pd.to_datetime(df['date'])

In [64]:
# Display the 'date' column.
# NOTE(review): the recorded output "(3,)" does not look like a Series repr;
# it was presumably produced by a different expression (e.g. .shape) — stale output.
df['date']

(3,)

In [53]:
# Round-trip the frame through JSON records and rebuild one schemas.Fpmk per row.
res = [
    schemas.Fpmk(**row)
    for row in json.loads(df.to_json(orient='records'))
]

In [54]:
# Show the rebuilt Fpmk objects.
res

[Fpmk(system='RSC', subsystem='DOR', fpmk=12, date=datetime.date(2021, 3, 31)),
 Fpmk(system='RSC', subsystem='DOR', fpmk=13, date=datetime.date(2021, 4, 30)),
 Fpmk(system='RSC', subsystem='DOR', fpmk=18, date=datetime.date(2021, 5, 31))]

In [59]:
# Membership test against the frame's column labels.
'system' in df.columns

True

In [58]:
# List the column labels, one per line.
for column_name in df.columns:
    print(column_name)

system
subsystem
fpmk
date


In [15]:
# Index the frame by date (the following asfreq() call works on the index).
# Rebinding instead of inplace=True, together with the guard, keeps this cell
# re-runnable: once 'date' is the index there is no 'date' column left to set,
# and the original in-place call raised KeyError on a second run.
if 'date' in df.columns:
    df = df.set_index('date')

In [16]:
# Conform the date index to the 'M' (month-end) frequency.
# NOTE(review): newer pandas releases deprecate the 'M' alias in favour of 'ME' —
# confirm the pinned pandas version before changing it.
df = df.asfreq('M')

In [90]:
# Most frequent 'system' value; takes the first entry if several values tie.
df['system'].mode().values[0]

'RSC'

In [18]:
# Last index entry shifted forward by one period of the index frequency (the next date).
df['fpmk'].index.shift(1)[-1]

Timestamp('2021-06-30 00:00:00', freq='M')

In [39]:
# Keep only the 'fpmk' series. NOTE: this rebinds `res`, which earlier held a list of schema objects.
res = df['fpmk']

In [40]:
# Display the 'fpmk' series.
res

date
2021-03-31    12
2021-04-30    13
2021-05-31    18
Freq: M, Name: fpmk, dtype: int64

In [28]:
# Wrap `res` in a DataFrame restricted to an 'fpmk' column
# (assumes `res` is still the Series named 'fpmk' — depends on cell execution order).
res = pd.DataFrame(res, columns=['fpmk'])

In [47]:
# Display `res`.
# NOTE(review): the recorded output is a Series, not a DataFrame; the execution
# counts indicate this ran after `res` was re-bound to df['fpmk'], not after the
# DataFrame conversion shown just above.
res

date
2021-03-31    12
2021-04-30    13
2021-05-31    18
Freq: M, Name: fpmk, dtype: int64

In [34]:
# Move the current index into a regular column and fall back to a default integer index.
res = res.reset_index()

In [82]:
# Add a constant 'sys_cd' entry holding 'RSC' for every row.
res['sys_cd'] = 'RSC'

In [36]:
# Index labels as a NumPy array (recorded output: integers 0..2, i.e. after reset_index).
res.index.values

array([0, 1, 2], dtype=int64)

In [1]:
import numpy as np

In [25]:
# Scratch calculation: natural logarithm of 30.
np.log(30)

3.4011973816621555

In [27]:
# Index labels as a NumPy array.
# NOTE(review): the recorded output holds datetime64 values, so this was apparently
# run while `res` still had the date index — depends on cell execution order.
res.index.values

array(['2021-03-31T00:00:00.000000000', '2021-04-30T00:00:00.000000000',
       '2021-05-31T00:00:00.000000000'], dtype='datetime64[ns]')

In [43]:
# The integers 1 through 5 as a list.
list(range(1, 6))

[1, 2, 3, 4, 5]

In [66]:
# Scratch calculation: 25000 multiplied by 1.1 raised to the 10th power.
1.1**10 * 25000

64843.56150250006

In [85]:
# Integers 3, 4, 5 arranged as a single-column 2-D array.
np.arange(3, 6).reshape(-1, 1)

array([[3],
       [4],
       [5]])