In [483]:
import pickle
from datetime import datetime
from pathlib import Path

import lightgbm as lgb
import pandas as pd
import polars as pl
from dateutil.relativedelta import relativedelta
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.target_transforms import Differences
from pyecharts import options as opts
from pyecharts.charts import Line,Bar,Page
from sklearn.ensemble import VotingRegressor
from sklearn.pipeline import Pipeline
from utilsforecast.plotting import plot_series
from xgboost import XGBRegressor, XGBRFRegressor, plot_importance

# Timestamp captured when this cell runs; later cells derive the history
# cutoff and the first forecast month from it.
today = datetime.now()

In [486]:
# Read the parquet extract and keep only the four columns used in this notebook.
df = pl.read_parquet(
    "C:\\Users\\smishra14\\OneDrive - Stryker\\data\\Endoscopy.parquet"
).select(['CatalogNumber', 'SALES_DATE', '`Act Orders Rev', '`Fcst Stat Prelim Rev'])

# History frame: rows dated earlier than one month before `today`, summed per
# (CatalogNumber, SALES_DATE) and ordered oldest-first.
df1 = (
    df.filter(pl.col('SALES_DATE') < today - relativedelta(months=1))
    .group_by(['CatalogNumber', 'SALES_DATE'])
    .sum()
    .sort('SALES_DATE', descending=False)
)

# Catalog numbers ranked by their summed `Act Orders Rev` over the whole
# extract, largest first; downstream cells take the first 40.
cc = (
    df.group_by('CatalogNumber')
    .sum()
    .sort('`Act Orders Rev', descending=True)
)

def preprocess1(df):
    """Append the two calendar features used as model inputs.

    Both columns are derived from ``SALES_DATE``:

    * ``month`` - month number of the date.
    * ``days``  - number of days between 2021-01-01 and the date.

    Returns a new frame with the extra columns appended.
    """
    sales_date = pl.col('SALES_DATE')
    origin = datetime(year=2021, month=1, day=1)
    return df.with_columns(
        month=sales_date.dt.month(),
        days=(sales_date - origin).dt.total_days(),
    )

def plot(df,pred,cat):
    """Build a line chart of one catalog number's history plus its forecast.

    Parameters
    ----------
    df : polars.DataFrame
        History containing ``CatalogNumber``, ``SALES_DATE`` and
        ```Act Orders Rev`` columns.
    pred : sequence of numbers
        Monthly forecast values. The first value is placed on the first day of
        the current month (taken from the module-level ``today``) and each
        following value one month later.
    cat
        Catalog number to plot; also used as the chart title.

    Returns
    -------
    pyecharts.charts.Line
        The unrendered chart, so the caller can add it to a ``Page`` or call
        ``render_notebook()`` on it.
    """
    history = (
        df.filter(pl.col('CatalogNumber') == cat)
        .with_columns(pl.col('SALES_DATE').cast(pl.Date))
    )

    # Start from a plain date (no time-of-day) so the forecast axis is a Date
    # column, matching the join key that was cast to pl.Date above.
    first_month = today.date().replace(day=1)
    last_month = first_month + relativedelta(months=len(pred) - 1)
    forecast = pl.DataFrame({
        'SALES_DATE': pl.date_range(first_month, last_month, "1mo", eager=True),
        'XGBoost': pred,
    })

    # Full join keeps history-only and forecast-only months on one shared axis.
    merged = (
        history.join(forecast, on='SALES_DATE', how="full", coalesce=True)
        .sort('SALES_DATE', descending=False)
    )

    hidden_labels = opts.LabelOpts(is_show=False)
    line = Line()
    line.add_xaxis(merged['SALES_DATE'].to_list())
    line.add_yaxis('Act Orders', merged['`Act Orders Rev'].to_list(), label_opts=hidden_labels)
    line.add_yaxis('XGBoost', merged['XGBoost'].to_list(), label_opts=hidden_labels)
    # Charts take TitleOpts; ComponentTitleOpts is the title option for
    # components such as Table and is not a chart title option.
    line.set_global_opts(title_opts=opts.TitleOpts(title=cat))
    return line

In [471]:
# Fit one ensemble per catalog number (first 40 entries of `cc`) on the
# calendar features and pickle each fitted model under models/.
pp = preprocess1(df1)

# Create the output folder up front; without it the first open(..., 'wb')
# raises FileNotFoundError on a fresh checkout.
Path('models').mkdir(parents=True, exist_ok=True)

for i in cc['CatalogNumber'][:40]:
    p1 = pp.filter(pl.col('CatalogNumber') == i).to_pandas()
    mod1 = XGBRegressor()
    mod2 = XGBRegressor(booster='gblinear')
    # NOTE: both members are XGBoost models; the 'lgbm' label is kept as-is so
    # the estimator names inside the pickled ensembles do not change.
    pipe = VotingRegressor([('xgbm', mod1), ('lgbm', mod2)])
    pipe.fit(X=p1[['month', 'days']], y=p1['`Act Orders Rev'])
    with open(f'models/{i}.pkl', 'wb') as f:
        pickle.dump(pipe, f)

In [488]:
# Forecast horizon: monthly dates from the current month through the month
# FORECAST_MONTHS ahead.
FORECAST_MONTHS = 60
month_start = today.replace(day=1)
fdf = pl.DataFrame({
    'SALES_DATE': pl.date_range(
        month_start,
        month_start + relativedelta(months=FORECAST_MONTHS),
        "1mo",
        eager=True,
    )
})
fp = preprocess1(fdf)

# The feature matrix is the same for every catalog number, so convert it to
# pandas once instead of on every loop iteration.
future_X = fp[['month', 'days']].to_pandas()

page = Page(layout=opts.PageLayoutOpts(flex_wrap=2))
for i in cc['CatalogNumber'][:40]:
    # SECURITY: pickle.load can execute arbitrary code. Only load files that
    # were written by the training cell of this notebook.
    with open(f'models/{i}.pkl', 'rb') as f:
        pr = pickle.load(f)
    pred = pr.predict(future_X)
    page.add(plot(df1, pred, i))
page.load_javascript()

<pyecharts.render.display.Javascript at 0x29db7987a10>

In [489]:
# Show the `page` of per-catalog charts assembled in the forecasting cell as
# this cell's output.
page.render_notebook()

In [330]:
# Spot-check one catalog number (the second entry of `cc`) against its saved model.
cat = cc['CatalogNumber'][1]

# The files under models/ are written with pickle.dump of a fitted
# VotingRegressor, not with XGBoost's save_model, so they must be read back
# with pickle; XGBRFRegressor.load_model cannot parse them.
# SECURITY: pickle.load can execute arbitrary code. Only load files that were
# written by the training cell of this notebook.
with open(f'models/{cat}.pkl', 'rb') as f:
    pr = pickle.load(f)

# Predict on pandas input, matching the frame type the model was fitted on.
pred = pr.predict(fp[['month', 'days']].to_pandas())

# plot() returns the chart object; render it so the cell output is the chart.
plot(df1, pred, cat).render_notebook()

In [None]:
def preprocess(df, lags=(3, 4, 5, 6, 12)):
    """Add lagged-target columns and a month feature.

    For every ``n`` in ``lags`` a column ``lag{n}`` is added holding
    ```Act Orders Rev`` shifted by ``n`` rows within each ``CatalogNumber``.
    Rows with no value ``n`` rows back (the shift yields null) fall back to the
    row's own ```Act Orders Rev``. A ``month`` column taken from
    ``SALES_DATE`` is appended last.

    Parameters
    ----------
    df : polars.DataFrame
        Must contain ``CatalogNumber``, ``SALES_DATE`` and ```Act Orders Rev``.
        The shift is positional, so rows are expected to already be in
        chronological order within each catalog number.
    lags : iterable of int, optional
        Row offsets to generate. Defaults to ``(3, 4, 5, 6, 12)``, the set that
        was previously hard-coded.

    Returns
    -------
    polars.DataFrame
        A new frame with the ``lag{n}`` columns (in the order given) followed
        by ``month``.
    """
    target = pl.col('`Act Orders Rev')
    lag_columns = [
        # fill_null(target) is the same fallback as
        # when(is_null).then(target).otherwise(lag).
        target.shift(n).over('CatalogNumber').fill_null(target).alias(f'lag{n}')
        for n in lags
    ]
    return df.with_columns(*lag_columns, month=pl.col('SALES_DATE').dt.month())