In [675]:
import polars as pl
import pandas as pd
from xgboost import XGBRegressor, XGBRFRegressor, plot_importance
import lightgbm as lgb
from datetime import datetime
from dateutil.relativedelta import relativedelta
from pyecharts.charts import Line,Bar,Page, Grid
from pyecharts import options as opts
from mlforecast import MLForecast
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.target_transforms import Differences
from utilsforecast.plotting import plot_series
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingRegressor
import pickle
import math
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
# Use the white-background plotly template for every figure created below.
pio.templates.default = "plotly_white"

# Wall-clock timestamp captured once when this cell runs. Later cells use it
# both as the cut-off for historical actuals and as the start of the forecast
# horizon, so results depend on the day the notebook is executed.
today= datetime.now()

In [695]:
# Load the raw extract and keep only the columns used downstream.
# NOTE(review): absolute, user-specific path - consider a configurable data dir.
df = pl.read_parquet(
    "C:\\Users\\smishra14\\OneDrive - Stryker\\data\\Endoscopy.parquet"
).select(['CatalogNumber', 'SALES_DATE', '`Act Orders Rev', '`Fcst Stat Prelim Rev'])

# History: rows dated before (now - 1 month), summed per catalog number and
# sales date, oldest first.
df1 = (
    df.filter(pl.col('SALES_DATE') < today - relativedelta(months=1))
    .group_by(['CatalogNumber', 'SALES_DATE'])
    .sum()
    .sort('SALES_DATE')
)

# Catalog numbers ranked by total actual-order revenue over all rows of `df`
# (not only the filtered history), highest first.
cc = (
    df.group_by('CatalogNumber')
    .sum()
    .sort('`Act Orders Rev', descending=True)
)

def preprocess1(df, epoch=datetime(year=2021,month=1,day=1)):
    """Add the calendar features consumed by the per-catalog regressors.

    Parameters
    ----------
    df : polars.DataFrame
        Frame containing a temporal ``SALES_DATE`` column.
    epoch : datetime, optional
        Reference instant for the ``days`` trend feature. Defaults to
        2021-01-01, the value this notebook has always used. The same epoch
        must be used when fitting and when predicting, otherwise the trend
        feature is shifted between the two.

    Returns
    -------
    polars.DataFrame
        ``df`` with two extra columns: ``month`` (calendar month of
        ``SALES_DATE``) and ``days`` (whole days elapsed from ``epoch`` to
        ``SALES_DATE``).
    """
    sales_date = pl.col('SALES_DATE')
    return df.with_columns(
        month=sales_date.dt.month(),
        days=(sales_date - epoch).dt.total_days(),
    )

def _actuals_with_forecast(df, pred, cat):
    """Return one catalog number's actuals full-joined with its monthly forecast.

    The forecast is placed on a monthly axis that starts on the first day of
    the current month (taken from the notebook-level ``today``) and has one
    entry per element of ``pred``. The result is sorted by ``SALES_DATE`` and
    carries the original columns of ``df`` plus an ``XGBoost`` column; each
    side is null on dates that only the other side covers.
    """
    # Drop the time of day so the range is built from plain dates, matching
    # the Date cast applied to the actuals before the join.
    first_of_month = today.replace(day=1).date()
    horizon = pl.date_range(
        first_of_month,
        first_of_month + relativedelta(months=len(pred) - 1),
        "1mo",
        eager=True,
    )
    forecast = pl.DataFrame({'SALES_DATE': horizon, 'XGBoost': pred})
    actuals = df.filter(pl.col('CatalogNumber') == cat).with_columns(
        pl.col('SALES_DATE').cast(pl.Date)
    )
    joined = actuals.join(forecast, on='SALES_DATE', how="full", coalesce=True)
    return joined.sort('SALES_DATE', descending=False)


def plot(df,pred,cat):
    """Build a pyecharts line chart of actual orders vs. forecast for ``cat``.

    Parameters
    ----------
    df : polars.DataFrame
        History with ``CatalogNumber``, ``SALES_DATE`` and ``\u0060Act Orders Rev``.
    pred : sequence of numbers
        Monthly predictions, first element being the current month.
    cat : catalog number to plot; also used as the chart title.

    Returns
    -------
    pyecharts.charts.Line
        The un-rendered chart; call ``render_notebook()`` on it (or add it to
        a grid) to display it.
    """
    pdf = _actuals_with_forecast(df, pred, cat)
    line = Line(init_opts=opts.InitOpts(width="630px", height="370px"))
    line.add_xaxis(pdf['SALES_DATE'].to_list())
    line.add_yaxis(
        'Act Orders',
        pdf['`Act Orders Rev'].to_list(),
        label_opts=opts.LabelOpts(is_show=False),
    )
    line.add_yaxis(
        'XGBoost',
        pdf['XGBoost'].to_list(),
        label_opts=opts.LabelOpts(is_show=False),
    )
    line.set_global_opts(
        title_opts=opts.TitleOpts(title=cat),
        tooltip_opts=opts.TooltipOpts(is_show=True),
    )
    return line


def pplot(df,pred,cat):
    """Build a plotly figure of actual orders vs. forecast for ``cat``.

    Takes the same arguments as ``plot``. Returns a ``plotly.graph_objects.Figure``
    with two line traces, in this order: ``Act Orders`` then ``XGBoost``.
    """
    pdf = _actuals_with_forecast(df, pred, cat)
    dates = pdf['SALES_DATE'].to_list()
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=pdf['`Act Orders Rev'].to_list(), mode='lines', name='Act Orders', text=cat))
    fig.add_trace(go.Scatter(x=dates, y=pdf['XGBoost'].to_list(), mode='lines', name='XGBoost'))
    return fig

def create_dynamic_grid(charts, charts_per_row=2, height="2200px", width="1350px"):
    """Lay out pyecharts charts in a grid, filling each row left to right.

    Parameters
    ----------
    charts : sequence of pyecharts charts
        Charts to place; chart ``k`` goes to row ``k // charts_per_row`` and
        column ``k % charts_per_row``.
    charts_per_row : int
        Number of columns in the grid.
    height, width : str
        CSS size of the whole grid canvas.

    Returns
    -------
    pyecharts.charts.Grid
        Grid containing every chart, each with percentage-based offsets.
    """
    n_rows = math.ceil(len(charts) / charts_per_row)

    # Fraction of the canvas assigned to one cell along each axis.
    # NOTE(review): the vertical split uses 1.1 rather than 1.0 - presumably
    # hand-tuned spacing; confirm before changing.
    cell_h = 1.1 / n_rows
    cell_w = 1.0 / charts_per_row

    def _cell_position(index):
        # Percent offsets of one cell from each canvas edge, with a small
        # constant padding (+2 vertically, +5 horizontally).
        r, c = divmod(index, charts_per_row)
        y_start = r * cell_h
        y_end = (r + 1) * cell_h
        x_start = c * cell_w
        x_end = (c + 1) * cell_w
        return {
            "pos_top": f"{int(y_start * 100+2)}%",
            "pos_bottom": f"{int(100 - y_end * 100+2)}%",
            "pos_left": f"{int(x_start * 100 + 5)}%",
            "pos_right": f"{int(100 - x_end * 100 + 5)}%",
        }

    placements = [_cell_position(k) for k in range(len(charts))]

    grid = Grid(init_opts=opts.InitOpts(height=height, width=width))
    for k, chart in enumerate(charts):
        cell_opts = opts.GridOpts(
            **placements[k],
            tooltip_opts=opts.TooltipOpts(is_show=True),
            is_contain_label=True,
        )
        grid.add(grid_index=k, chart=chart, grid_opts=cell_opts)

    return grid

In [471]:
# Fit one month/days model per catalog number for the 40 highest-revenue
# catalog numbers and pickle each fitted model to models/<catalog number>.pkl.
pp = preprocess1(df1)
for catalog in cc['CatalogNumber'][:40]:
    history = pp.filter(pl.col('CatalogNumber') == catalog).to_pandas()
    # Voting ensemble of a default XGBRegressor and a linear-booster
    # XGBRegressor. The 'lgbm' label is only a name: that slot holds the
    # gblinear XGBoost model, not a LightGBM one.
    ensemble = VotingRegressor([
        ('xgbm', XGBRegressor()),
        ('lgbm', XGBRegressor(booster='gblinear')),
    ])
    ensemble.fit(X=history[['month', 'days']], y=history['`Act Orders Rev'])
    with open(f'models/{catalog}.pkl', 'wb') as f:
        pickle.dump(ensemble, f)

In [696]:
# Forecast horizon: the first day of the current month and of each of the
# following 60 months.
fdf=pl.DataFrame({'SALES_DATE':pl.date_range(today.replace(day=1),today.replace(day=1)+relativedelta(months=60), "1mo",eager=True)})
fp=preprocess1(fdf)

# The feature matrix is the same for every catalog number, so convert it to
# pandas once instead of once per model.
future_X = fp[['month','days']].to_pandas()

# Single source for the catalog numbers shown, so the models loaded and the
# subplot titles cannot drift apart.
top_catalogs = cc['CatalogNumber'][:20].to_list()

page = []
for catalog in top_catalogs:
    # NOTE(review): pickle.load can execute arbitrary code - only load model
    # files produced by the training cell of this notebook.
    with open(f'models/{catalog}.pkl','rb') as f:
        pr=pickle.load(f)
    pred=pr.predict(future_X)
    page.append(pplot(df1,pred,catalog))

# Two charts per row; chart k goes to row k // 2 + 1, column k % 2 + 1
# (plotly subplot indices are 1-based).
rows = math.ceil(len(page) / 2)
fig1 = make_subplots(rows=rows, cols=2, subplot_titles=top_catalogs)
for k, chart in enumerate(page):
    fig1.add_traces([chart.data[0], chart.data[1]], rows=k // 2 + 1, cols=k % 2 + 1)
fig1.update_layout(height=2200)
fig1.show()

In [629]:
# Forecast horizon: the first day of the current month and of each of the
# following 60 months.
fdf=pl.DataFrame({'SALES_DATE':pl.date_range(today.replace(day=1),today.replace(day=1)+relativedelta(months=60), "1mo",eager=True)})
fp=preprocess1(fdf)

# The feature matrix is the same for every catalog number, so convert it to
# pandas once instead of once per model.
future_X = fp[['month','days']].to_pandas()

page = []
for catalog in cc['CatalogNumber'][:20]:
    # NOTE(review): pickle.load can execute arbitrary code - only load model
    # files produced by the training cell of this notebook.
    with open(f'models/{catalog}.pkl','rb') as f:
        pr=pickle.load(f)
    pred=pr.predict(future_X)
    page.append(plot(df1,pred,catalog))

# Two pyecharts line charts per row; load_javascript() is left as the cell's
# last expression so its return value is displayed by the notebook.
grid=create_dynamic_grid(page,2)
grid.load_javascript()

<pyecharts.render.display.Javascript at 0x29de7636e70>

In [630]:
# Display the grid assembled in the previous cell inline in the notebook.
grid.render_notebook()

In [330]:
# Spot-check one catalog number: the second entry of the revenue ranking.
cat=cc['CatalogNumber'][1]

# The training cell stores each fitted VotingRegressor with pickle.dump, so the
# file has to be read back with pickle.load. XGBRFRegressor.load_model expects
# XGBoost's own model format and cannot read these files.
# NOTE(review): pickle.load can execute arbitrary code - only load model files
# produced by the training cell of this notebook.
with open(f'models/{cat}.pkl','rb') as f:
    pr=pickle.load(f)

# Predict on a pandas frame, as the other forecasting cells do.
pred=pr.predict(fp[['month','days']].to_pandas())

# plot() returns an un-rendered Line chart; render it so the cell shows the chart.
plot(df1,pred,cat).render_notebook()

In [None]:
def preprocess(df, lags=(3, 4, 5, 6, 12)):
    """Add lagged-actuals features and a calendar-month feature.

    For every ``n`` in ``lags`` a column ``lag<n>`` is added holding
    ``\u0060Act Orders Rev`` shifted by ``n`` rows within each ``CatalogNumber``.
    Rows where the shifted value is null (for example the first ``n`` rows of
    a catalog number) fall back to that row's own ``\u0060Act Orders Rev``.

    Parameters
    ----------
    df : polars.DataFrame
        Frame with ``CatalogNumber``, ``SALES_DATE`` and ``\u0060Act Orders Rev``.
    lags : iterable of int, optional
        Row offsets to generate. Defaults to ``(3, 4, 5, 6, 12)``, the set
        this notebook has always produced.

    Returns
    -------
    polars.DataFrame
        ``df`` plus one ``lag<n>`` column per requested lag (in the order
        given) followed by ``month``.

    Notes
    -----
    NOTE(review): ``shift`` works on row order, not on dates - this assumes the
    rows of each catalog number are already sorted by ``SALES_DATE`` with no
    missing months; confirm at the call site.
    NOTE(review): the null fallback copies the current target value into the
    feature, which exposes the target to the model on those rows - confirm
    this is intended.
    """
    actual = pl.col('`Act Orders Rev')
    lag_features = [
        actual.shift(n).over('CatalogNumber').fill_null(actual).alias(f'lag{n}')
        for n in lags
    ]
    return df.with_columns(*lag_features, month=pl.col('SALES_DATE').dt.month())