# Fit the model to the RS10 data 

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model    import *
from sklearn.metrics         import *
from imblearn.pipeline import Pipeline
from sklearn.preprocessing   import StandardScaler
from sklearn.preprocessing   import OneHotEncoder
from sklearn.preprocessing   import LabelEncoder
from   sklearn.compose            import *
from   sklearn.ensemble           import RandomForestClassifier, ExtraTreesClassifier, IsolationForest
from   sklearn.experimental       import enable_iterative_imputer
from   sklearn.impute             import *
from   sklearn.linear_model       import LogisticRegression, PassiveAggressiveClassifier, RidgeClassifier, SGDClassifier
from   sklearn.metrics            import roc_auc_score # We have not covered it yet in class. The basics - AUC is from 0 to 1 and higher is better.
#from   sklearn.pipeline           import Pipeline
from   sklearn.preprocessing      import *
from   sklearn.tree               import *
from   sklearn.metrics            import balanced_accuracy_score
from   sklearn.feature_selection import *

In [None]:
# Raw RS10 service records (4661 rows).
df_RS10 = pd.read_excel(
    'Rts_975_988-990/RS10_975_988-990_Raw.xlsx',
    sheet_name='Sheet1',
)

# Access sheet covering all accounts.
df_access = pd.read_excel(
    'Rts_975_988-990/RGG Access  2 04 2021.xlsx',
    sheet_name='RGG Access',
)

# Route -> neighborhood lookup (first sheet of the workbook).
df_neighborhood = pd.read_excel('Rts_975_988-990/Route_Neighborhood_1-1_Final.xlsx')

In [None]:
# CAUTION: the workbook must already be sorted by route, then by day, before it is uploaded.
# Only spreadsheet columns A, B and M are read; they are renamed and the first
# data row is discarded.
df_weights = (
    pd.read_excel('Rts_975_988-990/Weights_0927-1219.xlsx', usecols='A,B,M')
    .set_axis(['Route', 'Day', 'Weights'], axis=1)
    .iloc[1:]
)

In [None]:
# Route numbers become integers so they can be matched against Z1ROUT later on.
df_weights['Route'] = df_weights['Route'].astype('int')

# Weekday names are swapped for 1 (Monday) .. 5 (Friday), one replace per name.
for day_number, day_name in enumerate(('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'), start=1):
    df_weights.replace(day_name, day_number, inplace=True)

In [None]:
# Volume per RS10 record: the sum of (unit count x container size) over the three
# unit/size column pairs. It is computed on a temporary frame so that df_RS10 never
# gains a helper column that has to be dropped again afterwards.
rs10_with_volume = df_RS10.assign(
    Total_Vol=(df_RS10['Z1RUNT'] * df_RS10['Z1RSIZ'])
    + (df_RS10['Z1UNIT'] * df_RS10['Z1SIZE'])
    + (df_RS10['Z1OUNT'] * df_RS10['Z1OSIZ'])
)

# Total volume per (route, day). aggfunc is given as the string 'sum' because passing
# the np.sum callable is deprecated in recent pandas releases.
route_total_weights = pd.pivot_table(
    rs10_with_volume,
    index=['Z1ROUT', 'Z1DAY'],
    values=['Total_Vol'],
    aggfunc='sum',
).reset_index()

# Left join keeps every (route, day) found in RS10; Weights is NaN where df_weights has no match.
route_merged = route_total_weights.merge(
    df_weights,
    left_on=['Z1ROUT', 'Z1DAY'],
    right_on=['Route', 'Day'],
    how='left',
)

# NOTE(review): 3000 is an undocumented scaling factor applied to Weights — confirm its meaning/units.
route_merged['Wt/Vol'] = (route_merged['Weights'] * 3000) / route_merged['Total_Vol']

In [None]:
# Output column label -> size code as it appears in Z1RSIZ / Z1SIZE / Z1OSIZ.
_CONTAINER_SIZE_CODES = {
    '16 gal': 16,
    '20 gal': 20,
    '32 gal': 32,
    '64 gal': 64,
    '96 gal': 96,
    '1 yrd': 100,
    '1.5 yrd': 150,
    '2 yrd': 200,
    '3 yrd': 300,
    '4 yrd': 400,
    '5 yrd': 500,
    '6 yrd': 600,
}

# (size column, unit-count column) pairs; each pair contributes to the per-size counts.
# NOTE(review): presumably the recycling / garbage / organics streams — confirm the mapping.
_SIZE_UNIT_COLUMN_PAIRS = (
    ('Z1RSIZ', 'Z1RUNT'),
    ('Z1SIZE', 'Z1UNIT'),
    ('Z1OSIZ', 'Z1OUNT'),
)


def prepare_df(df_RS10, df_access, df_neighborhood):
    """Build the per-service table of container counts from the raw RS10 records.

    Parameters
    ----------
    df_RS10 : pandas.DataFrame
        Raw records. Columns read here: 'Z1SVC#', 'Z1ROUT', 'Z1COMM', 'Z1TIMS',
        'Z1CO', 'Z1DAY' and the size/unit pairs Z1RSIZ/Z1RUNT, Z1SIZE/Z1UNIT,
        Z1OSIZ/Z1OUNT.
        Side effect: rows whose 'Z1SVC#' is null are dropped from this frame
        IN PLACE, so the caller's object is modified.
    df_access : pandas.DataFrame
        Needs 'Acct #' and 'Code'. Only the first row per 'Acct #' is used.
    df_neighborhood : pandas.DataFrame
        Needs 'Route' and 'Neighborhood'.

    Returns
    -------
    pandas.DataFrame
        One row per ('Z1SVC#', 'Commodity', 'Day', 'Company', 'Has Key',
        'Neighborhood') combination, with the summed per-size container counts
        and '#Units'.

    Raises
    ------
    KeyError
        If a required column is missing.
    IndexError
        If 'Neighborhood', 'Z1COMM', 'Z1TIMS' or 'Z1CO' holds no non-null value,
        so there is no most-frequent value to fill with.
    """
    # Only `subset` is passed: supplying both `how` and `thresh` is rejected by
    # pandas 2.x. The drop stays in place because callers may rely on it.
    df_RS10.dropna(subset=['Z1SVC#'], inplace=True)

    # 'Has Key' is True when the service number has a non-null access Code.
    access_unique = df_access.drop_duplicates(subset='Acct #', keep='first')
    df = df_RS10.merge(
        access_unique[['Acct #', 'Code']],
        left_on='Z1SVC#',
        right_on='Acct #',
        how='left',
    )
    df['Has Key'] = df['Code'].notnull()

    # CAUTION: the route -> neighborhood mapping should contain ALL routes in RS10.
    # Routes without a match fall back to the most frequent neighborhood below.
    df = df.merge(df_neighborhood, left_on='Z1ROUT', right_on='Route', how='left')

    # CAUTION: these columns can contain nulls; fill each with its most frequent value.
    for column in ('Neighborhood', 'Z1COMM', 'Z1TIMS', 'Z1CO'):
        df[column] = df[column].fillna(df[column].value_counts().index[0])

    # Per-size counts: for every size code, add up the unit count of each pair whose
    # size column equals that code. (The original '6 yrd' line tested Z1RSIZ instead
    # of Z1SIZE for the Z1UNIT contribution; every pair is now handled uniformly.)
    for label, size_code in _CONTAINER_SIZE_CODES.items():
        df[label] = sum(
            np.where(df[size_col] == size_code, df[unit_col], 0)
            for size_col, unit_col in _SIZE_UNIT_COLUMN_PAIRS
        )

    # '#Units' is the total unit count across the three unit columns.
    df['#Units'] = df['Z1RUNT'] + df['Z1OUNT'] + df['Z1UNIT']

    df = df.rename(columns={'Z1COMM': 'Commodity', 'Z1CO': 'Company', 'Z1DAY': 'Day'})

    # Collapse to one row per service/commodity/day/company/key/neighborhood.
    # aggfunc='sum' replaces the deprecated np.sum callable.
    value_columns = list(_CONTAINER_SIZE_CODES) + ['#Units']
    df_new = pd.pivot_table(
        df,
        index=['Z1SVC#', 'Commodity', 'Day', 'Company', 'Has Key', 'Neighborhood'],
        values=value_columns,
        aggfunc='sum',
    ).reset_index()

    final = df_new[
        ['Z1SVC#', 'Commodity'] + value_columns + ['Day', 'Company', 'Has Key', 'Neighborhood']
    ]

    return final

In [None]:
# Build the per-service table of container counts.
# NOTE: prepare_df drops rows with a null 'Z1SVC#' from df_RS10 in place, so
# df_RS10 itself may lose rows as a side effect of this call.
final=prepare_df(df_RS10, df_access, df_neighborhood)

In [None]:
len(final)

4527

In [None]:

# Write the prepared table to Excel; index=False leaves the DataFrame index out of the sheet.
final.to_excel('Prepared_RS10_Routes975_998-990.xlsx',index=False)

In [None]:
# Load the predictions table that is merged back onto the RS10 records below.
# NOTE(review): absolute '/work/...' path, and no cell visible in this notebook writes
# this file — presumably it is produced by a separate modelling step; confirm.
pred_df=pd.read_excel('/work/Final_Predictions_Routes975_998-990.xlsx')

In [None]:
len(pred_df)

4527

In [None]:
# Attach the predictions to every raw RS10 row. The left join keeps all RS10 rows;
# rows with no matching (service, commodity, day, company) key get NaN in the pred_df columns.
FinalRS10_with_pred = df_RS10.merge(
    pred_df,
    how='left',
    left_on=['Z1SVC#', 'Z1COMM', 'Z1DAY', 'Z1CO'],
    right_on=['Z1SVC#', 'Commodity', 'Day', 'Company'],
)

In [None]:
FinalRS10_with_pred

In [None]:
route_merged

Unnamed: 0,Z1ROUT,Z1DAY,Total_Vol,Route,Day,Weights,Wt/Vol
0,975,1,14206,975,1,2.321667,0.490286
1,975,2,29446,975,2,4.426667,0.450995
2,975,3,38728,975,3,5.486389,0.424994
3,975,4,15044,975,4,2.630833,0.524628
4,975,5,16506,975,5,2.214167,0.402429
5,988,1,24896,988,1,3.4475,0.415428
6,988,2,18720,988,2,3.227222,0.517183
7,988,3,20896,988,3,2.718889,0.390346
8,988,4,18656,988,4,2.801389,0.450481
9,988,5,24672,988,5,2.695833,0.327801


In [None]:
# Add the per-(route, day) volume and weight figures to each row. Both frames use
# the same key names, so a single `on=` list expresses the join.
FinalRS10_with_pred = FinalRS10_with_pred.merge(
    route_merged,
    on=['Z1ROUT', 'Z1DAY'],
    how='left',
)

In [None]:
FinalRS10_with_pred

Unnamed: 0,Z1CO,Z1ADR#,Z1HOUS,Z1STNM,Z1APT#,Z1CITY,Z1ST,Z1ZIP,Z1DAY,Z1ROUT,...,Day_x,Company,Has Key,Neighborhood,time_pred,Total_Vol,Route,Day_y,Weights,Wt/Vol
0,1,3762,2257,CHESTNUT ST,,SAN FRANCISCO,CA,94123,1,975,...,1.0,1.0,False,Marina,80.562840,14206,975,1,2.321667,0.490286
1,1,17478,3201,PIERCE ST,,SAN FRANCISCO,CA,94123,1,975,...,1.0,1.0,False,Marina,49.190000,14206,975,1,2.321667,0.490286
2,1,22530,2034,UNION ST,,SAN FRANCISCO,CA,94123,1,975,...,1.0,1.0,False,Marina,49.277106,14206,975,1,2.321667,0.490286
3,1,45668,1,LETTERMAN DR,BC,SAN FRANCISCO,CA,94129,1,975,...,1.0,1.0,False,Marina,142.315558,14206,975,1,2.321667,0.490286
4,1,45254,1,LETTERMAN DR,150,SAN FRANCISCO,CA,94129,1,975,...,1.0,1.0,True,Marina,77.244433,14206,975,1,2.321667,0.490286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4656,1,3189,66,CASA WY,,SAN FRANCISCO,CA,94123,2,990,...,2.0,1.0,False,Marina,49.190000,27904,990,2,4.838333,0.520176
4657,1,14445,393,MARINA BLVD,,SAN FRANCISCO,CA,94123,2,990,...,2.0,1.0,False,Marina,49.505768,27904,990,2,4.838333,0.520176
4658,1,14447,425,MARINA BLVD,,SAN FRANCISCO,CA,94123,2,990,...,2.0,1.0,False,Marina,80.562840,27904,990,2,4.838333,0.520176
4659,1,286,465,AVILA ST,,SAN FRANCISCO,CA,94123,2,990,...,2.0,1.0,False,Marina,49.190000,27904,990,2,4.838333,0.520176


In [None]:
len(FinalRS10_with_pred)

4661

In [None]:
len(df_RS10)

4661

In [None]:
# Export the RS10 rows with predictions and route metrics attached. No index=False is
# passed here, so the DataFrame index is written as the first column of the sheet.
FinalRS10_with_pred.to_excel('RS10_with_predictions.xlsx')

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=938c6ad9-491d-4307-bf8a-c751a244ce4f' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>