In [None]:
import pandas as pd
import glob
from functools import reduce
import numpy as np

pd.options.display.float_format = '{:.6f}'.format
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Weights for each of the model

### Ranking looks as follows
1) Drift - 0.5 <br>
2) poly - 0.1 <br>
3) ETS - 0.4 <br>
These are empirical weights obtained after trial and error!

In [170]:
weights = [0.5, 0.1, 0.4]

# Feature: Dosage units

In [171]:
# Importing data
folder_path = 'average_spend_dosage_unit\\'
drift_df = pd.read_csv(folder_path+"Drift_method_avg_spend_dosage_unit.csv")
ets_df = pd.read_csv(folder_path+"ETS_avg_spend_dosage_unit.csv")
poly_deg_2_df = pd.read_csv(folder_path+"poly_deg_2_spend_dosage_unit.csv")

In [172]:
# Adding suffix to mark column names
drift_df.columns = ['Brand Name']+['drift_'+i for i in drift_df.columns[1:]]
ets_df.columns = ['Brand Name']+['ets_'+i for i in ets_df.columns[1:]]
poly_deg_2_df.columns = ['Brand Name']+['poly_'+i for i in poly_deg_2_df.columns[1:]]

In [175]:
# Making sure all the drug names and are in order
list(set(drift_df['Brand Name'].values).difference(ets_df['Brand Name'].values))
list(set(drift_df['Brand Name'].values).difference(poly_deg_2_df['Brand Name'].values))
list(set(poly_deg_2_df['Brand Name'].values).difference(ets_df['Brand Name'].values))

[]

[]

[]

In [176]:
df_list = [drift_df, ets_df, poly_deg_2_df]
avg_spending_dosage_unit_final_df = pd.DataFrame()
avg_spending_dosage_unit_final_df['Brand Name'] = drift_df['Brand Name']


def weighted_average(val):
    return np.average(val.values.tolist(), weights=weights)

# Generating predictions
for year in range(2026, 2032):
    col = "Average Spending Per Dosage Unit (Weighted)_"+str(year)
    
    # Extract the predictions from all the approaches
    df_list = [drift_df[['Brand Name', 'drift_'+col]],ets_df[['Brand Name', 'ets_'+col]], 
               poly_deg_2_df[['Brand Name', 'poly_'+col]]]
    
    # Concatenate into a dataframe
    df_conc = pd.concat([drift_df[['drift_'+col]], 
                        ets_df[['ets_'+col]], poly_deg_2_df[['poly_'+col]]], axis=1)
    
    # Take weighted mean from all approaches 
    df_conc[col+'_mean'] = df_conc.apply(weighted_average, axis=1)
    avg_spending_dosage_unit_final_df[col+'_mean'] = df_conc[col+'_mean']
    
    df_conc.head()

Unnamed: 0,drift_Average Spending Per Dosage Unit (Weighted)_2026,ets_Average Spending Per Dosage Unit (Weighted)_2026,poly_Average Spending Per Dosage Unit (Weighted)_2026,Average Spending Per Dosage Unit (Weighted)_2026_mean
0,0.280547,0.249837,0.238065,0.260483
1,0.302472,0.253301,0.287671,0.291635
2,0.0,4.883804,4.521046,2.296799
3,20.713665,20.806655,23.013047,21.642717
4,0.0,2.430425,0.0,0.243042


Unnamed: 0,drift_Average Spending Per Dosage Unit (Weighted)_2027,ets_Average Spending Per Dosage Unit (Weighted)_2027,poly_Average Spending Per Dosage Unit (Weighted)_2027,Average Spending Per Dosage Unit (Weighted)_2027_mean
0,0.327376,0.254726,0.239885,0.285115
1,0.366158,0.257699,0.306369,0.331396
2,0.0,5.189272,5.035196,2.533005
3,19.707254,20.707117,23.527293,21.335256
4,0.0,1.810044,0.0,0.181004


Unnamed: 0,drift_Average Spending Per Dosage Unit (Weighted)_2028,ets_Average Spending Per Dosage Unit (Weighted)_2028,poly_Average Spending Per Dosage Unit (Weighted)_2028,Average Spending Per Dosage Unit (Weighted)_2028_mean
0,0.411668,0.259616,0.241389,0.328351
1,0.490344,0.262096,0.326907,0.402144
2,0.0,5.494741,5.649358,2.809217
3,17.828622,20.607579,24.109199,20.618748
4,0.0,1.189664,0.0,0.118966


Unnamed: 0,drift_Average Spending Per Dosage Unit (Weighted)_2029,ets_Average Spending Per Dosage Unit (Weighted)_2029,poly_Average Spending Per Dosage Unit (Weighted)_2029,Average Spending Per Dosage Unit (Weighted)_2029_mean
0,0.569013,0.264506,0.242575,0.407987
1,0.743891,0.266493,0.349285,0.538309
2,0.0,5.80021,6.363533,3.125434
3,14.175726,20.508041,24.758765,19.042173
4,0.0,0.569283,0.0,0.056928


Unnamed: 0,drift_Average Spending Per Dosage Unit (Weighted)_2030,ets_Average Spending Per Dosage Unit (Weighted)_2030,poly_Average Spending Per Dosage Unit (Weighted)_2030,Average Spending Per Dosage Unit (Weighted)_2030_mean
0,0.874712,0.269396,0.243445,0.561673
1,1.287205,0.27089,0.373504,0.820093
2,0.0,6.105678,7.177721,3.481656
3,6.753968,20.408502,25.475992,15.608231
4,0.0,0.0,0.0,0.0


Unnamed: 0,drift_Average Spending Per Dosage Unit (Weighted)_2031,ets_Average Spending Per Dosage Unit (Weighted)_2031,poly_Average Spending Per Dosage Unit (Weighted)_2031,Average Spending Per Dosage Unit (Weighted)_2031_mean
0,1.493752,0.274285,0.243997,0.871904
1,2.509663,0.275287,0.399562,1.442185
2,0.0,6.411147,8.091921,3.877883
3,0.0,20.308964,26.260879,12.535248
4,0.0,0.0,0.0,0.0


### Generate predictions for year 2026

In [180]:
df_2026_dosage = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Dosage Unit (Weighted)_2026_mean']]\
                                .sort_values(by='Average Spending Per Dosage Unit (Weighted)_2026_mean', ascending=False)

df_2026_dosage['rank'] = df_2026_dosage['Average Spending Per Dosage Unit (Weighted)_2026_mean'].rank(method='dense', ascending=False)

In [181]:
df_2026_dosage.to_csv("forecasted\\dosage\\df_2026_dosage.csv", index=False)

### Generate predictions for year 2027

In [182]:
df_2027_dosage = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Dosage Unit (Weighted)_2027_mean']]\
                                .sort_values(by='Average Spending Per Dosage Unit (Weighted)_2027_mean', ascending=False)

df_2027_dosage['rank'] = df_2027_dosage['Average Spending Per Dosage Unit (Weighted)_2027_mean'].rank(method='dense', ascending=False)

In [183]:
df_2027_dosage.to_csv("forecasted\\dosage\\df_2027_dosage.csv", index=False)

### Generate predictions for year 2028

In [184]:
df_2028_dosage = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Dosage Unit (Weighted)_2028_mean']]\
                                .sort_values(by='Average Spending Per Dosage Unit (Weighted)_2028_mean', ascending=False)

df_2028_dosage['rank'] = df_2028_dosage['Average Spending Per Dosage Unit (Weighted)_2028_mean'].rank(method='dense', ascending=False)

In [185]:
df_2028_dosage.to_csv("forecasted\\dosage\\df_2028_dosage.csv", index=False)

### Generate predictions for year 2029

In [186]:
df_2029_dosage = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Dosage Unit (Weighted)_2029_mean']]\
                                .sort_values(by='Average Spending Per Dosage Unit (Weighted)_2029_mean', ascending=False)

df_2029_dosage['rank'] = df_2029_dosage['Average Spending Per Dosage Unit (Weighted)_2029_mean'].rank(method='dense', ascending=False)

In [187]:
df_2029_dosage.to_csv("forecasted\\dosage\\df_2029_dosage.csv", index=False)

### Generate predictions for year 2030

In [188]:
df_2030_dosage = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Dosage Unit (Weighted)_2030_mean']]\
                                .sort_values(by='Average Spending Per Dosage Unit (Weighted)_2030_mean', ascending=False)

df_2030_dosage['rank'] = df_2030_dosage['Average Spending Per Dosage Unit (Weighted)_2030_mean'].rank(method='dense', ascending=False)

In [189]:
df_2030_dosage.to_csv("forecasted\\dosage\\df_2030_dosage.csv", index=False)

### Generate predictions for year 2031

In [190]:
df_2031_dosage = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Dosage Unit (Weighted)_2031_mean']]\
                                .sort_values(by='Average Spending Per Dosage Unit (Weighted)_2031_mean', ascending=False)

df_2031_dosage['rank'] = df_2031_dosage['Average Spending Per Dosage Unit (Weighted)_2031_mean'].rank(method='dense', ascending=False)

In [191]:
df_2031_dosage.to_csv("forecasted\\dosage\\df_2031_dosage.csv", index=False)

# Feature: Claims

In [192]:
# Importing data
folder_path = 'average_spend_claims\\'
drift_df = pd.read_csv(folder_path+"Drift_method_avg_spend_claim.csv")
ets_df = pd.read_csv(folder_path+"ETS_avg_spend_claims.csv")
poly_deg_2_df = pd.read_csv(folder_path+"poly_deg_2_spend_claims.csv")

In [193]:
# Adding suffix to mark column names
drift_df.columns = ['Brand Name']+['drift_'+i for i in drift_df.columns[1:]]
ets_df.columns = ['Brand Name']+['ets_'+i for i in ets_df.columns[1:]]
poly_deg_2_df.columns = ['Brand Name']+['poly_'+i for i in poly_deg_2_df.columns[1:]]

In [196]:
# Making sure all the drug names and are in order

list(set(drift_df['Brand Name'].values).difference(ets_df['Brand Name'].values))
list(set(drift_df['Brand Name'].values).difference(poly_deg_2_df['Brand Name'].values))
list(set(poly_deg_2_df['Brand Name'].values).difference(ets_df['Brand Name'].values))

[]

[]

[]

In [197]:
df_list = [drift_df, ets_df, poly_deg_2_df]
avg_spending_dosage_unit_final_df = pd.DataFrame()
avg_spending_dosage_unit_final_df['Brand Name'] = drift_df['Brand Name']

def weighted_average(val):
    return np.average(val.values.tolist(), weights=weights)

# Generating predictions
for year in range(2026, 2032):
    col = "Average Spending Per Claim_"+str(year)
    
    # Extract the predictions from all the approaches
    df_list = [drift_df[['Brand Name', 'drift_'+col]], 
               ets_df[['Brand Name', 'ets_'+col]], poly_deg_2_df[['Brand Name', 'poly_'+col]]]
    
    # Concatenate into a dataframe
    df_conc = pd.concat([drift_df[['drift_'+col]], 
                        ets_df[['ets_'+col]], poly_deg_2_df[['poly_'+col]]], axis=1)
    
    # Take weighted mean from all approaches 
    df_conc[col+'_mean'] = df_conc.apply(weighted_average, axis=1)
    avg_spending_dosage_unit_final_df[col+'_mean'] = df_conc[col+'_mean']
    
    df_conc.head()

Unnamed: 0,drift_Average Spending Per Claim_2026,ets_Average Spending Per Claim_2026,poly_Average Spending Per Claim_2026,Average Spending Per Claim_2026_mean
0,36.798312,30.931964,31.151522,33.952961
1,41.240226,32.808166,37.531531,38.913542
2,0.0,382.719703,374.310203,187.996051
3,469.547984,1070.755139,1167.144709,808.70739
4,274.161761,582.010179,0.0,195.281898


Unnamed: 0,drift_Average Spending Per Claim_2027,ets_Average Spending Per Claim_2027,poly_Average Spending Per Claim_2027,Average Spending Per Claim_2027_mean
0,44.33037,31.624565,31.911089,38.092077
1,52.155123,33.560809,40.260767,45.537949
2,0.0,410.565898,421.890562,209.812815
3,0.0,1039.027524,1161.778434,568.614126
4,0.0,515.481505,0.0,51.54815


Unnamed: 0,drift_Average Spending Per Claim_2028,ets_Average Spending Per Claim_2028,poly_Average Spending Per Claim_2028,Average Spending Per Claim_2028_mean
0,57.888072,32.317167,32.678322,45.247082
1,73.439173,34.313452,43.248294,57.450249
2,0.0,438.412092,477.001245,234.641707
3,0.0,1007.29991,1159.282056,564.442814
4,0.0,448.952831,0.0,44.895283


Unnamed: 0,drift_Average Spending Per Claim_2029,ets_Average Spending Per Claim_2029,poly_Average Spending Per Claim_2029,Average Spending Per Claim_2029_mean
0,83.195784,33.009769,33.45322,58.280157
1,116.894106,35.066096,46.494113,80.551308
2,0.0,466.258287,539.642253,262.48273
3,0.0,975.572295,1159.655577,561.41946
4,0.0,382.424157,0.0,38.242416


Unnamed: 0,drift_Average Spending Per Claim_2030,ets_Average Spending Per Claim_2030,poly_Average Spending Per Claim_2030,Average Spending Per Claim_2030_mean
0,132.365052,33.702371,34.235784,83.247077
1,210.011821,35.818739,49.998224,128.587074
2,0.0,494.104481,609.813584,293.335882
3,0.0,943.844681,1162.898996,559.544066
4,0.0,315.895483,0.0,31.589548


Unnamed: 0,drift_Average Spending Per Claim_2031,ets_Average Spending Per Claim_2031,poly_Average Spending Per Claim_2031,Average Spending Per Claim_2031_mean
0,231.932821,34.394972,35.026013,133.416313
1,419.52668,36.571382,53.760628,234.924729
2,0.0,521.950676,687.51524,327.201164
3,0.0,912.117067,1169.012313,558.816632
4,0.0,249.366809,0.0,24.936681


###  Generate predictions for year 2026

In [198]:
df_2026_claims = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Claim_2026_mean']]\
                                .sort_values(by='Average Spending Per Claim_2026_mean', ascending=False)

df_2026_claims['rank'] = df_2026_claims['Average Spending Per Claim_2026_mean'].rank(method='dense', ascending=False)

df_2026_claims.to_csv("forecasted\\claim\\df_2026_claims.csv", index=False)

### Generate predictions for year 2027

In [199]:
df_2027_claims = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Claim_2027_mean']]\
                                .sort_values(by='Average Spending Per Claim_2027_mean', ascending=False)

df_2027_claims['rank'] = df_2027_claims['Average Spending Per Claim_2027_mean'].rank(method='dense', ascending=False)

df_2027_claims.to_csv("forecasted\\claim\\df_2027_claims.csv", index=False)

### Generate predictions for year 2028

In [200]:
df_2028_claims = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Claim_2028_mean']]\
                                .sort_values(by='Average Spending Per Claim_2028_mean', ascending=False)

df_2028_claims['rank'] = df_2028_claims['Average Spending Per Claim_2028_mean'].rank(method='dense', ascending=False)

df_2028_claims.to_csv("forecasted\\claim\\df_2028_claims.csv", index=False)

### Generate predictions for year 2029

In [201]:
df_2029_claims = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Claim_2029_mean']]\
                                .sort_values(by='Average Spending Per Claim_2029_mean', ascending=False)

df_2029_claims['rank'] = df_2029_claims['Average Spending Per Claim_2029_mean'].rank(method='dense', ascending=False)

df_2029_claims.to_csv("forecasted\\claim\\df_2029_claims.csv", index=False)

### Generate predictions for year 2030

In [202]:
df_2030_claims = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Claim_2030_mean']]\
                                .sort_values(by='Average Spending Per Claim_2030_mean', ascending=False)

df_2030_claims['rank'] = df_2030_claims['Average Spending Per Claim_2030_mean'].rank(method='dense', ascending=False)

df_2030_claims.to_csv("forecasted\\claim\\df_2030_claims.csv", index=False)

### Generate predictions for year 2031

In [203]:
df_2026_claims = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Claim_2026_mean']]\
                                .sort_values(by='Average Spending Per Claim_2026_mean', ascending=False)

df_2026_claims['rank'] = df_2026_claims['Average Spending Per Claim_2026_mean'].rank(method='dense', ascending=False)

df_2026_claims.to_csv("forecasted\\claim\\df_2026_claims.csv", index=False)

# Feature: Beneficiaries

In [204]:
# Importing data
folder_path = 'average_spend_beneficiaries\\'
drift_df = pd.read_csv(folder_path+"Drift_method_avg_spend_beneficiaries.csv")
ets_df = pd.read_csv(folder_path+"ETS_avg_spend_beneficiaries.csv")
poly_deg_2_df = pd.read_csv(folder_path+"poly_deg_2_spend_beneficiaries.csv")

In [205]:
# Adding suffix to mark column names
drift_df.columns = ['Brand Name']+['drift_'+i for i in drift_df.columns[1:]]
ets_df.columns = ['Brand Name']+['ets_'+i for i in ets_df.columns[1:]]
poly_deg_2_df.columns = ['Brand Name']+['poly_'+i for i in poly_deg_2_df.columns[1:]]

In [208]:
# Making sure all the drug names and are in order
list(set(drift_df['Brand Name'].values).difference(ets_df['Brand Name'].values))
list(set(drift_df['Brand Name'].values).difference(poly_deg_2_df['Brand Name'].values))
list(set(poly_deg_2_df['Brand Name'].values).difference(ets_df['Brand Name'].values))

[]

[]

[]

In [209]:
df_list = [drift_df, ets_df, poly_deg_2_df]
avg_spending_dosage_unit_final_df = pd.DataFrame()
avg_spending_dosage_unit_final_df['Brand Name'] = drift_df['Brand Name']

def weighted_average(val):
    return np.average(val.values.tolist(), weights=weights)

# Generating predictions
for year in range(2026, 2032):
    col = "Average Spending Per Beneficiary_"+str(year)
    
    # Extract the predictions from all the approaches
    df_list = [drift_df[['Brand Name', 'drift_'+col]], 
               ets_df[['Brand Name', 'ets_'+col]], poly_deg_2_df[['Brand Name', 'poly_'+col]]]
    
    # Concatenate into a dataframe
    df_conc = pd.concat([drift_df[['drift_'+col]], 
                        ets_df[['ets_'+col]], poly_deg_2_df[['poly_'+col]]], axis=1)
    
    # Take weighted mean from all approaches 
    df_conc[col+'_mean'] = df_conc.apply(weighted_average, axis=1)
    avg_spending_dosage_unit_final_df[col+'_mean'] = df_conc[col+'_mean']
    
    df_conc.head()

Unnamed: 0,drift_Average Spending Per Beneficiary_2026,ets_Average Spending Per Beneficiary_2026,poly_Average Spending Per Beneficiary_2026,Average Spending Per Beneficiary_2026_mean
0,62.066954,80.122283,64.462921,64.830874
1,145.753973,90.297973,81.688784,114.582298
2,2253.563242,1025.434659,1625.398432,1879.48446
3,9412.319981,6587.074647,0.0,5364.867455
4,0.0,1365.392935,0.0,136.539294


Unnamed: 0,drift_Average Spending Per Beneficiary_2027,ets_Average Spending Per Beneficiary_2027,poly_Average Spending Per Beneficiary_2027,Average Spending Per Beneficiary_2027_mean
0,62.926613,81.644848,61.841724,64.364481
1,210.622855,93.969279,82.914997,147.874354
2,2460.43514,902.639647,1645.971434,1978.870108
3,13754.424845,6452.236294,0.0,7522.436052
4,0.0,1049.778327,0.0,104.977833


Unnamed: 0,drift_Average Spending Per Beneficiary_2028,ets_Average Spending Per Beneficiary_2028,poly_Average Spending Per Beneficiary_2028,Average Spending Per Beneficiary_2028_mean
0,64.473997,83.167412,58.787838,64.068875
1,337.117174,97.640585,83.862781,211.867758
2,2832.804558,779.844635,1679.843795,2166.32426
3,21859.687259,6317.39794,0.0,11561.583423
4,0.0,734.163719,0.0,73.416372


Unnamed: 0,drift_Average Spending Per Beneficiary_2029,ets_Average Spending Per Beneficiary_2029,poly_Average Spending Per Beneficiary_2029,Average Spending Per Beneficiary_2029_mean
0,67.362449,84.689977,55.301261,64.270727
1,595.37641,101.311891,84.532136,341.632248
2,3527.894137,657.049622,1727.015515,2520.458237
3,37619.91973,6182.559587,0.0,19428.215823
4,0.0,418.549111,0.0,41.854911


Unnamed: 0,drift_Average Spending Per Beneficiary_2030,ets_Average Spending Per Beneficiary_2030,poly_Average Spending Per Beneficiary_2030,Average Spending Per Beneficiary_2030_mean
0,72.974298,86.212542,51.381995,65.661201
1,1148.789057,104.983197,84.923061,618.862073
2,4878.353891,534.25461,1787.486593,3207.597044
3,69640.709512,6047.721233,0.0,35425.126879
4,0.0,102.934503,0.0,10.29345


Unnamed: 0,drift_Average Spending Per Beneficiary_2031,ets_Average Spending Per Beneficiary_2031,poly_Average Spending Per Beneficiary_2031,Average Spending Per Beneficiary_2031_mean
0,84.338291,87.735106,47.030039,69.754672
1,2393.967513,108.654503,85.035558,1241.86343
2,7613.034893,411.459597,1861.25703,4592.166218
3,137684.887799,5912.88288,0.0,69433.732188
4,0.0,0.0,0.0,0.0


### Generate predictions for year 2026

In [210]:
df_2026_bene = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Beneficiary_2026_mean']]\
                                .sort_values(by='Average Spending Per Beneficiary_2026_mean', ascending=False)

df_2026_bene['rank'] = df_2026_bene['Average Spending Per Beneficiary_2026_mean'].rank(method='dense', ascending=False)

df_2026_bene.to_csv("forecasted\\bene\\df_2026_bene.csv", index=False)

### Generate predictions for year 2027

In [211]:
df_2027_bene = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Beneficiary_2027_mean']]\
                                .sort_values(by='Average Spending Per Beneficiary_2027_mean', ascending=False)

df_2027_bene['rank'] = df_2027_bene['Average Spending Per Beneficiary_2027_mean'].rank(method='dense', ascending=False)

df_2027_bene.to_csv("forecasted\\bene\\df_2027_bene.csv", index=False)

### Generate predictions for year 2028

In [212]:
df_2028_bene = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Beneficiary_2028_mean']]\
                                .sort_values(by='Average Spending Per Beneficiary_2028_mean', ascending=False)

df_2028_bene['rank'] = df_2028_bene['Average Spending Per Beneficiary_2028_mean'].rank(method='dense', ascending=False)

df_2028_bene.to_csv("forecasted\\bene\\df_2028_bene.csv", index=False)

### Generate predictions for year 2029

In [213]:
df_2029_bene = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Beneficiary_2029_mean']]\
                                .sort_values(by='Average Spending Per Beneficiary_2029_mean', ascending=False)

df_2029_bene['rank'] = df_2029_bene['Average Spending Per Beneficiary_2029_mean'].rank(method='dense', ascending=False)

df_2029_bene.to_csv("forecasted\\bene\\df_2029_bene.csv", index=False)

### Generate predictions for year 2030

In [214]:
df_2030_bene = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Beneficiary_2030_mean']]\
                                .sort_values(by='Average Spending Per Beneficiary_2030_mean', ascending=False)

df_2030_bene['rank'] = df_2030_bene['Average Spending Per Beneficiary_2030_mean'].rank(method='dense', ascending=False)

df_2030_bene.to_csv("forecasted\\bene\\df_2030_bene.csv", index=False)

### Generate predictions for year 2031

In [215]:
df_2031_bene = avg_spending_dosage_unit_final_df[['Brand Name', 'Average Spending Per Beneficiary_2031_mean']]\
                                .sort_values(by='Average Spending Per Beneficiary_2031_mean', ascending=False)

df_2031_bene['rank'] = df_2031_bene['Average Spending Per Beneficiary_2031_mean'].rank(method='dense', ascending=False)

df_2031_bene.to_csv("forecasted\\bene\\df_2031_bene.csv", index=False)

# Feature: Total Spend

In [216]:
# Importing data
folder_path = 'total_spending\\'
drift_df = pd.read_csv(folder_path+"Drift_method_total_spend.csv")
ets_df = pd.read_csv(folder_path+"ETS_total_spend.csv")
poly_deg_2_df = pd.read_csv(folder_path+"poly_deg_2_total_spend.csv")

In [217]:
# Adding suffix to mark column names
drift_df.columns = ['Brand Name']+['drift_'+i for i in drift_df.columns[1:]]
ets_df.columns = ['Brand Name']+['ets_'+i for i in ets_df.columns[1:]]
poly_deg_2_df.columns = ['Brand Name']+['poly_'+i for i in poly_deg_2_df.columns[1:]]

In [220]:
# Making sure all the drug names and are in order
list(set(drift_df['Brand Name'].values).difference(ets_df['Brand Name'].values))
list(set(drift_df['Brand Name'].values).difference(poly_deg_2_df['Brand Name'].values))
list(set(poly_deg_2_df['Brand Name'].values).difference(ets_df['Brand Name'].values))

[]

[]

[]

In [221]:
df_list = [drift_df, ets_df, poly_deg_2_df]
avg_spending_dosage_unit_final_df = pd.DataFrame()
avg_spending_dosage_unit_final_df['Brand Name'] = drift_df['Brand Name']

def weighted_average(val):
    return np.average(val.values.tolist(), weights=weights)

# Generating predictions
for year in range(2026, 2032):
    col = "Total Spending_"+str(year)
    
    # Extract the predictions from all the approaches
    df_list = [drift_df[['Brand Name', 'drift_'+col]], 
               ets_df[['Brand Name', 'ets_'+col]], poly_deg_2_df[['Brand Name', 'poly_'+col]]]
    
    # Concatenate into a dataframe
    df_conc = pd.concat([drift_df[['drift_'+col]], 
                        ets_df[['ets_'+col]], poly_deg_2_df[['poly_'+col]]], axis=1)
    
    # Take weighted mean from all approaches 
    df_conc[col+'_mean'] = df_conc.apply(weighted_average, axis=1)
    avg_spending_dosage_unit_final_df[col+'_mean'] = df_conc[col+'_mean']
    
    df_conc.head()

Unnamed: 0,drift_Total Spending_2026,ets_Total Spending_2026,poly_Total Spending_2026,Total Spending_2026_mean
0,62558.380984,70201.234364,0.0,38299.313928
1,229594.651816,114690.142814,0.0,126266.34019
2,2195828.034504,7380912.325398,0.0,1836005.249792
3,53494.573941,1223450.633969,0.0,149092.350368
4,0.0,308110.618143,0.0,30811.061814


Unnamed: 0,drift_Total Spending_2027,ets_Total Spending_2027,poly_Total Spending_2027,Total Spending_2027_mean
0,56150.471476,70201.234364,0.0,35095.359174
1,339284.252952,114690.142814,0.0,181111.140757
2,0.0,7380912.325398,0.0,738091.23254
3,39752.653909,1223450.633969,0.0,142221.390351
4,0.0,308110.618143,0.0,30811.061814


Unnamed: 0,drift_Total Spending_2028,ets_Total Spending_2028,poly_Total Spending_2028,Total Spending_2028_mean
0,44616.234362,70201.234364,0.0,29328.240617
1,553178.975165,114690.142814,0.0,288058.501864
2,0.0,7380912.325398,0.0,738091.23254
3,14101.069848,1223450.633969,0.0,129395.598321
4,0.0,308110.618143,0.0,30811.061814


Unnamed: 0,drift_Total Spending_2029,ets_Total Spending_2029,poly_Total Spending_2029,Total Spending_2029_mean
0,23085.658415,70201.234364,0.0,18562.952644
1,989880.699685,114690.142814,0.0,506409.364124
2,0.0,7380912.325398,0.0,738091.23254
3,0.0,1223450.633969,0.0,122345.063397
4,0.0,308110.618143,0.0,30811.061814


Unnamed: 0,drift_Total Spending_2030,ets_Total Spending_2030,poly_Total Spending_2030,Total Spending_2030_mean
0,0.0,70201.234364,0.0,7020.123436
1,1925670.10937,114690.142814,0.0,974304.068967
2,0.0,7380912.325398,0.0,738091.23254
3,0.0,1223450.633969,0.0,122345.063397
4,0.0,308110.618143,0.0,30811.061814


Unnamed: 0,drift_Total Spending_2031,ets_Total Spending_2031,poly_Total Spending_2031,Total Spending_2031_mean
0,0.0,70201.234364,0.0,7020.123436
1,4031196.281162,114690.142814,0.0,2027067.154862
2,0.0,7380912.325398,0.0,738091.23254
3,0.0,1223450.633969,0.0,122345.063397
4,0.0,308110.618143,0.0,30811.061814


### Generate predictions for year 2026

In [222]:
df_2026_spend = avg_spending_dosage_unit_final_df[['Brand Name', 'Total Spending_2026_mean']]\
                                .sort_values(by='Total Spending_2026_mean', ascending=False)

df_2026_spend['rank'] = df_2026_spend['Total Spending_2026_mean'].rank(method='dense', ascending=False)

df_2026_spend.to_csv("forecasted\\spend\\df_2026_spend.csv", index=False)

### Generate predictions for year 2027

In [223]:
df_2027_spend = avg_spending_dosage_unit_final_df[['Brand Name', 'Total Spending_2027_mean']]\
                                .sort_values(by='Total Spending_2027_mean', ascending=False)

df_2027_spend['rank'] = df_2027_spend['Total Spending_2027_mean'].rank(method='dense', ascending=False)

df_2027_spend.to_csv("forecasted\\spend\\df_2027_spend.csv", index=False)

### Generate predictions for year 2028

In [224]:
df_2028_spend = avg_spending_dosage_unit_final_df[['Brand Name', 'Total Spending_2028_mean']]\
                                .sort_values(by='Total Spending_2028_mean', ascending=False)

df_2028_spend['rank'] = df_2028_spend['Total Spending_2028_mean'].rank(method='dense', ascending=False)

df_2028_spend.to_csv("forecasted\\spend\\df_2028_spend.csv", index=False)

### Generate predictions for year 2029

In [225]:
df_2029_spend = avg_spending_dosage_unit_final_df[['Brand Name', 'Total Spending_2029_mean']]\
                                .sort_values(by='Total Spending_2029_mean', ascending=False)

df_2029_spend['rank'] = df_2029_spend['Total Spending_2029_mean'].rank(method='dense', ascending=False)

df_2029_spend.to_csv("forecasted\\spend\\df_2029_spend.csv", index=False)

### Generate predictions for year 2030

In [226]:
df_2030_spend = avg_spending_dosage_unit_final_df[['Brand Name', 'Total Spending_2030_mean']]\
                                .sort_values(by='Total Spending_2030_mean', ascending=False)

df_2030_spend['rank'] = df_2030_spend['Total Spending_2030_mean'].rank(method='dense', ascending=False)

df_2030_spend.to_csv("forecasted\\spend\\df_2030_spend.csv", index=False)

### Generate predictions for year 2031

In [227]:
df_2031_spend = avg_spending_dosage_unit_final_df[['Brand Name', 'Total Spending_2031_mean']]\
                                .sort_values(by='Total Spending_2031_mean', ascending=False)

df_2031_spend['rank'] = df_2031_spend['Total Spending_2031_mean'].rank(method='dense', ascending=False)

df_2031_spend.to_csv("forecasted\\spend\\df_2031_spend.csv", index=False)