In [1]:
# Import libraries
import pandas as pd
import numpy as np
import itertools
from prophet import Prophet
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import mean_squared_error, mean_absolute_error
from prophet.diagnostics import performance_metrics
from prophet.diagnostics import cross_validation
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from matplotlib.gridspec import GridSpec
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import month_plot, quarter_plot
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.utilities import regressor_coefficients
import utils
from prophet.plot import plot_cross_validation_metric
from prophet.plot import add_changepoints_to_plot
from prophet.serialize import model_to_json, model_from_json
from datetime import datetime, timedelta

In [2]:
def MAPE(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [3]:
# Get Data
drv_data = pd.read_csv('/Users/didi/Documents/Short Term Forecast/Weekly Folder/ssl_weekly_drv_data.csv',encoding='cp1252')
pax_data = pd.read_csv('/Users/didi/Documents/Short Term Forecast/Weekly Folder/ssl_weekly_pax_data.csv',encoding='cp1252')
#eyeballs = pd.read_csv('/Users/didi/Documents/Short Term Forecast/eyeball_data_ssl_full2022.csv' ,encoding='cp1252')
burn = pd.read_csv('/Users/didi/Documents/Short Term Forecast/Weekly Folder/ssl_weekly_burn_data.csv')

holidays = pd.read_csv('/Users/didi/Documents/Short Term Forecast/Weekly Folder/SSL Central Ops - List of Holidays - weekly.csv')
paychecks = pd.read_csv('/Users/didi/Documents/Short Term Forecast/Weekly Folder/ssl_paycheck_dates_full_weekly.csv')
dates = pd.read_csv('/Users/didi/Documents/Short Term Forecast/Weekly Folder/SSL calendar weekly.csv')
burn['driver_income'].astype('float')

#pax_data.columns
paychecks = paychecks.dropna()
paychecks

Unnamed: 0,country_code,year_calendar_week,date,paycheck_day
0,AR,2020/53,2020-12-28,1.0
1,AR,2021/01,2021-01-04,0.0
2,AR,2021/02,2021-01-11,0.0
3,AR,2021/03,2021-01-18,0.0
4,AR,2021/04,2021-01-25,0.0
...,...,...,...,...
2353,PE,2025/49,2025-12-01,0.0
2354,PE,2025/50,2025-12-08,0.0
2355,PE,2025/51,2025-12-15,1.0
2356,PE,2025/52,2025-12-22,0.0


In [4]:

exchange_rates = {
    'MX': 0.054,
    'AR': 0.0053,
    'PE': 0.26,
    'CR': 0.0018,
    'CO': 0.00021,
    'CL': 0.0013,
    'DO': 0.018,
    'PA': 1,
    'EC': 1
}
burn_poc = burn[(burn.city_id == 101)]
burn = burn[(burn.city_id != 101)]
#burn_poc[['gmv', 'pax_eng_burn', 'pax_aqc_burn', 'drv_eng_burn', 'drv_acq_burn', 'driver_income','ipk']] *= burn_poc['country_code'].map(exchange_rates)

def convert_to_usd(row):
    rate = exchange_rates[row['country_code']]
    row['gmv'] *= rate
    row['pax_eng_burn'] *= rate
    row['pax_aqc_burn'] *= rate
    row['drv_eng_burn'] *= rate
    row['drv_acq_burn'] *= rate
    row['driver_income'] *= rate
    row['ipk'] *= rate
    return row

burn_poc = burn_poc.apply(convert_to_usd, axis=1)

drv_data_poc = drv_data[drv_data.city_id == 101]
drv_data = drv_data[drv_data.city_id != 101]

pax_data_poc = pax_data[pax_data.city_id == 101]
pax_data = pax_data[pax_data.city_id != 101]

drv_data_poc.country_code[drv_data_poc.city_id == 101] = 'MX'
pax_data_poc.country_code[pax_data_poc.city_id == 101] = 'MX'
burn_poc.country_code[burn_poc.city_id == 101] = 'MX'
drv_data_poc = (
    drv_data_poc.groupby(['current_stat_date', 'year_calendar_week', 'country_code', 'city_id', 'city_name'])
    .agg({
    'dfsh_cnt':'sum',
    'dfsh_online_time':'sum',
    'dfsh_shpd':'mean',
    'retained_drv_cnt':'sum',
    'retained_drv_online_time':'sum',
    'retained_drv_shpd':'mean',
    'active_drv_cnt':'sum',
    'active_drv_online_time':'sum',
    'active_drv_shpd':'mean'
    })
    .reset_index()
    )

pax_data_poc = (
    pax_data_poc.groupby(['current_stat_date', 'year_calendar_week', 'country_code', 'city_name','city_id'])
    .agg({
    'pfc_pax_cnt':'sum',
    'pfc_calls':'sum',
    'pfc_cpp':'mean',
    'retained_pax_cnt':'sum',
    'retained_calls':'sum',
    'retained_cpp':'mean',
    'active_pax_cnt':'sum',
    'active_pax_calls':'sum',
    'active_pax_cpp':'mean',
    'rides':'sum' 
    })
    .reset_index()
    )
    
burn_poc = (
    burn_poc.groupby(['city_id', 'year_calendar_week', 'current_stat_date', 'country_code'])
    .agg({
    'gmv':'sum',
    'pax_eng_burn':'sum',
    'pax_aqc_burn':'sum',
    'drv_eng_burn':'sum',
    'drv_acq_burn':'sum',
    'driver_income':'sum',
    'ipk': 'mean',
    'driver_income_pct':'mean'
   
    })
    .reset_index()
    )

drv_data_poc['dfsh_shpd'] = drv_data_poc.dfsh_online_time / drv_data_poc.dfsh_cnt
drv_data_poc['retained_drv_shpd'] = drv_data_poc.retained_drv_online_time / drv_data_poc.retained_drv_cnt

pax_data_poc['pfc_cpp'] = pax_data_poc.pfc_calls / pax_data_poc.pfc_pax_cnt
pax_data_poc['retained_cpp'] =  pax_data_poc.retained_calls / pax_data_poc.retained_pax_cnt

burn_poc['driver_income_pct'] = burn_poc.driver_income / burn_poc.gmv

drv_data = pd.concat([drv_data, drv_data_poc])
pax_data = pd.concat([pax_data, pax_data_poc])
burn = pd.concat([burn, burn_poc])

drv_data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  burn_poc.country_code[burn_poc.city_id == 101] = 'MX'


Unnamed: 0,current_stat_date,year_calendar_week,country_code,city_id,city_name,dfsh_cnt,dfsh_online_time,dfsh_shpd,retained_drv_cnt,retained_drv_online_time,retained_drv_shpd,active_drv_cnt,active_drv_online_time,active_drv_shpd
472,2020-12-28,2021/01,MX,52010100.0,Aguascalientes,18.0,92.700000,5.150000,1438.0,14268.433330,9.922415,1456.0,14361.133330,9.863416
473,2021-01-04,2021/02,MX,52010100.0,Aguascalientes,21.0,462.316667,22.015079,1827.0,37297.650000,20.414696,1848.0,37759.966670,20.432882
474,2021-01-11,2021/03,MX,52010100.0,Aguascalientes,20.0,167.383333,8.369167,1793.0,38637.833330,21.549266,1813.0,38805.216670,21.403870
475,2021-01-18,2021/04,MX,52010100.0,Aguascalientes,54.0,745.683333,13.808951,1794.0,37656.800000,20.990412,1848.0,38402.483330,20.780565
476,2021-01-25,2021/05,MX,52010100.0,Aguascalientes,49.0,835.416667,17.049320,1813.0,37694.766670,20.791377,1862.0,38530.183330,20.692902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,2023-02-13,2023/08,MX,101.0,POC,552.0,4546.566667,8.236534,18019.0,226245.683330,12.555951,18571.0,230792.250003,11.428369
112,2023-02-20,2023/09,MX,101.0,POC,609.0,4813.883333,7.904570,18402.0,233744.950000,12.702149,19011.0,238558.833333,11.741407
113,2023-02-27,2023/10,MX,101.0,POC,741.0,5760.466667,7.773909,19035.0,249877.366667,13.127259,19776.0,255637.833333,12.032275
114,2023-03-06,2023/11,MX,101.0,POC,973.0,8208.600000,8.436382,20305.0,273093.400000,13.449564,21278.0,281302.000000,12.497968


In [5]:
# clean burn data
burn = burn[burn['city_id'].notnull()]
burn.city_id = burn.city_id.apply(np.int64)
burn.pax_aqc_burn = np.where(burn.pax_aqc_burn < 0,0,burn.pax_aqc_burn)
burn.drv_acq_burn = np.where(burn.drv_acq_burn < 0,0,burn.drv_acq_burn)
burn.pax_eng_burn = np.where(burn.pax_eng_burn < 0,0,burn.pax_eng_burn)
burn.drv_eng_burn = np.where(burn.drv_eng_burn < 0,0,burn.drv_eng_burn)
burn.gmv = np.where(burn.gmv < 0,0,burn.gmv)

burn.pax_aqc_burn = np.where(burn.pax_aqc_burn.isnull(),0,burn.pax_aqc_burn)
burn.drv_acq_burn = np.where(burn.drv_acq_burn.isnull(),0,burn.drv_acq_burn)
burn.pax_eng_burn = np.where(burn.pax_eng_burn.isnull(),0,burn.pax_eng_burn)
burn.drv_eng_burn = np.where(burn.drv_eng_burn.isnull(),0,burn.drv_eng_burn)
burn = burn[burn.city_id != -99999]
burn= burn.assign(pax_acq_burn_pct = burn.pax_aqc_burn/ burn.gmv
                  ,drv_acq_burn_pct = burn.drv_acq_burn/ burn.gmv
                  ,pax_eng_burn_pct = burn.pax_eng_burn/ burn.gmv
                  ,drv_eng_burn_pct = burn.drv_eng_burn/ burn.gmv)

burn.pax_acq_burn_pct = np.where(burn.pax_acq_burn_pct.isnull(),0,burn.pax_acq_burn_pct)
burn.drv_acq_burn_pct = np.where(burn.drv_acq_burn_pct.isnull(),0,burn.drv_acq_burn_pct)
burn.pax_eng_burn_pct = np.where(burn.pax_eng_burn_pct.isnull(),0,burn.pax_eng_burn_pct)
burn.drv_eng_burn_pct = np.where(burn.drv_eng_burn_pct.isnull(),0,burn.drv_eng_burn_pct)

# Merge pax and driver data
df1 = (pax_data
        .merge(drv_data, how='left', on = ['current_stat_date','city_id','year_calendar_week', 'country_code'])
        .merge(burn, how='left', on = ['current_stat_date','city_id', 'country_code','year_calendar_week'], suffixes = (None, '_y'))
        .drop(columns=['city_name_y'])
        .rename(columns = {'city_name_x':'city_name'})
    )
# Transfrom to Datetime Format
df1.current_stat_date = pd.to_datetime(df1.current_stat_date, format = '%Y-%m-%d')

# Transfrom to Datetime Format and drop columns
paychecks.date = pd.to_datetime(paychecks.date, format= '%Y-%m-%d')
paychecks.drop(columns = ['year_calendar_week'], inplace = True)
dates.date_value = pd.to_datetime(dates.date_value,  format='%Y-%m-%d')



# Final DataFrame with paycheck
master =(df1.merge(paychecks, how = 'left', left_on = ['current_stat_date', 'country_code'], right_on = ['date', 'country_code'], suffixes= (None, None))
      .drop(columns = ['year_calendar_week', 'city_name', 'date'])
      .sort_values('current_stat_date', ascending = True))


date_pred =  pd.DataFrame(dates, columns = ['date_value', 'rank'])
print(master.columns)

master['ipt'] = master['driver_income'] / master['rides']
master['ipt_standardized'] = master.groupby('country_code')['ipt'].apply(lambda x: (x - x.mean()) / x.std())

master = master.replace([np.inf, -np.inf], 0)
master = master.fillna(0)

# Remove Data errors
mapped_cities = {'country_code': ['MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','MX','DO','EC','CO','CR','CL','CO','DO','CL','AR','EC','CO',  'MX','DO','EC','CO','CR','CL','AR'],
                 'city_id': [52090100,52190500,52140500,52210400,52151600,52310300,52220400,52020200,52020100,52080800,52110500,52170200,52080200,52250100,52010100,52260200,52280300,52250300,52050400,809230100,593071200,57330100,506070100,56513800,57380100,809190100,56650800,54052300,593080100,57010100,101,101,101,101,101,101,101]}

mapped_cities = pd.DataFrame.from_dict(mapped_cities)
master = master.merge(mapped_cities, how = 'inner', on = ['country_code', 'city_id'])

master.to_csv('ssl_full_procssed_data_weekly.csv', index = False)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  master['ipt_standardized'] = master.groupby('country_code')['ipt'].apply(lambda x: (x - x.mean()) / x.std())


Index(['current_stat_date', 'country_code', 'city_id', 'pfc_pax_cnt',
       'pfc_calls', 'pfc_cpp', 'retained_pax_cnt', 'retained_calls',
       'retained_cpp', 'active_pax_cnt', 'active_pax_calls', 'active_pax_cpp',
       'rides', 'dfsh_cnt', 'dfsh_online_time', 'dfsh_shpd',
       'retained_drv_cnt', 'retained_drv_online_time', 'retained_drv_shpd',
       'active_drv_cnt', 'active_drv_online_time', 'active_drv_shpd', 'gmv',
       'pax_eng_burn', 'pax_aqc_burn', 'drv_eng_burn', 'drv_acq_burn',
       'driver_income', 'driver_income_pct', 'ipk', 'rides_y', 'ride_km',
       'pax_acq_burn_pct', 'drv_acq_burn_pct', 'pax_eng_burn_pct',
       'drv_eng_burn_pct', 'paycheck_day'],
      dtype='object')


In [6]:
# Initialize parameter

# Test Days
testweeks = 4

# Train limit
master = master[master.current_stat_date <= '2023-03-19']

# Horizon for MAPE
horizon = "28 days"
period = "60 days"

# Forecast Days after training specified date
forecast = 52

# Output boundaries
start_date = "2022-01-01"
end_date = "2023-12-31"

# Cities to model
city_list = [           

# 52090100
# ,52190500
# ,52140500
# ,52210400
# ,52151600
# ,52310300
# ,52220400
# ,52020200
# ,52020100
# ,52080800
# ,52110500
# ,52170200
# ,52080200
# ,52250100
52010100
,52260200
,52280300
,52250300
,52050400
,809230100
#,593071200 # Guayaquil
,57330100
,506070100
,56513800
,57380100
#,809190100 #Santiago
,56650800
,54052300
,593080100
,57010100
,101 # POC
] # --> CDMX  52090100


In [7]:
city_df= pd.DataFrame(drv_data, columns=['city_id', 'country_code'])
city_df = city_df.drop_duplicates()
city_df = city_df[city_df.city_id.isin(city_list)]
city_country = pd.Series(city_df.country_code.values, index=city_df.city_id).to_dict()
city_country

{52010100.0: 'MX',
 52050400.0: 'MX',
 52250300.0: 'MX',
 52260200.0: 'MX',
 52280300.0: 'MX',
 54052300.0: 'AR',
 56513800.0: 'CL',
 56650800.0: 'CL',
 57010100.0: 'CO',
 57330100.0: 'CO',
 57380100.0: 'CO',
 506070100.0: 'CR',
 593080100.0: 'EC',
 809230100.0: 'DO',
 101.0: 'MX'}

In [8]:
master.current_stat_date.min()

Timestamp('2020-12-28 00:00:00')

In [9]:
market = []

for i,j in city_country.items():

    # Mater File of Predicted PAX Variables
    master_pfc_df = (pd.DataFrame(
        data = master, columns=['current_stat_date', 'pfc_pax_cnt', 'paycheck_day', 'city_id','pax_acq_burn_pct'])
        .rename(columns = {'current_stat_date': 'ds','pfc_pax_cnt': 'y' }))
    master_pfc_df.pax_acq_burn_pct = np.where(master_pfc_df.pax_acq_burn_pct.isnull(),0,master_pfc_df.pax_acq_burn_pct)
    master_pfc_df = master_pfc_df[(master_pfc_df.city_id ==i)].drop(columns = 'city_id')

    master_pax_df = (pd.DataFrame(
        data = master, columns=['current_stat_date', 'retained_pax_cnt', 'paycheck_day', 'city_id','pax_eng_burn_pct'])
        .rename(columns = {'current_stat_date': 'ds','retained_pax_cnt': 'y' }))
    master_pax_df.pax_eng_burn_pct = np.where(master_pax_df.pax_eng_burn_pct.isnull(),0,master_pax_df.pax_eng_burn_pct)
    master_pax_df = master_pax_df[(master_pax_df.city_id == i)].drop(columns = 'city_id')

    # Mater File of Predicted DRV Variables
    master_dfsh_df = (pd.DataFrame(
        data = master, columns=['current_stat_date', 'dfsh_cnt', 'paycheck_day', 'city_id','drv_acq_burn_pct'])
        .rename(columns = {'current_stat_date': 'ds','dfsh_cnt': 'y' }))
    master_dfsh_df.drv_acq_burn_pct = np.where(master_dfsh_df.drv_acq_burn_pct.isnull(),0,master_dfsh_df.drv_acq_burn_pct)
    master_dfsh_df = master_dfsh_df[(master_dfsh_df.city_id ==i)].drop(columns = 'city_id')

    master_drv_df = (pd.DataFrame(
        data = master, columns=['current_stat_date', 'retained_drv_cnt', 'paycheck_day', 'city_id','drv_eng_burn_pct','ipt_standardized','ipt'])
        .rename(columns = {'current_stat_date': 'ds','retained_drv_cnt': 'y' }))
    #master_drv_df.drv_eng_burn_pct = np.where(master_drv_df.drv_eng_burn_pct.isnull(),0,master_drv_df.drv_eng_burn_pct)
    master_drv_df = master_drv_df[(master_drv_df.city_id ==i)].drop(columns = 'city_id')

    # Master Holidays File
    master_holidays = holidays[holidays.country_code == j]

    # Holidays Modelling
    master_holidays.date_value = pd.to_datetime(master_holidays.date_value, format='%Y-%m-%d')
    master_holidays = master_holidays.drop(columns = ['new_year_calendar_week', 'subregion', 'year', 'month', 'day', 'date_id', 'date'])
    # Holiday Dataframe
    master_holidays = pd.DataFrame({'holiday': master_holidays.holiday_name, 'ds': master_holidays.date_value, 'lower_window': 0, 'upper_window':0})
    

    # Slicing Datasets for PAX
    pfc_training_set = master_pfc_df.iloc[:-testweeks,:]
    pfc_test_set = master_pfc_df.iloc[-testweeks:,:]

    pax_training_set = master_pax_df.iloc[:-testweeks,:]
    pax_test_set = master_pax_df.iloc[-testweeks:,:]

    # Slicing Datasets for DRV
    dfsh_training_set = master_dfsh_df.iloc[:-testweeks,:]
    dfsh_test_set = master_dfsh_df.iloc[-testweeks:,:]

    drv_training_set = master_drv_df.iloc[:-testweeks,:]
    drv_test_set = master_drv_df.iloc[-testweeks:,:]

    # Mater File of Predicted PAX Variables
    pfc_freq_df = (pd.DataFrame(
        data = master, columns=['current_stat_date','city_id', 'pfc_cpp','pfc_pax_cnt','pfc_calls'])
        .rename(columns = {'current_stat_date': 'ds'}))
    pfc_freq_df = pfc_freq_df[(pfc_freq_df.city_id == i)]

    pax_freq_df = (pd.DataFrame(
        data = master, columns=['current_stat_date','city_id','retained_cpp','retained_pax_cnt','retained_calls' ])
        .rename(columns = {'current_stat_date': 'ds'}))
    pax_freq_df = pax_freq_df[(pax_freq_df.city_id == i)]

    # Mater File of Predicted DRV Variables
    dfsh_freq_df = (pd.DataFrame(
        data = master, columns=['current_stat_date','city_id','dfsh_shpd','dfsh_cnt', 'dfsh_online_time'])
        .rename(columns = {'current_stat_date': 'ds'}))
    dfsh_freq_df = dfsh_freq_df[(dfsh_freq_df.city_id ==i)]

    drv_freq_df = (pd.DataFrame(
        data = master, columns=['current_stat_date','city_id', 'retained_drv_shpd','retained_drv_cnt', 'retained_drv_online_time'])
        .rename(columns = {'current_stat_date': 'ds'}))
    drv_freq_df = drv_freq_df[(drv_freq_df.city_id == i)]

    #Creating frequency datasets with dates


    pfc_freq_df = dates.merge(pfc_freq_df, how = 'left', left_on = 'date_value', right_on = 'ds')
    pfc_freq_df = pfc_freq_df[(pfc_freq_df.date_value >= '2020-09-01')&(pfc_freq_df.date_value <= '2023-12-31')]

    pax_freq_df = dates.merge(pax_freq_df, how = 'left', left_on = 'date_value', right_on = 'ds')
    pax_freq_df = pax_freq_df[(pax_freq_df.date_value >= '2020-09-01')&(pax_freq_df.date_value <= '2023-12-31')]

    dfsh_freq_df = dates.merge(dfsh_freq_df, how = 'left', left_on = 'date_value', right_on = 'ds')
    dfsh_freq_df = dfsh_freq_df[(dfsh_freq_df.date_value >= '2020-09-01')&(dfsh_freq_df.date_value <= '2023-12-31')]

    drv_freq_df = dates.merge(drv_freq_df, how = 'left', left_on = 'date_value', right_on = 'ds')
    drv_freq_df = drv_freq_df[(drv_freq_df.date_value >= '2020-09-01')&(drv_freq_df.date_value <= '2023-12-31')]

    # Left join with prev year and weeks

    # PFC frequency DF Joins
    pfc_freq_df['last_year'] = pfc_freq_df['rank'] +52

    pfc_freq_df['last_week'] = pfc_freq_df['rank'] +1
    pfc_freq_df['last2_week'] = pfc_freq_df['rank'] +2
    pfc_freq_df['last3_week'] = pfc_freq_df['rank'] +3
    pfc_freq_df['last4_week'] = pfc_freq_df['rank'] +4

    pfc_freq_final = (
        pfc_freq_df.merge(pfc_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last_year'],suffixes=( None, '_ly'))
        .merge(pfc_freq_df, how = 'left', left_on = [ 'rank'], right_on = [ 'last_week'],suffixes=( None, '_lw'))
        .merge(pfc_freq_df, how = 'left', left_on = [ 'rank'], right_on = [ 'last2_week'],suffixes=( None, '_l2w'))
        .merge(pfc_freq_df, how = 'left', left_on = [ 'rank'], right_on = [ 'last3_week'],suffixes=( None, '_l3w'))
        .merge(pfc_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last4_week'],suffixes=( None, '_l4w'))
        )
    pfc_freq_final = pd.DataFrame(pfc_freq_final, columns=['rank','year','year_calendar_week','calendar_week','date_value','city_id', 'pfc_cpp', 'pfc_cpp_ly', 'pfc_cpp_lw', 'pfc_cpp_l2w', 'pfc_cpp_l3w', 'pfc_cpp_l4w'])

    # Pax frequency DF Joins
    pax_freq_df['last_year'] = pax_freq_df['rank']+52

    pax_freq_df['last_week'] = pax_freq_df['rank'] +1
    pax_freq_df['last2_week'] = pax_freq_df['rank'] +2
    pax_freq_df['last3_week'] = pax_freq_df['rank'] +3
    pax_freq_df['last4_week'] = pax_freq_df['rank'] +4

    pax_freq_final = (
        pax_freq_df.merge(pax_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last_year'],suffixes=( None, '_ly'))
        .merge(pax_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last_week'],suffixes=( None, '_lw'))
        .merge(pax_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last2_week'],suffixes=( None, '_l2w'))
        .merge(pax_freq_df, how = 'left', left_on = ['rank'], right_on = ['last3_week'],suffixes=( None, '_l3w'))
        .merge(pax_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last4_week'],suffixes=( None, '_l4w'))
        )
    pax_freq_final = pd.DataFrame(pax_freq_final, columns=['rank','year','year_calendar_week','calendar_week','date_value','city_id', 'retained_cpp', 'retained_cpp_ly', 'retained_cpp_lw','retained_cpp_l2w', 'retained_cpp_l3w','retained_cpp_l4w'])

    # DFSH frequency DF Joins
    dfsh_freq_df['last_year'] = dfsh_freq_df['rank'] +52

    dfsh_freq_df['last_week'] = dfsh_freq_df['rank'] +1
    dfsh_freq_df['last2_week'] = dfsh_freq_df['rank'] +2
    dfsh_freq_df['last3_week'] = dfsh_freq_df['rank'] +3
    dfsh_freq_df['last4_week'] = dfsh_freq_df['rank'] +4

    dfsh_freq_final = (
        dfsh_freq_df.merge(dfsh_freq_df, how = 'left', left_on = ['rank'], right_on = ['last_year'],suffixes=( None, '_ly'))
        .merge(dfsh_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last_week'],suffixes=( None, '_lw'))
        .merge(dfsh_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last2_week'],suffixes=( None, '_l2w'))
        .merge(dfsh_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last3_week'],suffixes=( None, '_l3w'))
        .merge(dfsh_freq_df, how = 'left', left_on = [ 'rank'], right_on = ['last4_week'],suffixes=( None, '_l4w'))
        )
    dfsh_freq_final = pd.DataFrame(dfsh_freq_final, columns=['rank','year','year_calendar_week','calendar_week','date_value','city_id', 'dfsh_shpd', 'dfsh_shpd_ly', 'dfsh_shpd_lw','dfsh_shpd_l2w', 'dfsh_shpd_l3w','dfsh_shpd_l4w'])

    # DRV frequency DF Joins
    drv_freq_df['last_year'] = drv_freq_df['rank'] +52

    drv_freq_df['last_week'] = drv_freq_df['rank'] +1
    drv_freq_df['last2_week'] = drv_freq_df['rank'] +2
    drv_freq_df['last3_week'] = drv_freq_df['rank'] +3
    drv_freq_df['last4_week'] = drv_freq_df['rank'] +4

    drv_freq_final = (
        drv_freq_df.merge(drv_freq_df, how = 'left', left_on = ['rank'], right_on = ['last_year'],suffixes=( None, '_ly'))
        .merge(drv_freq_df, how = 'left', left_on = ['rank'], right_on = ['last_week'],suffixes=( None, '_lw'))
        .merge(drv_freq_df, how = 'left', left_on = ['rank'], right_on = ['last2_week'],suffixes=( None, '_l2w'))
        .merge(drv_freq_df, how = 'left', left_on = ['rank'], right_on = ['last3_week'],suffixes=( None, '_l3w'))
        .merge(drv_freq_df, how = 'left', left_on = ['rank'], right_on = ['last4_week'],suffixes=( None, '_l4w'))
        )
    drv_freq_final = pd.DataFrame(drv_freq_final
                    , columns=['rank','year','year_calendar_week','calendar_week','date_value','city_id', 'retained_drv_shpd', 'retained_drv_shpd_ly', 'retained_drv_shpd_lw','retained_drv_shpd_l2w'
                    , 'retained_drv_shpd_l3w','retained_drv_shpd_l4w','last_week', 'last_2week', 'last_3week', 'last_4week' ])

    pfc_freq_final['pfc_cpp_lweeks'] = (pfc_freq_final.pfc_cpp_lw
                                    + pfc_freq_final.pfc_cpp_l2w
                                    + pfc_freq_final.pfc_cpp_l3w
                                    + pfc_freq_final.pfc_cpp_l4w ) / 4


    pax_freq_final['retained_cpp_lweeks'] = (pax_freq_final.retained_cpp_lw
                                    + pax_freq_final.retained_cpp_l2w
                                    + pax_freq_final.retained_cpp_l3w
                                    + pax_freq_final.retained_cpp_l4w ) / 4

    dfsh_freq_final['dfsh_shpd_lweeks'] = (dfsh_freq_final.dfsh_shpd_lw
                                    + dfsh_freq_final.dfsh_shpd_l2w
                                    + dfsh_freq_final.dfsh_shpd_l3w
                                    + dfsh_freq_final.dfsh_shpd_l4w ) / 4

    drv_freq_final['retained_drv_shpd_lweeks'] = (drv_freq_final.retained_drv_shpd_lw
                                    + drv_freq_final.retained_drv_shpd_l2w
                                    + drv_freq_final.retained_drv_shpd_l3w
                                    + drv_freq_final.retained_drv_shpd_l4w ) / 4

    pfc_freq_final = pd.DataFrame(pfc_freq_final, columns = ['rank', 'year', 'year_calendar_week', 'calendar_week', 'date_value', 'city_id', 'pfc_cpp_ly', 'pfc_cpp_lweeks', 'pfc_cpp'])
    pfc_freq_final['pfc_cpp_lweeks'].ffill(axis=0,inplace=True)
    pfc_freq_final['city_id'].ffill(axis=0,inplace=True)
    pfc_freq_final['pfc_cpp_pred'] = (pfc_freq_final.pfc_cpp_lweeks*0.35+pfc_freq_final.pfc_cpp_ly*0.65)
    pfc_freq_final['pfc_cpp_pred'] =np.where(pfc_freq_final.pfc_cpp_pred.isnull(),pfc_freq_final.pfc_cpp,pfc_freq_final.pfc_cpp_pred)

    pax_freq_final = pd.DataFrame(pax_freq_final, columns = ['rank', 'year', 'year_calendar_week', 'calendar_week', 'date_value', 'city_id', 'retained_cpp_ly', 'retained_cpp_lweeks','retained_cpp' ])
    pax_freq_final['retained_cpp_lweeks'].ffill(axis=0,inplace=True)
    pax_freq_final['city_id'].ffill(axis=0,inplace=True)
    pax_freq_final['retained_cpp_pred'] = (pax_freq_final.retained_cpp_lweeks*0.3+pax_freq_final.retained_cpp_ly*0.7)
    pax_freq_final['retained_cpp_pred'] = np.where(pax_freq_final.retained_cpp_pred.isnull(),pax_freq_final.retained_cpp,pax_freq_final.retained_cpp_pred)

    dfsh_freq_final = pd.DataFrame(dfsh_freq_final, columns = ['rank', 'year', 'year_calendar_week', 'calendar_week', 'date_value', 'city_id', 'dfsh_shpd_ly', 'dfsh_shpd_lweeks','dfsh_shpd'])
    dfsh_freq_final['dfsh_shpd_lweeks'].ffill(axis=0,inplace=True)
    dfsh_freq_final['city_id'].ffill(axis=0,inplace=True)
    dfsh_freq_final['dfsh_shpd_pred'] = (dfsh_freq_final.dfsh_shpd_lweeks*0.4+dfsh_freq_final.dfsh_shpd_ly*0.6)
    dfsh_freq_final['dfsh_shpd_pred'] = np.where(dfsh_freq_final.dfsh_shpd_pred.isnull(),dfsh_freq_final.dfsh_shpd, dfsh_freq_final.dfsh_shpd_pred)

    drv_freq_final = pd.DataFrame(drv_freq_final, columns = ['rank', 'year', 'year_calendar_week', 'calendar_week', 'date_value', 'city_id', 'retained_drv_shpd_ly', 'retained_drv_shpd_lweeks','retained_drv_shpd'])
    drv_freq_final['retained_drv_shpd_lweeks'].ffill(axis=0,inplace=True)
    drv_freq_final['city_id'].ffill(axis=0,inplace=True)
    drv_freq_final['retained_drv_shpd_pred'] = (drv_freq_final.retained_drv_shpd_lweeks*0.5+drv_freq_final.retained_drv_shpd_ly*0.5)
    drv_freq_final['retained_drv_shpd_pred'] = np.where(drv_freq_final.retained_drv_shpd_pred.isnull(),drv_freq_final.retained_drv_shpd,drv_freq_final.retained_drv_shpd_pred)

    frequency_df = (pfc_freq_final.merge(pax_freq_final, how = 'left', on = 'date_value', suffixes = (None, '_x'))
                    .merge(dfsh_freq_final, how = 'left', on = 'date_value', suffixes = (None, '_y'))
                    .merge(drv_freq_final, how = 'left', on = 'date_value', suffixes = (None, '_z'))
                    )
    frequency_df = pd.DataFrame(frequency_df, columns = ['rank', 'year', 'year_calendar_week', 'calendar_week', 'date_value', 'city_id','pfc_cpp_pred','retained_cpp_pred','dfsh_shpd_pred','retained_drv_shpd_pred','pfc_cpp','retained_cpp','dfsh_shpd','retained_drv_shpd'])
    
    # Parameter Grid
    param_grid = {
        'seasonality_prior_scale':   [0.01,0.5,5,10]
        , 'changepoint_prior_scale':  [0.001,0.1,0.3,0.05]
        , 'holidays_prior_scale': [0.01,0.5,5,10]
        , 'seasonality_mode': ['additive', 'multiplicative']
        }
    grid = ParameterGrid(param_grid)

    # Hyperparameter tuning PFC
    pfc_mape = []
    for params in grid: 
        #build model
        pfc_m = (Prophet(
            growth = 'linear'
            , yearly_seasonality = True
            , weekly_seasonality = True
            , daily_seasonality = False
            , seasonality_mode = params['seasonality_mode']
            , holidays = master_holidays
            , seasonality_prior_scale = params['seasonality_prior_scale']
            , holidays_prior_scale = params['holidays_prior_scale']
            , changepoint_prior_scale=  params['changepoint_prior_scale']
            )
        )
        pfc_m.add_regressor('pax_acq_burn_pct')
        pfc_m.add_regressor('paycheck_day')
        pfc_m.fit(pfc_training_set)
        #cross-validation
        pfc_cv = cross_validation(pfc_m, horizon = horizon, initial = '366 days', period = period,  parallel= 'processes')
        #gather results
        pfc_error = MAPE(pfc_cv['y'], pfc_cv['yhat'])
        
        pfc_mape.append(pfc_error)
    # Hyperparameter tuning Retained Pax
    pax_mape = []
    for params in grid: 
        #build model
        pax_m = (Prophet(
            growth = 'linear'
            , yearly_seasonality = True
            , weekly_seasonality = True
            , daily_seasonality = False
            , seasonality_mode = params['seasonality_mode']
            , holidays = master_holidays
            , seasonality_prior_scale = params['seasonality_prior_scale']
            , holidays_prior_scale = params['holidays_prior_scale']
            , changepoint_prior_scale=  params['changepoint_prior_scale']
            )
        )
        pax_m.add_regressor('pax_eng_burn_pct')
        pax_m.add_regressor('paycheck_day')
        pax_m.fit(pax_training_set)
        #cross-validation
        pax_cv = cross_validation(pax_m, horizon = horizon, initial = '366 days',period = period, parallel= 'processes')
        #gather results
        pax_error = MAPE(pax_cv['y'], pax_cv['yhat'])
        
        pax_mape.append(pax_error)
        
    # Hyperparameter tuning DFSH
    dfsh_mape = []
    for params in grid: 
        #build model
        dfsh_m = (Prophet(
            growth = 'linear'
            , yearly_seasonality = True
            , weekly_seasonality = True
            , daily_seasonality = False
            , seasonality_mode = params['seasonality_mode']
            , holidays = master_holidays
            , seasonality_prior_scale = params['seasonality_prior_scale']
            , holidays_prior_scale = params['holidays_prior_scale']
            , changepoint_prior_scale=  params['changepoint_prior_scale']
            )
        )
        dfsh_m.add_regressor('drv_acq_burn_pct')
        dfsh_m.add_regressor('paycheck_day')
        dfsh_m.fit(dfsh_training_set)
        #cross-validation
        dfsh_cv = cross_validation(dfsh_m, horizon = horizon, initial = '366 days',period = period, parallel= 'processes')
        #gather results
        dfsh_error = MAPE(dfsh_cv['y'], dfsh_cv['yhat'])
        
        dfsh_mape.append(dfsh_error)
        
    # Hyperparameter tuning Retained Drv
    drv_mape = []
    for params in grid: 
        #build model
        drv_m = (Prophet(
            growth = 'linear'
            , yearly_seasonality = True
            , weekly_seasonality = True
            , daily_seasonality = False
            , seasonality_mode = params['seasonality_mode']
            , holidays = master_holidays
            , seasonality_prior_scale = params['seasonality_prior_scale']
            , holidays_prior_scale = params['holidays_prior_scale']
            , changepoint_prior_scale=  params['changepoint_prior_scale']
            )
        )
        drv_m.add_regressor('ipt_standardized')
        drv_m.add_regressor('drv_eng_burn_pct')
        drv_m.add_regressor('paycheck_day')
        drv_m.fit(drv_training_set)
        #cross-validation
        drv_cv = cross_validation(drv_m, horizon = horizon, initial = '366 days',period = period, parallel= 'processes')
        #gather results
        drv_error = MAPE(drv_cv['y'], drv_cv['yhat'])
        
        drv_mape.append(drv_error)

    # best parameters pax
    pfc_best_params = grid[np.argmin(pfc_mape)]

    pax_best_params = grid[np.argmin(pax_mape)]

    # best parameters drv
    dfsh_best_params = grid[np.argmin(dfsh_mape)]

    drv_best_params = grid[np.argmin(drv_mape)]

    # PAX Final models
    pfc_m = Prophet(growth = 'linear'
                , yearly_seasonality = True
                , weekly_seasonality = True
                , daily_seasonality = False
                , seasonality_mode = pfc_best_params['seasonality_mode']
                , seasonality_prior_scale = pfc_best_params['seasonality_prior_scale']
                , holidays_prior_scale = pfc_best_params['holidays_prior_scale']
                , holidays = master_holidays
                , changepoint_prior_scale = pfc_best_params['changepoint_prior_scale']
    )
    pfc_m.add_regressor('pax_acq_burn_pct')
    pfc_m.add_regressor('paycheck_day')
    pfc_m.fit(pfc_training_set)

    pax_m = Prophet(growth = 'linear'
                , yearly_seasonality = True
                , weekly_seasonality = True
                , daily_seasonality = False
                , seasonality_mode = pax_best_params['seasonality_mode']
                , seasonality_prior_scale = pax_best_params['seasonality_prior_scale']
                , holidays_prior_scale = pax_best_params['holidays_prior_scale']
                , holidays = master_holidays
                , changepoint_prior_scale = pax_best_params['changepoint_prior_scale']
    )
    pax_m.add_regressor('pax_eng_burn_pct')
    pax_m.add_regressor('paycheck_day')
    pax_m.fit(pax_training_set)

    # DRV Final models
    dfsh_m = Prophet(growth = 'linear'
                , yearly_seasonality = True
                , weekly_seasonality = True
                , daily_seasonality = False
                , seasonality_mode = dfsh_best_params['seasonality_mode']
                , seasonality_prior_scale = dfsh_best_params['seasonality_prior_scale']
                , holidays_prior_scale = dfsh_best_params['holidays_prior_scale']
                , holidays = master_holidays
                , changepoint_prior_scale = dfsh_best_params['changepoint_prior_scale']
    )
    dfsh_m.add_regressor('drv_acq_burn_pct')
    dfsh_m.add_regressor('paycheck_day')
    dfsh_m.fit(dfsh_training_set)

    drv_m = Prophet(growth = 'linear'
                , yearly_seasonality = True
                , weekly_seasonality = True
                , daily_seasonality = False
                , seasonality_mode = drv_best_params['seasonality_mode']
                , seasonality_prior_scale = drv_best_params['seasonality_prior_scale']
                , holidays_prior_scale = drv_best_params['holidays_prior_scale']
                , holidays = master_holidays
                , changepoint_prior_scale = drv_best_params['changepoint_prior_scale']
    )
    drv_m.add_regressor('ipt_standardized')
    drv_m.add_regressor('drv_eng_burn_pct')
    drv_m.add_regressor('paycheck_day')
    drv_m.fit(drv_training_set)
    

    # with open('pfc_model_mx'+i+'.json', 'w') as fout:
    #     fout.write(model_to_json(pfc_m))  # Save model
    
    # with open('pax_model_mx'+i+'.json', 'w') as fout:
    #     fout.write(model_to_json(pax_m))  # Save model
        
    # with open('dfsh_model_mx'+i+'.json', 'w') as fout:
    #     fout.write(model_to_json(dfsh_m))  # Save model
        
    # with open('drv_model_mx'+i+'.json', 'w') as fout:
    #     fout.write(model_to_json(drv_m))  # Save model

    #with open('serialized_model.json', 'r') as fin:
     #   m = model_from_json(fin.read())  # Load model


    # Create Future Dataframe PAX
    pfc_pred = pfc_m.make_future_dataframe(periods = testweeks + forecast, freq = 'W')
    pfc_pred['ds'] = pfc_pred['ds'].apply(lambda x: x - pd.Timedelta(days=x.dayofweek))
    pax_pred = pax_m.make_future_dataframe(periods = testweeks + forecast, freq = 'W')
    pax_pred['ds'] = pax_pred['ds'].apply(lambda x: x - pd.Timedelta(days=x.dayofweek))
    
    # Create Future Dataframe DRV
    dfsh_pred= dfsh_m.make_future_dataframe(periods = testweeks + forecast, freq = 'W')
    dfsh_pred['ds'] = dfsh_pred['ds'].apply(lambda x: x - pd.Timedelta(days=x.dayofweek))
    drv_pred = drv_m.make_future_dataframe(periods = testweeks + forecast, freq = 'W')
    drv_pred['ds'] = drv_pred['ds'].apply(lambda x: x - pd.Timedelta(days=x.dayofweek))

    # Merge Regressors pax
    pfc_pred = pfc_pred.merge(master_pfc_df, how = 'left', on = 'ds').merge(paychecks[paychecks.country_code == j], how = 'left', left_on = 'ds', right_on = 'date', suffixes = (None,'_x')).merge(date_pred, how = 'left', left_on = 'ds', right_on = 'date_value', suffixes = (None,'_x'))

    pfc_pred['paycheck_day'] = pfc_pred['paycheck_day_x']
    pfc_pred = pfc_pred.drop(columns =  ['paycheck_day_x','date'])
    pfc_pred['rank_ly'] = pfc_pred['rank'] +52
    pfc_pred = pfc_pred.merge(pfc_pred, how= 'left', left_on = ['rank'], right_on = ['rank_ly'], suffixes = (None, '_y'))
    
    pfc_pred = pfc_pred.drop(columns =  ['ds_y', 'y_y', 'paycheck_day_y', 'rank_y', 'rank_ly_y', 'rank_ly',	'country_code_y',	'date_value_y'])
    pfc_pred.pax_acq_burn_pct = np.where(pfc_pred['pax_acq_burn_pct'].isnull(),np.where(pfc_pred['pax_acq_burn_pct_y'].isnull(),0,pfc_pred['pax_acq_burn_pct_y']),pfc_pred['pax_acq_burn_pct'])
    
    #pfc_pred.drop_duplicates(inplace=True)

    pax_pred = pax_pred.merge(master_pax_df, how = 'left', on = 'ds').merge(paychecks[paychecks.country_code == j], how = 'left', left_on = 'ds', right_on = 'date', suffixes = (None,'_x')).merge(date_pred, how = 'left', left_on = 'ds', right_on = 'date_value', suffixes = (None,'_x'))

    pax_pred['paycheck_day'] = pax_pred['paycheck_day_x']
    pax_pred = pax_pred.drop(columns =  ['paycheck_day_x', 'date','date_value'])
    pax_pred['rank_ly'] = pax_pred['rank'] +52
    pax_pred = pax_pred.merge(pax_pred, how= 'left', left_on = ['rank'], right_on = ['rank_ly'], suffixes = (None, '_y'))
    pax_pred = pax_pred.drop(columns =  ['ds_y', 'y_y', 'paycheck_day_y','rank_y', 'rank_ly_y','rank_ly','country_code_y'])
    pax_pred.pax_eng_burn_pct = np.where(pax_pred['pax_eng_burn_pct'].isnull(),np.where(pax_pred['pax_eng_burn_pct_y'].isnull(),0,pax_pred['pax_eng_burn_pct_y']),pax_pred['pax_eng_burn_pct'])

    #  Merge Regressors drv
    dfsh_pred = dfsh_pred.merge(master_dfsh_df, how = 'left', on = 'ds').merge(paychecks[paychecks.country_code == j], how = 'left', left_on = 'ds', right_on = 'date', suffixes = (None,'_x')).merge(date_pred, how = 'left', left_on = 'ds', right_on = 'date_value', suffixes = (None,'_x'))
    
    dfsh_pred['paycheck_day'] = dfsh_pred['paycheck_day_x']
    dfsh_pred = dfsh_pred.drop(columns =  ['paycheck_day_x', 'date','date_value'])
    dfsh_pred['rank_ly'] = dfsh_pred['rank'] +52
    dfsh_pred = dfsh_pred.merge(dfsh_pred, how= 'left', left_on = ['rank'], right_on = ['rank_ly'], suffixes = (None, '_y'))
    
    dfsh_pred = dfsh_pred.drop(columns =  ['ds_y', 'y_y', 'paycheck_day_y', 'rank_y', 'rank_ly_y', 'rank_ly', 'country_code_y'])
    dfsh_pred.drv_acq_burn_pct = np.where(dfsh_pred['drv_acq_burn_pct'].isnull(),np.where(dfsh_pred['drv_acq_burn_pct_y'].isnull(),0,dfsh_pred['drv_acq_burn_pct_y']),dfsh_pred['drv_acq_burn_pct'])
    
    
    drv_pred = drv_pred.merge(master_drv_df, how = 'left', on = 'ds').merge(paychecks[paychecks.country_code == j], how = 'left', left_on = 'ds', right_on = 'date', suffixes = (None,'_x')).merge(date_pred, how = 'left', left_on = 'ds', right_on = 'date_value', suffixes = (None,'_x'))
    drv_pred['paycheck_day'] = drv_pred['paycheck_day_x']
    drv_pred = drv_pred.drop(columns =  ['paycheck_day_x','date_value'])
    drv_pred['rank_ly'] = drv_pred['rank'] +52
    drv_pred = drv_pred.merge(drv_pred, how= 'left', left_on = ['rank'], right_on = ['rank_ly'], suffixes = (None, '_y'))
    
    drv_pred = drv_pred.drop(columns =  ['ds_y', 'y_y', 'paycheck_day_y', 'rank_y', 'rank_ly_y',  'rank_ly','country_code_y', 'date_y'])
    drv_pred.drv_eng_burn_pct = np.where(drv_pred['drv_eng_burn_pct'].isnull(),np.where(drv_pred['drv_eng_burn_pct_y'].isnull(),0,drv_pred['drv_eng_burn_pct_y']),drv_pred['drv_eng_burn_pct'])
    drv_pred.ipt_standardized = np.where(drv_pred['ipt_standardized'].isnull(),np.where(drv_pred['ipt_standardized_y'].isnull(),0,drv_pred['ipt_standardized_y']),drv_pred['ipt_standardized'])
    drv_pred.ipt = np.where(drv_pred['ipt'].isnull(),np.where(drv_pred['ipt_y'].isnull(),0,drv_pred['ipt_y']),drv_pred['ipt'])
    
    master_regressors = pfc_pred.merge(pax_pred, how = 'left', on = 'ds', suffixes = (None,'_x')).merge(dfsh_pred, how = 'left', on = 'ds', suffixes = (None,'_y')).merge(drv_pred, how = 'left', on = 'ds', suffixes = (None,'_z'))
    master_regressors = pd.DataFrame(master_regressors, columns =['ds','paycheck_day', 'pax_acq_burn_pct', 'pax_eng_burn_pct','drv_acq_burn_pct', 'drv_eng_burn_pct','ipt_standardized','ipt'])
    
    # Forecast PAX

    pfc_prediction = pfc_m.predict(pfc_pred)
    pfc_prediction = pfc_prediction.rename(columns = {'yhat':'pfc_prediction', 'yhat_lower': 'pfc_lower', 'yhat_upper': 'pfc_upper'}) 
    pax_prediction  = pax_m.predict(pax_pred)
    pax_prediction = pax_prediction.rename(columns = {'yhat':'pax_prediction', 'yhat_lower': 'pax_lower', 'yhat_upper': 'pax_upper'}) 

    # # Forecast DRV
    dfsh_prediction = dfsh_m.predict(dfsh_pred)
    dfsh_prediction = dfsh_prediction.rename(columns = {'yhat':'dfsh_prediction', 'yhat_lower': 'dfsh_lower', 'yhat_upper': 'dfsh_upper'}) 
    drv_prediction  = drv_m.predict(drv_pred)
    drv_prediction = drv_prediction.rename(columns = {'yhat':'drv_prediction', 'yhat_lower': 'drv_lower', 'yhat_upper': 'drv_upper'}) 

    predictions_df = pfc_prediction.merge(pax_prediction, how='left', on = 'ds',suffixes = (None, '_x')).merge(dfsh_prediction, how='left', on = 'ds', suffixes = (None, '_y')).merge(drv_prediction, how='left', on = 'ds', suffixes = (None, '_z'))
    predictions_df = pd.DataFrame(predictions_df, columns = ['ds', 'pfc_prediction', 'pax_prediction', 'dfsh_prediction' ,'drv_prediction'])#, 'pax_acq_burn_pct','pax_eng_burn_pct', 'drv_acq_burn_pct', 'drv_eng_burn_pct','ipt_standardized'

    # Join Frequency with prediction
    market_df = frequency_df.merge(predictions_df, how = 'left', left_on = 'date_value', right_on = 'ds', suffixes = (None,'_x'))
    market_df = market_df.drop(columns = ['date_value'])
    market_df = market_df[(market_df.ds >= start_date)&(market_df.ds <= end_date)]

    market_df = pd.DataFrame(market_df, columns = [
        'year_calendar_week'
        ,'calendar_week'
        ,'ds'
        ,'city_id'
        ,'pfc_cpp_pred'
        ,'retained_cpp_pred'
        ,'dfsh_shpd_pred'
        ,'retained_drv_shpd_pred'
        ,'pfc_prediction'
        ,'pax_prediction'
        ,'dfsh_prediction' 
        ,'drv_prediction'])
    
    # Actuals dataframe
    rides_df = master.copy()
    rides_df['cr'] = rides_df.rides / rides_df.active_pax_calls
    rides_df = rides_df[(rides_df.city_id == i)][['current_stat_date','rides','active_pax_calls', 'active_drv_online_time','cr','pfc_cpp','pfc_pax_cnt','pfc_calls','retained_cpp','retained_pax_cnt','retained_calls','dfsh_shpd','dfsh_cnt', 'dfsh_online_time','retained_drv_shpd','retained_drv_cnt', 'retained_drv_online_time']]
    
    market_df = market_df.merge(rides_df, how='left',left_on = 'ds' ,right_on='current_stat_date', suffixes=(None,None))
    market_df = market_df.merge(master_regressors, how='left',on='ds', suffixes=(None,None))
    market_df.drop(columns=['current_stat_date'],inplace= True)
    market_df.city_id = market_df.city_id.astype('string')
    market_df.drop_duplicates(inplace= True)
    
    market_df = market_df.assign(
        predicted_calls = (market_df.pfc_cpp_pred*market_df.pfc_prediction) + (market_df.retained_cpp_pred*market_df.pax_prediction),
        predicted_tsh = (market_df.dfsh_shpd_pred*market_df.dfsh_prediction) + (market_df.retained_drv_shpd_pred*market_df.drv_prediction),
        predicted_dsr = ((market_df.pfc_cpp_pred*market_df.pfc_prediction) + (market_df.retained_cpp_pred*market_df.pax_prediction)) / ((market_df.dfsh_shpd_pred*market_df.dfsh_prediction) + (market_df.retained_drv_shpd_pred*market_df.drv_prediction)),
        predicted_pfc_calls = market_df.pfc_cpp_pred*market_df.pfc_prediction,
        predicted_retained_calls = market_df.retained_cpp_pred*market_df.pax_prediction,
        predicted_dfsh_tsh = market_df.dfsh_shpd_pred*market_df.dfsh_prediction,
        predicted_drv_tsh = market_df.retained_drv_shpd_pred*market_df.drv_prediction
    )
    
    market_df = pd.DataFrame(market_df, columns = [
        'year_calendar_week'
        ,'calendar_week'
        ,'ds'
        ,'city_id'
        ,'pfc_cpp_pred'
        ,'retained_cpp_pred'
        ,'dfsh_shpd_pred'
        ,'retained_drv_shpd_pred'
        ,'pfc_prediction'
        ,'pax_prediction'
        ,'dfsh_prediction' 
        ,'drv_prediction'
        ,'predicted_pfc_calls'
        ,'predicted_retained_calls'
        ,'predicted_dfsh_tsh'
        ,'predicted_drv_tsh'
        ,'predicted_calls'
        ,'predicted_tsh'
        ,'predicted_dsr'
        ,'pax_acq_burn_pct'
        ,'pax_eng_burn_pct'
        ,'drv_acq_burn_pct'
        ,'drv_eng_burn_pct'
        ,'ipt_standardized'
        ,'ipt'
        ,'pfc_pax_cnt'
        ,'pfc_calls'
        ,'pfc_cpp'
        ,'retained_pax_cnt'
        ,'retained_calls'
        ,'retained_cpp'
        ,'dfsh_cnt'
        ,'dfsh_online_time'
        ,'dfsh_shpd'
        ,'retained_drv_cnt'
        ,'retained_drv_online_time'
        ,'retained_drv_shpd' 
        ,'active_pax_calls'
        ,'active_drv_online_time'
        ,'cr'
        ,'rides'
        ])
    
    market_df.to_csv('/Users/didi/Documents/Short Term Forecast/2023 Predictions/'+str(i)+'_weekly_20230319.csv')
    market.append(market_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  master_holidays.date_value = pd.to_datetime(master_holidays.date_value, format='%Y-%m-%d')
17:34:27 - cmdstanpy - INFO - Chain [1] start processing
17:34:27 - cmdstanpy - INFO - Chain [1] done processing
17:34:27 - cmdstanpy - ERROR - Chain [1] error: error during processing Stale NFS file handle
Optimization terminated abnormally. Falling back to Newton.
17:34:27 - cmdstanpy - INFO - Chain [1] start processing
17:34:27 - cmdstanpy - INFO - Chain [1] done processing
17:34:29 - cmdstanpy - INFO - Chain [1] start processing
17:34:29 - cmdstanpy - INFO - Chain [1] start processing
17:34:29 - cmdstanpy - INFO - Chain [1] start processing
17:34:29 - cmdstanpy - INFO - Chain [1] start processing
17:34:29 - cmdstanpy - INFO - Chain 

In [None]:
market_df

Unnamed: 0,year_calendar_week,calendar_week,ds,city_id,pfc_cpp_pred,retained_cpp_pred,dfsh_shpd_pred,retained_drv_shpd_pred,pfc_prediction,pax_prediction,...,dfsh_cnt,dfsh_online_time,dfsh_shpd,retained_drv_cnt,retained_drv_online_time,retained_drv_shpd,active_pax_calls,active_drv_online_time,cr,rides
0,2022/24,24,2022-06-06,52190500.0,1.812779,3.00333,15.313727,21.379237,16204.378001,487738.572538,...,468.0,7781.833333,16.627849,21215.0,439230.1167,20.703753,1686604.0,447011.95,0.675778,1139770.0
1,2022/25,25,2022-06-13,52190500.0,1.847124,3.003643,16.454665,20.670467,16439.918999,497187.506723,...,466.0,7888.683333,16.928505,21098.0,443137.65,21.003775,1721482.0,451026.3333,0.66066,1137314.0
2,2022/26,26,2022-06-20,52190500.0,1.833995,3.045632,16.853516,21.271466,15957.233118,487161.66104,...,474.0,7684.383333,16.211779,21247.0,448758.9,21.121048,1668779.0,456443.2833,0.680083,1134908.0
3,2022/27,27,2022-06-27,52190500.0,1.845609,3.027479,17.45491,21.147859,15597.54468,488355.481751,...,462.0,7884.166667,17.065296,21444.0,461330.3333,21.513259,1716800.0,469214.5,0.680071,1167546.0
4,2022/28,28,2022-07-04,52190500.0,1.837831,3.065875,17.125739,21.489909,15079.009321,479945.210077,...,452.0,6309.483333,13.959034,21526.0,438894.1333,20.389024,1720311.0,445203.6167,0.657518,1131135.0
5,2022/29,29,2022-07-11,52190500.0,1.876238,3.118724,16.581093,21.281339,15308.007306,488127.913221,...,508.0,8904.05,17.527657,20936.0,421593.2667,20.13724,1662961.0,430497.3167,0.665145,1106111.0
6,2022/30,30,2022-07-18,52190500.0,1.86607,3.120974,17.323984,21.311247,15366.0221,481926.14896,...,531.0,8260.8,15.557062,21009.0,431522.2333,20.539875,1561847.0,439783.0333,0.706184,1102952.0
7,2022/31,31,2022-07-25,52190500.0,1.892003,3.110771,16.344312,21.101691,15652.265447,486459.242397,...,481.0,7405.833333,15.396743,20971.0,433049.15,20.649905,1555304.0,440454.9833,0.708424,1101814.0
8,2022/32,32,2022-08-01,52190500.0,1.861806,3.108249,16.982929,21.264321,15321.435061,477686.906537,...,430.0,6728.683333,15.648101,21058.0,438299.7167,20.813929,1622377.0,445028.4,0.695484,1128338.0
9,2022/33,33,2022-08-08,52190500.0,1.87505,3.132059,17.581933,21.23316,15096.583346,478675.865859,...,390.0,5875.75,15.066026,21066.0,445828.1167,21.163397,1608032.0,451703.8667,0.69322,1114720.0


In [None]:
final_market_df = pd.concat(market)
filename = 'market_predictions_february_28d_weekly'
folder = '28d_predictions'
predictions_folder = '2023 predictions'
final_market_df.to_csv('/Users/didi/Documents/Short Term Forecast/'+predictions_folder+'/'+folder+'/'+filename+'.csv', index = False)


final_market_df[final_market_df.ds >= "2022-07-16"].head(20)


Unnamed: 0,year_calendar_week,calendar_week,ds,city_id,pfc_cpp_pred,retained_cpp_pred,dfsh_shpd_pred,retained_drv_shpd_pred,pfc_prediction,pax_prediction,...,dfsh_cnt,dfsh_online_time,dfsh_shpd,retained_drv_cnt,retained_drv_online_time,retained_drv_shpd,active_pax_calls,active_drv_online_time,cr,rides
6,2022/30,30,2022-07-18,52090100.0,1.659263,2.604932,19.68617,21.764098,41980.945304,1046587.0,...,1235.0,21522.86667,17.427422,54270.0,1142815.933,21.057968,2892283.0,1164338.8,0.782573,2263422.0
7,2022/31,31,2022-07-25,52090100.0,1.682912,2.62035,20.081588,21.703892,44471.726023,1080498.0,...,1407.0,26268.16667,18.669628,53754.0,1124264.05,20.914984,2970808.0,1150532.217,0.76497,2272580.0
8,2022/32,32,2022-08-01,52090100.0,1.663838,2.62761,19.787178,21.756871,42233.000747,1055425.0,...,1484.0,29279.7,19.730256,53348.0,1099260.567,20.605469,2933219.0,1128540.267,0.753977,2211580.0
9,2022/33,33,2022-08-08,52090100.0,1.671789,2.636767,20.404609,21.65201,42442.195829,1055587.0,...,1362.0,24759.88333,18.179063,53306.0,1095362.75,20.548583,2978121.0,1120122.633,0.728159,2168547.0
10,2022/34,34,2022-08-15,52090100.0,1.673388,2.629797,19.847886,21.768513,45313.765969,1090769.0,...,1231.0,23793.83333,19.328865,52873.0,1092284.833,20.658651,3089957.0,1116078.667,0.704325,2176333.0
11,2022/35,35,2022-08-22,52090100.0,1.676275,2.681399,19.931854,21.648295,43432.976736,1075175.0,...,1207.0,22622.96667,18.743137,53515.0,1106051.633,20.668068,3005267.0,1128674.6,0.72603,2181913.0
12,2022/36,36,2022-08-29,52090100.0,1.679102,2.643335,20.81293,21.301004,46461.59582,1104378.0,...,1240.0,22940.65,18.500524,54158.0,1125273.35,20.777602,3088234.0,1148214.0,0.716324,2212175.0
13,2022/37,37,2022-09-05,52090100.0,1.670792,2.613357,19.125302,21.092717,43631.442059,1074953.0,...,1344.0,25349.45,18.861198,54414.0,1121917.5,20.618177,3071716.0,1147266.95,0.712859,2189700.0
14,2022/38,38,2022-09-12,52090100.0,1.671566,2.641423,20.100637,21.41073,50158.495788,1114602.0,...,921.0,16887.05,18.335559,54593.0,1065487.533,19.516926,3034908.0,1082374.583,0.695243,2109999.0
15,2022/39,39,2022-09-19,52090100.0,1.660349,2.643937,19.69477,21.349907,41968.776483,1063515.0,...,1118.0,19695.95,17.617129,54901.0,1104839.8,20.12422,2972155.0,1124535.75,0.716929,2130824.0
