In [1]:
import os
from itertools import product

import mlflow
import numpy as np
import pandas as pd
from mlflow.tracking import MlflowClient

import forecast_utils as utils
from scenario_utils.scenarios import Scenarios_Client

# Set the tracking URI to the directory where mlruns is located.
# The MLFLOW_TRACKING_URI environment variable takes precedence so the notebook
# is not tied to one machine; the original local path is kept as the fallback.
DEFAULT_TRACKING_URI = "file:///Users/pabloparedes/Documents/prophet_GB_demand_forecasting_workflow/mlruns"
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", DEFAULT_TRACKING_URI))

client = MlflowClient()

In [2]:
# Loading Monthly Model.
# The first value in `model.aliases` is used as the version segment of the
# models:/ URI — presumably aliases maps alias name -> version; TODO confirm.
# If the model has several aliases, which one comes first is not controlled here.
model_name = "gb_non_dom_monthly"
model = client.get_registered_model(model_name)
monthly_model = mlflow.prophet.load_model(f"models:/{model.name}/{list(model.aliases.values())[0]}")

# Loading Daily Model (same alias lookup as for the monthly model).
# Note that `model_name` and `model` are rebound here and end up describing the daily model.
model_name = "gb_non_dom_daily"
model = client.get_registered_model(model_name)
daily_model = mlflow.prophet.load_model(f"models:/{model.name}/{list(model.aliases.values())[0]}")

In [3]:
### Daily Regressor Scenarios:

# Getting variables
temperature = utils.make_daily_regressors_df(initial_date='2015-01-01')[1]
temperature = temperature[temperature['ds'] < '2031-04-01']
cdd = temperature[['ds', 'cdd']].copy()
hdd = temperature[['ds', 'hdd']].copy()
# Explicit copy, like cdd/hdd above: scenario columns are written into this frame
# with .loc below, and writing into a slice of the filtered frame can trigger
# pandas' SettingWithCopyWarning.
temperature = temperature[['ds', 'temperature']].copy()

n_scenarios = 20
last_actual_date = '2023-08-01'  # rows up to and including this date are left unperturbed
max_std_dev = 0.2  # Std dev the random factors ramp up towards at the end of the horizon


def _add_scenario_columns(frame, col, factors_per_scenario, cutoff_date):
  """Return `frame` with one `<col>_<k>` column per factor array and `col` dropped.

  Rows up to and including the first row whose `ds` equals `cutoff_date` copy
  `col` unchanged; later rows hold `col` multiplied element-wise by the k-th
  factor array. The scenario columns are also added to `frame` itself.
  """
  index = frame[frame['ds'] == cutoff_date].index[0]
  index_plus = index + 1
  for k, factors in enumerate(factors_per_scenario, start=1):
    scenario_col = col + '_' + str(k)
    frame.loc[:index, scenario_col] = frame.loc[:index, col].copy()
    frame.loc[index_plus:, scenario_col] = frame.loc[index_plus:, col].values * factors
  return frame.drop(columns=col)


# Number of rows after the cutoff. This treats the index label as a row position,
# i.e. it assumes a 0-based consecutive index — TODO confirm for the regressor frame.
index = temperature[temperature['ds'] == last_actual_date].index[0]
index_plus = index + 1
remaining = len(temperature) - index_plus

# Factors are centred on 1; their std dev grows linearly from 0 on the first
# perturbed row towards max_std_dev on the last one.
# NOTE(review): no random seed is set, so every run draws different scenarios.
scales = max_std_dev * (np.arange(remaining) / remaining)
weather_factors = [np.random.normal(loc=1, scale=scales) for _ in range(n_scenarios)]

# The same factor arrays are applied to all three weather variables so that
# scenario k of temperature, hdd and cdd is built from one draw.
# NOTE(review): this scales hdd and cdd in the same direction as temperature — confirm that is intended.

# Temperature:
temperature = _add_scenario_columns(temperature, 'temperature', weather_factors, last_actual_date)

# Heating Degree Days:
hdd = _add_scenario_columns(hdd, 'hdd', weather_factors, last_actual_date)

# Cooling Degree Days:
cdd = _add_scenario_columns(cdd, 'cdd', weather_factors, last_actual_date)

In [4]:
# Monthly Regressor Scenarios:

gdp = pd.read_parquet("data/regressors/gdp.parquet")
col = 'gdp'
max_std_dev = 0.01  # Std dev the random factors ramp up towards at the end of the horizon

# Rows up to and including 2023-08-01 are copied unchanged; later rows are perturbed.
index = gdp.loc[gdp['ds'] == '2023-08-01'].index[0]
index_plus = index + 1
remaining = len(gdp) - index_plus

# Std dev of the multiplicative factor for each perturbed row: 0 on the first
# one, growing linearly with the row's offset.
scales = max_std_dev * (np.arange(remaining) / remaining)

for scenario in range(1, 21):
  scenario_col = f'{col}_{scenario}'
  gdp.loc[:index, scenario_col] = gdp.loc[:index, 'gdp'].copy()
  random_factors = np.random.normal(loc=1, scale=scales)
  gdp.loc[index_plus:, scenario_col] = gdp.loc[index_plus:, 'gdp'].values * random_factors

# Only the scenario columns (plus whatever else the parquet file holds) are kept.
gdp = gdp.drop(columns=col)

In [5]:
# Scenario client built from the two loaded Prophet models: the daily model is
# passed as the granular model, the monthly model as the monthly one.
scenarios = Scenarios_Client(granular_model=daily_model, monthly_model=monthly_model)

In [6]:
# Register each regressor frame under its variable name, in the same order as
# before (gdp, temperature, hdd, cdd).
scenario_inputs = {
  'gdp': gdp,
  'temperature': temperature,
  'hdd': hdd,
  'cdd': cdd,
}
for variable_name, variable_df in scenario_inputs.items():
  status = scenarios.add_variable(variable_df=variable_df, variable_name=variable_name)

# Show the message returned by the last registration.
status

'Added cdd'

In [7]:
# temperature, hdd and cdd scenarios were built from the same random factors,
# so they are registered as one paired group.
weather_variables = ['temperature', 'hdd', 'cdd']
scenarios.variables_pairing(variables=weather_variables)

"Paired variables: ['temperature', 'hdd', 'cdd']"

In [8]:
# Build the scenarios from the registered variables. The result is indexed by
# scenario position and then by granularity key ('1day', '1month'), as the
# lookups in the following cells show.
scenarios_df = scenarios.create_scenarios()

In [10]:
# Set of granularity keys the client holds for the registered variables.
scenarios.variables_granularities

{'1day', '1month'}

In [13]:
# First scenario, daily-granularity frame (temperature / hdd / cdd scenario columns).
scenarios_df[0]['1day']

Unnamed: 0,ds,temperature_1,hdd_1,cdd_1
0,2015-01-01,9.994652,803.921260,0.0
1,2015-01-02,6.967103,1949.742425,0.0
2,2015-01-03,4.495221,2882.854784,0.0
3,2015-01-04,1.930792,3866.575854,0.0
4,2015-01-05,6.817229,1990.184090,0.0
...,...,...,...,...
5929,2031-03-27,5.882309,1956.168156,0.0
5930,2031-03-28,7.720019,2124.880123,0.0
5931,2031-03-29,5.854276,1725.134661,0.0
5932,2031-03-30,6.226833,2048.190025,0.0


In [13]:
# Number of scenarios the client recorded for the gdp variable.
scenarios.variables['gdp']['scenarios_num']

20

In [11]:
# First scenario, monthly-granularity frame (gdp scenario column).
scenarios_df[0]['1month']

Unnamed: 0,ds,gdp_1
0,2015-01-01,91.687000
1,2015-02-01,91.961000
2,2015-03-01,92.019500
3,2015-04-01,92.551300
4,2015-05-01,92.306600
...,...,...
190,2030-11-01,114.251758
191,2030-12-01,116.550204
192,2031-01-01,118.327794
193,2031-02-01,116.525456


In [None]:
# Same daily-frame lookup for the first scenario as earlier in the notebook;
# this cell has no execution count, i.e. it was not run.
scenarios_df[0]['1day']

In [36]:

from collections import defaultdict

In [59]:
# Toy inputs for prototyping the pairing / crossing logic.
# `ex` holds lists whose entries are matched position by position; `ex2` holds
# lists that are crossed against those matches.
ex = [['f', 'd'], [4, 5]]
ex2 = [['l']]

# Placeholder scenario labels keyed by variable name.
variables = {
    'temperature': ['kaka', 'koko', 'jiji'],
    'cdd': ['popo', 'kuku', 'mhm'],
    'hdd': ['popo', 'uiui', 'mhm'],
}

In [62]:
# Prototype of pairing + crossing: zip(*ex) matches the i-th entries of the lists
# in `ex`, and product(...) then combines every match with each entry of the
# lists in `ex2`. `product` comes from itertools (imported at the top of the notebook).
ex_pairs = list(zip(*ex))
list(product(ex_pairs, *ex2))

[(('f', 4), 'l'), (('d', 5), 'l')]

In [47]:
# Inspect the toy variable -> labels mapping.
variables

{'temperature': ['kaka', 'koko', 'jiji'],
 'cdd': ['popo', 'kuku', 'mhm'],
 'hdd': ['popo', 'uiui', 'mhm']}

In [28]:
# Example:
import pandas as pd
from itertools import product

def _toy_scenarios(prefix, power, freq):
    """Build a dated frame with twenty constant columns `<prefix>_1` .. `<prefix>_20`.

    Column `<prefix>_<k>` holds the scalar `(k - 1) ** power` on every row; the
    `date` column runs from 2015-01-01 to 2024-01-10 at the given frequency.
    """
    dates = pd.date_range(start="2015-01-01", end="2024-01-10", freq=freq)
    scenario_cols = {f"{prefix}_{k + 1}": k ** power for k in range(20)}
    return pd.DataFrame({"date": dates}).assign(**scenario_cols)


# Three daily toy variables and one month-start one.
df1 = _toy_scenarios("temp", 2, "D")
df2 = _toy_scenarios("cdd", 2.2, "D")
df3 = _toy_scenarios("hdd", 2.5, "D")
df4 = _toy_scenarios("gdp", 5, "MS")

# The scenario column names are the numeric columns of each frame.
var1, var2, var3, var4 = (
    frame.select_dtypes('number').columns.tolist() for frame in (df1, df2, df3, df4)
)

# Paired: scenario k of temp goes with scenario k of cdd.
paired = list(zip(var1, var2))

# Cross: every (temp, cdd) pair combined with every hdd scenario.
cross = list(product(paired, var3))

# Cross 2: additionally combined with every gdp scenario.
cross2 = list(product(paired, var3, var4))


In [32]:
# Print how many (temp, cdd) pairs zip produced, then display the pairs.
print(len(paired))
paired

20


[('temp_1', 'cdd_1'),
 ('temp_2', 'cdd_2'),
 ('temp_3', 'cdd_3'),
 ('temp_4', 'cdd_4'),
 ('temp_5', 'cdd_5'),
 ('temp_6', 'cdd_6'),
 ('temp_7', 'cdd_7'),
 ('temp_8', 'cdd_8'),
 ('temp_9', 'cdd_9'),
 ('temp_10', 'cdd_10'),
 ('temp_11', 'cdd_11'),
 ('temp_12', 'cdd_12'),
 ('temp_13', 'cdd_13'),
 ('temp_14', 'cdd_14'),
 ('temp_15', 'cdd_15'),
 ('temp_16', 'cdd_16'),
 ('temp_17', 'cdd_17'),
 ('temp_18', 'cdd_18'),
 ('temp_19', 'cdd_19'),
 ('temp_20', 'cdd_20')]

In [33]:
# Print the number of pair x hdd combinations, then display them.
print(len(cross))
cross

400


[(('temp_1', 'cdd_1'), 'hdd_1'),
 (('temp_1', 'cdd_1'), 'hdd_2'),
 (('temp_1', 'cdd_1'), 'hdd_3'),
 (('temp_1', 'cdd_1'), 'hdd_4'),
 (('temp_1', 'cdd_1'), 'hdd_5'),
 (('temp_1', 'cdd_1'), 'hdd_6'),
 (('temp_1', 'cdd_1'), 'hdd_7'),
 (('temp_1', 'cdd_1'), 'hdd_8'),
 (('temp_1', 'cdd_1'), 'hdd_9'),
 (('temp_1', 'cdd_1'), 'hdd_10'),
 (('temp_1', 'cdd_1'), 'hdd_11'),
 (('temp_1', 'cdd_1'), 'hdd_12'),
 (('temp_1', 'cdd_1'), 'hdd_13'),
 (('temp_1', 'cdd_1'), 'hdd_14'),
 (('temp_1', 'cdd_1'), 'hdd_15'),
 (('temp_1', 'cdd_1'), 'hdd_16'),
 (('temp_1', 'cdd_1'), 'hdd_17'),
 (('temp_1', 'cdd_1'), 'hdd_18'),
 (('temp_1', 'cdd_1'), 'hdd_19'),
 (('temp_1', 'cdd_1'), 'hdd_20'),
 (('temp_2', 'cdd_2'), 'hdd_1'),
 (('temp_2', 'cdd_2'), 'hdd_2'),
 (('temp_2', 'cdd_2'), 'hdd_3'),
 (('temp_2', 'cdd_2'), 'hdd_4'),
 (('temp_2', 'cdd_2'), 'hdd_5'),
 (('temp_2', 'cdd_2'), 'hdd_6'),
 (('temp_2', 'cdd_2'), 'hdd_7'),
 (('temp_2', 'cdd_2'), 'hdd_8'),
 (('temp_2', 'cdd_2'), 'hdd_9'),
 (('temp_2', 'cdd_2'), 'hdd_10')

In [35]:
# Print the number of pair x hdd x gdp combinations, then display them.
print(len(cross2))
cross2

8000


[(('temp_1', 'cdd_1'), 'hdd_1', 'gdp_1'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_2'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_3'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_4'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_5'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_6'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_7'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_8'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_9'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_10'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_11'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_12'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_13'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_14'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_15'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_16'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_17'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_18'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_19'),
 (('temp_1', 'cdd_1'), 'hdd_1', 'gdp_20'),
 (('temp_1', 'cdd_1'), 'hdd_2', 'gdp_1'),
 (('temp_1', 'cdd_1'), 'hdd_2', 'gdp_2'),
 (('temp_1', 'cdd_1'), 'hdd_2', 'gdp_3'),
 (('temp_1', 'cdd_1'), 

In [85]:
# Each entry of cross2 starts with a paired tuple followed by single names;
# unpack the pair so every combination becomes one flat tuple of column names.
flattened_values = []
for paired_names, *crossed_names in cross2:
  flattened_values.append((*paired_names, *crossed_names))
flattened_values

[('temp_1', 'cdd_1', 'hdd_1', 'gdp_1'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_2'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_3'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_4'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_5'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_6'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_7'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_8'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_9'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_10'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_11'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_12'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_13'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_14'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_15'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_16'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_17'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_18'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_19'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_20'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_1'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_2'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_3'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_4'),
 ('temp_1', 'cdd_1', 'hdd_2',

In [86]:
# Display the flattened combinations again.
flattened_values

[('temp_1', 'cdd_1', 'hdd_1', 'gdp_1'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_2'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_3'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_4'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_5'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_6'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_7'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_8'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_9'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_10'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_11'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_12'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_13'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_14'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_15'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_16'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_17'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_18'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_19'),
 ('temp_1', 'cdd_1', 'hdd_1', 'gdp_20'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_1'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_2'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_3'),
 ('temp_1', 'cdd_1', 'hdd_2', 'gdp_4'),
 ('temp_1', 'cdd_1', 'hdd_2',

In [89]:
# Variable names listed in the same order as the entries of a flattened combination.
names = ['temperature', 'cdd', 'hdd', 'gdp']
# First flattened combination, used as a sample.
loop1 = flattened_values[0]
loop1

('temp_1', 'cdd_1', 'hdd_1', 'gdp_1')

In [None]:
# NOTE(review): `tempo1` is not defined in any cell visible in this notebook and
# this cell has no execution count, so it raises NameError as written — confirm
# the intended lookup or remove the cell.
df1[df1[tempo1]]

In [91]:
# Walk the sample combination alongside the variable names, printing each
# scenario column name next to the variable it belongs to.
for cos, name in zip(loop1, names):
  print(cos, name)
  

temp_1 temperature
cdd_1 cdd
hdd_1 hdd
gdp_1 gdp


In [93]:
# Name of the first datetime-typed column of df1.
df1.select_dtypes('datetime').columns.tolist()[0]

'date'

In [94]:
# Two-level auto-creating mapping: a missing first-level key yields a new
# defaultdict, whose missing keys in turn yield an empty dict.
# Despite its name, `df` is not a DataFrame.
df = defaultdict(lambda: defaultdict(dict))

In [97]:
# Looking up a missing key on a defaultdict creates it, so this both reports 0
# and leaves an empty 'granularity' entry behind in `df`.
len(df['granularity'])

0