In [14]:
# Import functions
from markov_chains import *
from dmp_downloader import *
import calendar

In [15]:
# Run configuration: the reference month/year and the iteration count handed to the Markov step

current_month: str = "january"
current_year: int = 2024
num_iterations: int = 10

In [16]:
# Derived parameters: everything in this cell follows from the values set in the parameters cell

# Month abbreviation -> month number (1-12). calendar.month_abbr[0] is an empty
# placeholder, so indexing starts at 1.
# NOTE: calendar.month_abbr is locale-dependent; the "Jan"/"Feb"/"Oct" lookups
# below assume English abbreviations.
calendar_dict = {calendar.month_abbr[month_num]: month_num for month_num in range(1, 13)}

# "january" -> "Jan" -> 1
current_month_abbr = current_month[:3].title()
current_month_num: int = calendar_dict[current_month_abbr]

# Name -> "Mon-YYYY" label for each "ws" entry
# (presumably worksheet name -> first month of its series — TODO confirm)
ws_date_ranges_dict = {
    "Current recruitment difficulty": "Oct-2021",
    "Overall uncertainty": "Feb-2020",
    "Employment growth": "Jan-2017",
}

ws_names: list[str] = list(ws_date_ranges_dict)

# For every entry, call months_diff(first day of the current month, first day of
# the labelled month). The label is parsed by position: first three characters
# are the month abbreviation, last four are the year.
# NOTE(review): `months_diff` and `datetime` are not imported explicitly in this
# notebook; presumably they arrive via the star imports — confirm.
column_lengths_dict: dict[str, int] = {
    ws_name: months_diff(
        datetime.date(current_year, current_month_num, 1),
        datetime.date(int(start_label[-4:]), calendar_dict[start_label[:3]], 1),
    )
    for ws_name, start_label in ws_date_ranges_dict.items()
}

In [10]:
# Extraction step: download the raw data and process it in memory, producing the
# .csv files needed downstream; the returned paths feed the Markov calculation

input_file_paths: list[str] = download_and_process_dmp_data(
    current_month,
    current_year,
    ws_names,
    column_lengths_dict,
)

In [11]:
# Run calculate_markovs once per input file (all runs finish first), then rebuild
# each three-part return value as a (means_list, std_devs_list, future_dfs_dict) tuple
results = [
    (means, std_devs, future_dfs)
    for means, std_devs, future_dfs in [
        calculate_markovs(csv_path, num_iterations) for csv_path in input_file_paths
    ]
]

# Fan the tuples out into named variables. This requires exactly three results;
# the recruitment / uncertainty / employment-growth order presumably mirrors
# the order of ws_names — TODO confirm.
(
    (recruitment_means_list, recruitment_std_devs_list, recruitment_future_dfs_dict),
    (uncertainty_means_list, uncertainty_std_devs_list, uncertainty_future_dfs_dict),
    (employment_growth_means_list, employment_growth_std_devs_list, employment_growth_future_dfs_dict),
) = results



You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  transition_df[k][j] += 1  # Update k-th column and j-th row for i-th matrix
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work

In [17]:
# Examples
# Show everything stored under the "much_harder" key of the recruitment projections.
# In the recorded output this is a list of single-row DataFrames with "up"/"down" columns.

recruitment_future_dfs_dict["much_harder"]

[         up      down
 0  0.222222  0.777778,
          up      down
 0  0.369644  0.630356,
          up      down
 0  0.341701  0.658299,
          up      down
 0  0.346998  0.653002,
          up      down
 0  0.345994  0.654006,
          up      down
 0  0.346184  0.653816,
          up      down
 0  0.346148  0.653852,
          up      down
 0  0.346155  0.653845,
          up      down
 0  0.346154  0.653846,
          up      down
 0  0.346154  0.653846]

In [28]:
# For each key of the uncertainty projections, keep the last DataFrame in its list
uncertainty_columns = list(uncertainty_future_dfs_dict)
uncertainty_final_step_dfs = [
    projected_dfs[-1] for projected_dfs in uncertainty_future_dfs_dict.values()
]

# Print a heading and render that final DataFrame for every key, in dict order
for column_name, df in zip(uncertainty_columns, uncertainty_final_step_dfs):
    print(f"Projected state of {column_name} after {num_iterations} steps")
    display(df)

Projected state of very_low after 10 steps


Unnamed: 0,up,down
0,0.499958,0.500042


Projected state of low after 10 steps


Unnamed: 0,up,down
0,0.518519,0.481481


Projected state of medium after 10 steps


Unnamed: 0,up,down
0,0.56701,0.43299


Projected state of high after 10 steps


Unnamed: 0,up,down
0,0.521743,0.478257


Projected state of very_high after 10 steps


Unnamed: 0,up,down
0,0.436742,0.563258
