In [2]:
# Import functions
from markov_chains import *
from dmp_downloader import *
from output_writer import *

from datetime import datetime, timedelta, date
from typing import List

In [3]:
# Run configuration: number of Markov iterations and the reporting month

num_iterations: int = 10

# Stepping back one day from the 1st of the current month always lands on the
# last day of the previous month, whatever that month's length is.
current_day: datetime = datetime.today()
first_day_current_month: datetime = current_day.replace(day=1)
prev_month_last_day: datetime = first_day_current_month + timedelta(days=-1)

# Lower-cased full month name (as produced by the %B format code), then the
# numeric month and year of the previous month.
prev_month_name: str = f"{prev_month_last_day:%B}".lower()
prev_month_num: int = prev_month_last_day.month
prev_month_year: int = prev_month_last_day.year

In [4]:
# Derived settings: everything in this cell follows from the reporting month

# Worksheet name -> [year, month] of the first month of that time series
ws_date_ranges_dict: dict[str, List[int]] = {
    "Current recruitment difficulty": [2021, 10],
    "Overall uncertainty": [2020, 2],
    "Employment growth": [2017, 1],
}

# Worksheet names, in the order they are declared above
ws_names: list[str] = [*ws_date_ranges_dict]

# Expected number of columns per time series, keyed by worksheet name.
# months_diff is not defined in this notebook (it arrives via a star import);
# presumably it returns the number of months between the two dates - TODO confirm.
column_lengths_dict: dict[str, int] = {
    ws_name: months_diff(
        date(prev_month_year, prev_month_num, 1),
        date(start_year, start_month, 1),
    )
    for ws_name, (start_year, start_month) in ws_date_ranges_dict.items()
}

In [5]:
# Extract: download the raw data and process it in memory, producing the .csv
# files that the downstream cells read

input_file_paths: list[str] = download_and_process_dmp_data(
    prev_month_name,
    prev_month_year,
    ws_names,
    column_lengths_dict,
)

In [6]:
# Transform: run the Markov calculation once per input file and collect each
# (means, std devs, future dataframes) triple, in input-file order.
# NOTE(review): the annotation below is kept as written; the first two slots are
# annotated List[float] although the original code named them *_dict - confirm
# against calculate_markovs' actual return type.

results: List[tuple[List[float], List[float], dict[str, List[pd.DataFrame]]]] = []
for csv_path in input_file_paths:
    means, std_devs, future_dfs = calculate_markovs(csv_path, num_iterations)
    results.append((means, std_devs, future_dfs))

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  transition_df[k][j] += 1  # Update k-th column and j-th row for i-th matrix
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work

In [7]:
# Load: write the results to an .xlsx workbook for convenient access

# The workbook is named after the reporting month and year
output_xlsx_path = f"markov_results_{prev_month_name}_{prev_month_year}.xlsx"
write_results_to_xlsx(input_file_paths, results, output_path=output_xlsx_path)

File saved to markov_results_march_2024.xlsx succesfully!




In [9]:
# Ad-hoc run on growth_mar.csv. The iteration count comes from the notebook-wide
# `num_iterations` setting instead of a repeated literal, so this cell stays in
# step with the main pipeline if that setting is changed.
growth_means_dict, growth_std_devs_dict, growth_future_dfs_dict = calculate_markovs("growth_mar.csv", num_iterations)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  transition_df[k][j] += 1  # Update k-th column and j-th row for i-th matrix


In [10]:
# Ad-hoc run on sonia_mar.csv. The iteration count comes from the notebook-wide
# `num_iterations` setting instead of a repeated literal, so this cell stays in
# step with the main pipeline if that setting is changed.
sonia_means_dict, sonia_std_devs_dict, sonia_future_dfs_dict = calculate_markovs("sonia_mar.csv", num_iterations)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  transition_df[k][j] += 1  # Update k-th column and j-th row for i-th matrix


In [11]:
# Ad-hoc run on gap_mar.csv. The iteration count comes from the notebook-wide
# `num_iterations` setting instead of a repeated literal, so this cell stays in
# step with the main pipeline if that setting is changed.
gap_means_dict, gap_std_devs_dict, gap_future_dfs_dict = calculate_markovs("gap_mar.csv", num_iterations)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  transition_df[k][j] += 1  # Update k-th column and j-th row for i-th matrix


In [15]:
# Ad-hoc run on competitors_mar.csv. The iteration count comes from the
# notebook-wide `num_iterations` setting instead of a repeated literal, so this
# cell stays in step with the main pipeline if that setting is changed.
competitors_means_dict, competitors_std_devs_dict, competitors_future_dfs_dict = calculate_markovs("competitors_mar.csv", num_iterations)

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  transition_df[k][j] += 1  # Update k-th column and j-th row for i-th matrix
