In [15]:
import pandas as pd
import numpy as np
import os

In [16]:
# Sample data

# (start, end) dates handed to pd.date_range when a generator is called
# without an explicit range.
DEFAULT_DATE_RANGE = ('2023-01-01', '2025-01-01')

def generate_timeseries_df(start=None, end=None, seed=None) -> pd.DataFrame:
    """Build a synthetic month-start time series with columns ``date`` and ``value``.

    Values are normal noise (mean 5500, std 1500) plus a linear trend rising
    from 0 to 2000 across the range, clamped to [1000, 10000] and rounded to
    two decimals.

    Parameters
    ----------
    start, end : optional
        Bounds passed to ``pd.date_range`` with ``freq="MS"``. Each one falls
        back to the matching entry of ``DEFAULT_DATE_RANGE`` when ``None``.
    seed : optional
        Seed for ``np.random.default_rng``. When ``None`` the seed is drawn
        with ``np.random.randint(1, 99)`` from NumPy's legacy global RNG (the
        original behaviour), so results vary between calls unless
        ``np.random.seed`` was called beforehand.

    Returns
    -------
    pd.DataFrame
        One row per month start, columns ``date`` then ``value``.
    """
    if start is None or end is None:
        default_start, default_end = DEFAULT_DATE_RANGE
        start = default_start if start is None else start
        end = default_end if end is None else end

    idx = pd.date_range(start=start, end=end, freq="MS")  # MS = month start
    n_points = len(idx)

    if seed is None:
        # Keep the historical fallback so existing no-argument callers behave
        # exactly as before (and can still be pinned via np.random.seed).
        seed = np.random.randint(1, 99)
    rng = np.random.default_rng(seed)

    base = rng.normal(loc=5500, scale=1500, size=n_points)
    trend = np.linspace(0, 2000, n_points)
    # Clamp to a plausible band before rounding.
    values = np.clip(base + trend, 1000, 10000).round(2)

    return pd.DataFrame({"date": idx, "value": values})
    
def generate_multiline_df(start=None, end=None, seed=None) -> pd.DataFrame:
    """Build a synthetic long-format monthly series for four regions.

    For each region (North, South, East, West, in that order) the value is a
    region-specific base level plus a trend plus normal noise (std 60,000),
    floored at 0 and rounded to two decimals. Even-indexed regions trend
    upward linearly; odd-indexed regions go down over the first half of the
    range and back up over the second half.

    Parameters
    ----------
    start, end : optional
        Bounds passed to ``pd.date_range`` with ``freq="MS"``. Each one falls
        back to the matching entry of ``DEFAULT_DATE_RANGE`` when ``None``.
    seed : optional
        Seed for ``np.random.default_rng``. When ``None`` the seed is drawn
        with ``np.random.randint(1, 99)`` from NumPy's legacy global RNG (the
        original behaviour), so results vary between calls unless
        ``np.random.seed`` was called beforehand.

    Returns
    -------
    pd.DataFrame
        Columns ``date``, ``region``, ``value``; the regions' rows are stacked
        one region after another with a fresh integer index.
    """
    if start is None or end is None:
        # Look the default up lazily so explicit ranges never need the constant.
        default_start, default_end = DEFAULT_DATE_RANGE
        start = default_start if start is None else start
        end = default_end if end is None else end

    idx = pd.date_range(start=start, end=end, freq="MS")
    n_points = len(idx)

    if seed is None:
        # Historical fallback: keeps no-argument callers unchanged.
        seed = np.random.randint(1, 99)
    rng = np.random.default_rng(seed)

    region_names = ["North", "South", "East", "West"]
    frames: list[pd.DataFrame] = []
    for i, name in enumerate(region_names):
        base = 700_000 + i * 120_000
        trend_slope = 400_000 + i * 50_000  # same magnitude for both shapes
        if i % 2 == 0:
            # Steady rise across the whole range.
            trend = np.linspace(0, trend_slope, n_points)
        else:
            # Down then up: negative slope for the first half, positive for the second.
            half = n_points // 2
            trend = np.concatenate([
                np.linspace(0, -trend_slope, half),
                np.linspace(-trend_slope, trend_slope, n_points - half),
            ])
        noise = rng.normal(loc=0, scale=60_000, size=n_points)
        values = np.maximum(base + trend + noise, 0).round(2)
        frames.append(pd.DataFrame({"date": idx, "region": name, "value": values}))

    return pd.concat(frames, ignore_index=True)

In [17]:
# Seed NumPy's legacy global RNG before generating anything: each generator
# draws its per-call seed from it (np.random.randint), so pinning it here makes
# Restart & Run All reproduce the same sample data and exported CSVs.
# The calls below consume successive draws from that stream, in this order.
SAMPLE_DATA_SEED = 42
np.random.seed(SAMPLE_DATA_SEED)

bar = generate_timeseries_df()
single_line = generate_timeseries_df()
multi_line = generate_multiline_df()

# Bar chart

In [18]:
# Preview the first rows of the bar-chart sample data.
bar.head()

Unnamed: 0,date,value
0,2023-01-01,1835.19
1,2023-02-01,4066.15
2,2023-03-01,3642.9
3,2023-04-01,3726.75
4,2023-05-01,5210.06


# Single line chart

In [19]:
# Preview the first rows of the single-line-chart sample data.
single_line.head()

Unnamed: 0,date,value
0,2023-01-01,3868.61
1,2023-02-01,3716.4
2,2023-03-01,6507.46
3,2023-04-01,6783.23
4,2023-05-01,6475.53


# Multi line chart

In [20]:
# Preview the first rows of the multi-line-chart sample data (long format, one row per date and region).
multi_line.head()

Unnamed: 0,date,region,value
0,2023-01-01,North,699590.39
1,2023-02-01,North,779435.26
2,2023-03-01,North,777828.64
3,2023-04-01,North,793437.39
4,2023-05-01,North,863793.24


# Quarterly Effort

In [21]:
# Hard-coded (quarter label, average effort per project) records.
# Each pair is kept on one line so a label can never drift away from its value.
_quarterly_effort_records = [
    ('2021Q1', 2007.000000),
    ('2021Q2', 992.500000),
    ('2021Q4', 1155.000000),
    ('2022Q1', 874.350000),
    ('2022Q2', 2477.600000),
    ('2022Q3', 879.666667),
    ('2022Q4', 1368.000000),
    ('2023Q1', 401.062500),
    ('2023Q2', 1108.333333),
    ('2023Q3', 1340.833333),
    ('2023Q4', 866.000000),
    ('2024Q1', 1691.666667),
    ('2024Q2', 810.000000),
    ('2024Q3', 2761.000000),
    ('2024Q4', 2736.500000),
    ('2025Q1', 916.666667),
    ('2025Q2', 176.833333),
    ('2025Q4', 344.000000),
]
quarterly_effort = pd.DataFrame(
    _quarterly_effort_records,
    columns=['completion_quarter_str', 'avg_effort_per_project'],
)
quarterly_effort.head()

Unnamed: 0,completion_quarter_str,avg_effort_per_project
0,2021Q1,2007.0
1,2021Q2,992.5
2,2021Q4,1155.0
3,2022Q1,874.35
4,2022Q2,2477.6


In [22]:
# Write every chart's final DataFrame to its own CSV under ./output (no index column).
csv_exports = {
    'output/bar.csv': bar,
    'output/single_line.csv': single_line,
    'output/multi_line.csv': multi_line,
    'output/quarterly_effort.csv': quarterly_effort,
}
os.makedirs('output', exist_ok=True)  # no error if the folder already exists
for csv_path, frame in csv_exports.items():
    frame.to_csv(csv_path, index=False)