In [0]:
# Notebook parameters exposed as Databricks text widgets.
# Each entry is (widget name, default value, label shown in the UI).
_WIDGET_SPECS = (
    ("start_date", "2023-01-01", "Data di inizio"),
    ("end_date", "2025-08-18", "Data di fine"),
    ("output_name", "dato_input_2025-08-18.csv", "Nome file output"),
    ("output_folder", "/Volumes/ts_catalog/ts_data/ts_input/", "Cartella output"),
    ("seed", "100", "Seed random"),
)
for _spec in _WIDGET_SPECS:
    dbutils.widgets.text(*_spec)

In [0]:
# Fetch the current widget values, in the same order the widgets were declared.
start_date, end_date, output_name, output_folder = (
    dbutils.widgets.get(_key)
    for _key in ("start_date", "end_date", "output_name", "output_folder")
)
# The seed is the only parameter converted to an integer after reading.
seed = int(dbutils.widgets.get("seed"))

In [0]:
import sys
import os

# Put the parent of the current working directory on the import path
# (presumably so the project-level `src` package can be imported below).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(parent_dir) == 0:
    # Append only once so re-running the cell does not add duplicates.
    sys.path.append(parent_dir)

In [0]:
import pandas as pd
import numpy as np
from src.data.columns import Columns

In [0]:
def generate_series(name, dates, amp_28=10, amp_7=5, noise_scale=0.1):
    """Build a synthetic noisy series made of two superimposed sine waves.

    The amount is the sum of a sinusoid with a period of 28 samples, a
    sinusoid with a period of 7 samples, and Gaussian noise. The phase is
    driven by the position of each entry in ``dates`` (0, 1, 2, ...), not
    by the calendar distance between entries.

    Args:
        name: Label written to the name column of every row.
        dates: Sequence of dates; one row is produced per entry.
        amp_28: Amplitude of the 28-sample sinusoid.
        amp_7: Amplitude of the 7-sample sinusoid.
        noise_scale: Factor applied to ``amp_28 + amp_7`` to obtain the
            scale of the standard-normal noise.

    Returns:
        A pandas DataFrame with date, amount and name columns, whose
        labels are taken from ``Columns.DATE``, ``Columns.AMOUNT`` and
        ``Columns.NAME``.

    Note:
        The noise comes from NumPy's global random state
        (``np.random.randn``), so the output is reproducible only if the
        caller seeds that state beforehand.
    """
    n_points = len(dates)
    step = np.arange(n_points)

    def wave(amplitude, period):
        # Sinusoid sampled at the integer positions held in `step`.
        return amplitude * np.sin(2 * np.pi * step / period)

    noise_level = (amp_28 + amp_7) * noise_scale
    values = wave(amp_28, 28) + wave(amp_7, 7) + noise_level * np.random.randn(n_points)

    return pd.DataFrame({
        Columns.DATE.value: dates,
        Columns.AMOUNT.value: values,
        Columns.NAME.value: name,
    })

In [0]:
# Seed NumPy's global random state so the noise drawn below is repeatable.
np.random.seed(seed)

# Business-day calendar shared by both series.
dates = pd.bdate_range(start=start_date, end=end_date)

# Generate the series in a fixed order: each call consumes random numbers
# from the global state, so the order determines the values of each series.
df1, df2 = (generate_series(label, dates) for label in ("series_1", "series_2"))

# Stack the two series and render the dates as text (day-abbreviated month-
# two-digit year, e.g. "18-Aug-25" in an English locale).
df = pd.concat([df1, df2], ignore_index=True)
_date_column = Columns.DATE.value
df[_date_column] = df[_date_column].dt.strftime("%d-%b-%y")

display(df.tail(10))

In [0]:
# Build the destination path with os.path.join so it is valid whether or
# not the folder widget value ends with a path separator (plain string
# concatenation would glue the folder and file names together).
output_file = os.path.join(output_folder, output_name)

# Write the combined dataset without the positional index column.
df.to_csv(output_file, index=False)
print(f"File '{output_file}' creato con successo con due serie distinte.")