# Sample Time Series Data Generator

Use this notebook to generate sample time series data and save it to an output directory for testing the VAI Utils Time Series module.

In [None]:
import numpy as np 
import pandas as pd 
from datetime import datetime

In [None]:
# Directory that to_vai_format() writes the per-day CSV files into.
OUTPUT_PATH = 'data/random_no_target'
# Bounds passed to random_dates() when sampling the random timestamps.
START_DATE = pd.to_datetime('2023-01-01')
END_DATE = pd.to_datetime('2024-01-01')

In [None]:
def to_vai_format(df, path):
    """Saves one csv per calendar date (MM-DD-YYYY.csv) in the specified path.

    Each file contains the rows of that date, the index written as a
    'datetime' column, and an extra 'HH:MM:SS' column holding the time of day
    of every row. The output directory is created if it does not exist yet.
    The input dataframe is not modified.

    Args:
        df (pandas.DataFrame): dataframe with datetime index
        path (string or path-like): top level directory to save csvs

    Returns:
        None
    """
    # Local import so the function works without touching the notebook's import cell.
    import os

    # to_csv does not create missing directories, so make sure the target exists.
    os.makedirs(path, exist_ok=True)

    for date, group in df.groupby(df.index.date):
        # Work on a renamed copy rather than mutating the sub-frame handed out by groupby.
        day_frame = group.rename_axis('datetime')
        day_frame['HH:MM:SS'] = day_frame.index.strftime("%H:%M:%S")
        filename = date.strftime("%m-%d-%Y") + ".csv"
        day_frame.to_csv(os.path.join(path, filename))

In [None]:
def random_dates(start, end, n=31537):
    """Generates n random datetimes between start and end, sorted ascending.

    Datetimes are drawn at whole-second resolution from the half-open
    interval [start, end), so the result may contain duplicates.

    Args:
        start (pandas.Timestamp): pd timestamp start (inclusive)
        end (pandas.Timestamp): pd timestamp end (exclusive)
        n (int): number of samples to generate

    Returns:
        pandas.DatetimeIndex: sorted datetime index with n random datetimes

    Raises:
        ValueError: raised by numpy when start is not before end at
            whole-second resolution.
    """
    # Timestamp.value is nanoseconds since the epoch; reduce to whole seconds.
    start_u = start.value // 10**9
    end_u = end.value // 10**9

    # Ask for int64 explicitly: where numpy's default integer is 32-bit,
    # epoch seconds beyond 2038 would be out of bounds.
    epoch_seconds = np.random.randint(start_u, end_u, n, dtype=np.int64)
    return pd.to_datetime(epoch_seconds, unit='s').sort_values()

In [None]:
# Sample the sorted random timestamps that become the dataframe index.
date_rng = random_dates(start=START_DATE, end=END_DATE)

In [None]:
# Build the random feature matrix: one row per timestamp, one column per feature
num_features = 10
features = np.random.rand(date_rng.size, num_features)

In [None]:
# Combine the random features with the random timestamps as the index.
df = pd.DataFrame(features, index=date_rng)

In [None]:
# Write the generated data to OUTPUT_PATH, one csv per calendar date.
to_vai_format(df=df, path=OUTPUT_PATH)