# Configs

Import packages.

In [3]:
import pandas as pd
import yaml

from pathlib import Path

Read the GIFT-Eval datasets' metadata.

In [4]:
# Load the GIFT-Eval metadata CSV into a DataFrame.
metadata_path = Path("../src/data/meta/metadata.csv")
df = pd.read_csv(filepath_or_buffer=metadata_path)

# Sanity check: report the dimensions, then preview the first five rows.
print(f"shape: {df.shape}")
df.head(n=5)

shape: (97, 10)


Unnamed: 0,name,term,freq,domain,_total_univariate_series,target_dim,_min_series_length,sum_series_length,prediction_length,windows
0,LOOP_SEATTLE/5T,short,5T,Transport,323,1,105120,33953760,48,20
1,LOOP_SEATTLE/D,short,D,Transport,323,1,365,117895,30,2
2,LOOP_SEATTLE/H,short,H,Transport,323,1,8760,2829480,48,19
3,M_DENSE/D,short,D,Transport,30,1,730,21900,30,3
4,M_DENSE/H,short,H,Transport,30,1,17520,525600,48,20


For each domain, count the number of datasets, the total number of univariate series, and the maximum number of univariate series that belong to a single dataset.

In [5]:
# Distinct domains in order of first appearance.
domains = df['domain'].unique().tolist()

# Per-domain summary built in a single groupby pass instead of re-filtering
# the frame once per domain:
#   num_datasets - number of metadata rows in the domain
#   total_series - sum of `_total_univariate_series` over those rows
#   max_series   - largest `_total_univariate_series` of any single row
# sort=False keeps the groups in order of first appearance before the final
# sort. Unlike building rows by hand, this also works when `df` is empty.
summary_df = (
    df.groupby("domain", sort=False)
    .agg(
        num_datasets=("_total_univariate_series", "size"),
        total_series=("_total_univariate_series", "sum"),
        max_series=("_total_univariate_series", "max"),
    )
    .reset_index()
    .sort_values(by="total_series", ascending=True)
    .reset_index(drop=True)
)
display(summary_df)

Unnamed: 0,domain,num_datasets,total_series,max_series
0,Healthcare,5,1036,767
1,Transport,15,3005,323
2,Sales,4,3717,2674
3,Energy,32,4168,370
4,Web/CloudOps,20,14174,2500
5,Nature,15,33302,32072
6,Econ/Fin,6,99974,48000


Read the "all" data config, keep its first 55 entries, and count how many of them have the "short" term.

In [8]:
config_path = Path("../conf/data/all.yaml")

# YAML files are normally UTF-8-encoded; pass the encoding explicitly so that
# parsing does not depend on the platform's locale default.
with open(config_path, "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

config_df = pd.DataFrame(config)

# Number of leading config rows to inspect (previously a bare literal).
NUM_ROWS_TO_INSPECT = 55
limited_df = config_df.head(NUM_ROWS_TO_INSPECT)

# Count the rows where the 'term' column equals "short"
short_count = (limited_df['term'] == "short").sum()

print(f"Number of rows where 'term' equals 'short': {short_count}")

display(limited_df)

Number of rows where 'term' equals 'short': 55


Unnamed: 0,name,term
0,LOOP_SEATTLE/5T,short
1,LOOP_SEATTLE/D,short
2,LOOP_SEATTLE/H,short
3,M_DENSE/D,short
4,M_DENSE/H,short
5,SZ_TAXI/15T,short
6,SZ_TAXI/H,short
7,bitbrains_fast_storage/5T,short
8,bitbrains_fast_storage/H,short
9,bitbrains_rnd/5T,short
