In [5]:
import numpy as np
import pandas as pd

# Parameters
num_times = 100  # number of different times in the time series
num_points_per_time = 100  # number of points for each time
num_features = 3  # number of features
anomaly_ratio = 0.01  # ratio of anomalies per time

# Values that do not depend on the time step are computed once, outside the loop.
feature_columns = [f'Feature_{i+1}' for i in range(num_features)]
num_anomalies = int(num_points_per_time * anomaly_ratio)
# Sine pattern per feature; column i is scaled by (i + 1) * 0.5.
periodic_pattern = (
    np.sin(np.linspace(0, 10, num_points_per_time))[:, None]
    * np.arange(1, num_features + 1)
    * 0.5
)

# Generate one frame per time step and collect them in a list. Concatenating once
# at the end avoids re-copying the accumulated DataFrame on every iteration.
frames = []
for t in range(num_times):
    # Gaussian noise plus the periodic pattern
    baseline = np.random.randn(num_points_per_time, num_features) + periodic_pattern

    # Introduce anomalies: perturb a few randomly chosen rows with extra noise (std 3)
    anomaly_indices = np.random.choice(num_points_per_time, num_anomalies, replace=False)
    baseline[anomaly_indices] += np.random.randn(num_anomalies, num_features) * 3

    # Label the rows of this time step
    temp_df = pd.DataFrame(baseline, columns=feature_columns)
    temp_df['Time'] = t
    temp_df['Anomaly'] = 0
    temp_df.loc[anomaly_indices, 'Anomaly'] = 1
    frames.append(temp_df)

df = pd.concat(frames, ignore_index=True)

# Shuffle the DataFrame
df = df.sample(frac=1).reset_index(drop=True)

# Display the shuffled DataFrame (the notebook renders a truncated preview)
df

Unnamed: 0,Feature_1,Feature_2,Feature_3,Time,Anomaly
0,-0.408369,-0.398377,-2.729000,45,0
1,0.873882,-0.604648,1.548505,20,0
2,-0.730538,-0.664050,-0.205321,67,0
3,0.483484,2.418424,-0.698387,25,0
4,2.314556,-0.294959,0.949196,41,0
...,...,...,...,...,...
9995,1.687416,0.452357,1.904850,31,0
9996,-0.457175,0.254593,-0.606134,91,0
9997,-0.638825,0.110515,-0.503206,51,0
9998,0.590966,1.054791,0.695094,51,0


In [7]:
import numpy as np

def generate_synthetic_time_series(
    num_times,
    num_points_per_time,
    *,
    num_features=3,
    anomaly_ratio=0.01,
):
    """
    Generate a synthetic time series dataset.

    Every time step is Gaussian noise with a sine pattern added to each feature
    (feature ``i`` is scaled by ``(i + 1) * 0.5``). A fraction of the rows of each
    time step is then perturbed with additional noise (standard deviation 3) to
    act as anomalies. Random numbers come from NumPy's global ``np.random`` state.

    :param num_times: Number of different times in the time series.
    :param num_points_per_time: Number of points for each time.
    :param num_features: Number of features per point (keyword-only).
    :param anomaly_ratio: Fraction of points per time that are perturbed as
        anomalies (keyword-only); the count is ``int(num_points_per_time * anomaly_ratio)``.
    :return: A list of ``num_times`` NumPy arrays, each of shape
        ``(num_points_per_time, num_features)``.
    :raises ValueError: If ``num_features`` is less than 1 or ``anomaly_ratio``
        is outside ``[0, 1]``.
    """
    if num_features < 1:
        raise ValueError("num_features must be at least 1")
    if not 0 <= anomaly_ratio <= 1:
        raise ValueError("anomaly_ratio must be between 0 and 1")

    # The periodic component and the anomaly count are the same for every time
    # step, so compute them once instead of inside the loop.
    periodic_pattern = (
        np.sin(np.linspace(0, 10, num_points_per_time))[:, None]
        * np.arange(1, num_features + 1)
        * 0.5
    )
    num_anomalies = int(num_points_per_time * anomaly_ratio)

    all_times_data = []
    for _ in range(num_times):
        # Noise plus periodic pattern; the addition allocates a fresh array per step.
        baseline = np.random.randn(num_points_per_time, num_features) + periodic_pattern

        # Introduce anomalies on distinct, randomly chosen rows
        anomaly_indices = np.random.choice(num_points_per_time, num_anomalies, replace=False)
        baseline[anomaly_indices] += np.random.randn(num_anomalies, num_features) * 3

        # Already an ndarray: no list round-trip needed.
        all_times_data.append(baseline)

    return all_times_data

# Example usage
data = generate_synthetic_time_series(100, 100)

# The 'data' variable is a list of NumPy arrays (not nested Python lists). Each array
# holds one time point, and each row of an array is one data point with 3 features.
data

[array([[ 1.49294327,  0.42564465, -0.74790995],
        [ 0.51851257,  0.48354459, -0.30586103],
        [-0.53904332, -0.66829173, -0.18786919],
        [ 1.85327152,  0.86303952,  0.82462644],
        [ 0.2541898 , -0.58963521,  2.00865393],
        [ 0.99493614,  2.8194594 ,  0.68827917],
        [-0.98713452, -0.36088253,  0.07746628],
        [-0.68557509,  0.46282698, -1.35132984],
        [-0.25725921,  0.48935616, -0.47832682],
        [ 0.65367158,  0.41144241,  1.46221052],
        [ 1.02008692,  2.01125087,  1.07021562],
        [-1.01021518,  0.3606466 ,  1.39443698],
        [-0.52601044,  3.74140732,  3.96258763],
        [-0.147547  ,  0.65567453,  0.83832871],
        [ 0.53142037,  1.77777743,  2.68906457],
        [ 0.24053868,  2.35553914,  1.76789837],
        [ 0.1942639 ,  1.92836157,  1.12640381],
        [ 1.85447038, -0.69359585,  1.97902953],
        [-0.02609004,  1.41212686,  1.4556039 ],
        [ 0.046514  ,  0.67510974,  1.95343732],
        [ 1.32564791

In [10]:
def generate_synthetic_time_series(
    num_times,
    num_points_per_time,
    amplitude=1,
    noise_level=0.5,
    *,
    num_features=3,
    anomaly_ratio=0.01,
):
    """
    Generate a synthetic time series dataset with enhanced periodic patterns.

    Every time step is a sine wave per feature (feature ``i`` is phase-shifted by
    ``pi / num_features * i``) plus Gaussian noise. A fraction of the rows of each
    time step is then perturbed with additional noise (standard deviation 3) to
    act as anomalies. Random numbers come from NumPy's global ``np.random`` state.

    :param num_times: Number of different times in the time series.
    :param num_points_per_time: Number of points for each time.
    :param amplitude: Amplitude of the sine wave.
    :param noise_level: Standard deviation of the random noise.
    :param num_features: Number of features per point (keyword-only).
    :param anomaly_ratio: Fraction of points per time that are perturbed as
        anomalies (keyword-only); the count is ``int(num_points_per_time * anomaly_ratio)``.
    :return: A list of ``num_times`` NumPy arrays, each of shape
        ``(num_points_per_time, num_features)``.
    :raises ValueError: If ``num_features`` is less than 1 or ``anomaly_ratio``
        is outside ``[0, 1]``.
    """
    if num_features < 1:
        # Also guards the division by num_features in the phase computation below.
        raise ValueError("num_features must be at least 1")
    if not 0 <= anomaly_ratio <= 1:
        raise ValueError("anomaly_ratio must be between 0 and 1")

    # The noise-free signal and the anomaly count are identical for every time
    # step, so compute them once instead of inside the loop.
    sample_positions = np.linspace(0, 10, num_points_per_time)
    phase_shifts = np.pi / num_features * np.arange(num_features)
    clean_signal = amplitude * np.sin(sample_positions[:, None] + phase_shifts)
    num_anomalies = int(num_points_per_time * anomaly_ratio)

    all_times_data = []
    for _ in range(num_times):
        # Add noise; the addition allocates a fresh array, so clean_signal is never mutated.
        baseline = clean_signal + np.random.randn(num_points_per_time, num_features) * noise_level

        # Introduce anomalies on distinct, randomly chosen rows
        anomaly_indices = np.random.choice(num_points_per_time, num_anomalies, replace=False)
        baseline[anomaly_indices] += np.random.randn(num_anomalies, num_features) * 3

        # Already an ndarray: no list round-trip needed.
        all_times_data.append(baseline)

    return all_times_data

# Regenerate the dataset with the phase-shifted generator defined above, using its
# default amplitude (1) and noise_level (0.5): 100 time steps of 100 points each.
# This rebinds the `data` name used by the earlier example.
data = generate_synthetic_time_series(100, 100)
# Display the resulting list of NumPy arrays.
data

[array([[ 0.55279663,  0.97482924,  0.53417357],
        [ 0.34900195,  1.62465691,  0.52984667],
        [ 1.03086997,  1.13550538,  0.79592046],
        [-0.31256679,  2.07735989, -0.25446724],
        [-0.6432854 ,  0.74413901,  1.25181678],
        [ 0.34831842,  0.28922167,  0.92708839],
        [ 0.21935615,  0.69375459,  0.40196083],
        [ 1.10810901,  1.27656832, -0.0771778 ],
        [ 1.93243084,  1.4542085 ,  0.02860668],
        [ 0.94479562,  1.16332008, -1.23996266],
        [ 1.0614084 ,  1.24537422, -0.16384627],
        [ 0.19709113,  0.4882341 , -0.25709066],
        [ 0.79734084,  1.64642425, -0.36879863],
        [ 1.18538816,  1.12324931, -0.75741226],
        [ 0.82499893,  0.78950665, -0.16268476],
        [ 1.29307706,  0.40012001, -0.62010848],
        [ 1.65217305,  0.98262781,  0.01047322],
        [ 1.19827988,  0.64105812, -0.82757007],
        [ 0.40498522,  0.52934813, -0.80312298],
        [ 0.17829371,  0.7925303 , -0.69971787],
        [ 1.84140551