In [22]:
# Import necessary libraries
import pandas as pd
from sdv.single_table import GaussianCopulaSynthesizer
from sdv.metadata import SingleTableMetadata

# --- Configuration -----------------------------------------------------------
# Everything a reader might want to tune lives here so the slice size and the
# sample size cannot drift apart.
INPUT_CSV = 'PEMS-BAY.csv'            # replace with the actual path to your CSV file
OUTPUT_CSV = 'synthetic_data.csv'
TIMESTAMP_COL = 'Unnamed: 0'          # read_csv's auto-generated name for the unnamed first column
TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
# 10081 rows of 5-minute readings = 35 days including both endpoints
# (2017-01-01 00:00:00 through 2017-02-05 00:00:00 in the recorded output).
N_ROWS = 10081

# --- Load --------------------------------------------------------------------
df = pd.read_csv(INPUT_CSV)
print(df.shape)

# Parse the timestamp column so it is a real datetime dtype before modelling.
df[TIMESTAMP_COL] = pd.to_datetime(df[TIMESTAMP_COL])

# Take the first N_ROWS rows. `.copy()` makes the subset independent of `df`,
# so later edits to either frame cannot leak into the other.
data_subset = df.iloc[:N_ROWS].copy()
print(data_subset)

# --- Metadata ----------------------------------------------------------------
metadata = SingleTableMetadata()

# The timestamp column is declared as a datetime with an explicit format.
metadata.add_column(TIMESTAMP_COL, sdtype='datetime', datetime_format=TIMESTAMP_FORMAT)

# Every remaining column is registered as numerical.
# NOTE(review): assumes all non-timestamp columns are numeric sensor readings — confirm.
sensor_columns = [column for column in data_subset.columns if column != TIMESTAMP_COL]
for column in sensor_columns:
    metadata.add_column(column, sdtype='numerical')

# --- Fit and sample ----------------------------------------------------------
# NOTE(review): the recorded output shows synthetic timestamps in arbitrary
# order (rows are sampled independently), so the result is not a time series.
# If temporal structure matters, a sequential synthesizer is presumably the
# better fit — confirm against the SDV documentation.
synthesizer = GaussianCopulaSynthesizer(metadata)
synthesizer.fit(data_subset)

# Sample exactly as many rows as were fitted. Using len() instead of repeating
# the literal keeps the two sizes in sync even if the CSV is shorter than N_ROWS.
synthetic_data = synthesizer.sample(num_rows=len(data_subset))

# Output the synthetic data
print(synthetic_data)

# Persist the synthetic table without the positional index.
synthetic_data.to_csv(OUTPUT_CSV, index=False)

(52116, 326)
               Unnamed: 0  400001  400017  400030  400040  400045  400052  \
0     2017-01-01 00:00:00    71.4    67.8    70.5    67.4    68.8    66.6   
1     2017-01-01 00:05:00    71.6    67.5    70.6    67.5    68.7    66.6   
2     2017-01-01 00:10:00    71.6    67.6    70.2    67.4    68.7    66.1   
3     2017-01-01 00:15:00    71.1    67.5    70.3    68.0    68.5    66.7   
4     2017-01-01 00:20:00    71.7    67.8    70.2    68.1    68.4    66.9   
...                   ...     ...     ...     ...     ...     ...     ...   
10076 2017-02-04 23:40:00    71.9    65.1    67.1    60.3    63.0    65.1   
10077 2017-02-04 23:45:00    71.6    65.3    67.1    59.9    63.2    65.4   
10078 2017-02-04 23:50:00    71.1    65.2    67.6    59.9    63.2    66.6   
10079 2017-02-04 23:55:00    70.9    65.3    66.1    60.2    63.1    66.2   
10080 2017-02-05 00:00:00    71.1    64.9    66.8    60.2    62.7    66.7   

       400057  400059  400065  ...  409525  409526  409528  40



               Unnamed: 0  400001  400017  400030  400040  400045  400052  \
0     2017-01-12 10:48:14    59.6    54.6    41.1    63.6    52.3    50.4   
1     2017-01-20 01:39:59    66.5    14.0    33.0    53.4    52.0    62.4   
2     2017-02-03 07:32:02    73.6    57.2    67.8    46.8    65.0    67.8   
3     2017-01-07 02:36:24    76.2    14.0    60.8    54.7    62.1    70.8   
4     2017-01-16 05:27:59    73.7    45.3    64.7    58.9    69.0    70.8   
...                   ...     ...     ...     ...     ...     ...     ...   
10076 2017-01-16 06:02:33    69.9    64.8    67.0    64.4    67.3    70.8   
10077 2017-01-29 11:03:39    63.4    14.0    50.4    63.6    33.3    54.9   
10078 2017-01-28 21:47:27    56.9    14.0    23.0    41.0    52.0    53.3   
10079 2017-01-12 00:25:32    68.5    14.0    54.8    56.8    51.9    52.1   
10080 2017-01-29 18:45:35    62.7    69.0    67.7    69.9    64.8    60.0   

       400057  400059  400065  ...  409525  409526  409528  409529  413026 