In [2]:
from pathlib import Path

import pandas as pd
import sdv
from sdv.single_table import CTGANSynthesizer

In [3]:
from sdv.metadata import SingleTableMetadata

# Start from an empty single-table metadata object; its column definitions are
# filled in further down by `metadata.detect_from_dataframe(...)`.
metadata = SingleTableMetadata()

In [12]:
# Load the raw dataset; the path is relative, so it is resolved against the
# notebook's current working directory.
df = pd.read_csv("dataset/wind.csv")

In [13]:
# Preview the first rows of the raw frame to check the columns that were read.
df.head()

Unnamed: 0,Customer,Generator Capacity,Postcode,Consumption Category,date,time,value,datetime,label
0,1,3.78,2076,GC,1-Jul-10,0:30,0.303,2010-07-01 00:30:00,2
1,1,3.78,2076,CL,1-Jul-10,0:30,1.25,2010-07-01 00:30:00,2
2,1,3.78,2076,GG,1-Jul-10,0:30,0.0,2010-07-01 00:30:00,2
3,1,3.78,2076,GC,2-Jul-10,0:30,0.116,2010-07-02 00:30:00,50
4,1,3.78,2076,CL,2-Jul-10,0:30,1.238,2010-07-02 00:30:00,50


In [16]:
# Drop the columns that are not modelled and give the remaining ones short
# names. The rename uses an explicit old -> new mapping rather than assigning a
# positional list to `.columns`, so every label stays attached to its source
# column even if the CSV's column order changes; `errors="raise"` makes a
# missing source column fail loudly instead of being skipped.
column_renames = {
    "Customer": "location",
    "Generator Capacity": "gen_capacity",
    "Consumption Category": "category",
    "value": "value",
    "label": "time_seq",
}
filtered_df = (
    df.drop(columns=["Postcode", "date", "time", "datetime"])
      .rename(columns=column_renames, errors="raise")
)

In [19]:
# Move `time_seq` directly after `location`; the other columns keep their
# relative order.
new_order = [
    "location",
    "time_seq",
    "gen_capacity",
    "category",
    "value",
]
filtered_df = filtered_df.reindex(new_order, axis="columns")

In [21]:
# Preview the renamed and reordered frame that is used for metadata detection
# and training below.
filtered_df.head()

Unnamed: 0,location,time_seq,gen_capacity,category,value
0,1,2,3.78,GC,0.303
1,1,2,3.78,CL,1.25
2,1,2,3.78,GG,0.0
3,1,50,3.78,GC,0.116
4,1,50,3.78,CL,1.238


In [22]:
# Let SDV infer an sdtype for every column of the training frame.
# NOTE(review): `location` originates from the `Customer` column and is
# detected as 'numerical' (see the metadata dict printed below). If it is an
# identifier rather than a quantity, its sdtype should probably be overridden
# before fitting -- confirm against the dataset description.
metadata.detect_from_dataframe(data=filtered_df)

In [23]:
# Export the detected metadata as a plain dict so it can be inspected below.
python_dict = metadata.to_dict()

In [24]:
# Check the detected metadata; the recorded run produced no output here.
metadata.validate()

In [25]:
# Display the exported metadata (column name -> sdtype, plus the spec version).
python_dict

{'columns': {'location': {'sdtype': 'numerical'},
  'time_seq': {'sdtype': 'numerical'},
  'gen_capacity': {'sdtype': 'numerical'},
  'category': {'sdtype': 'categorical'},
  'value': {'sdtype': 'numerical'}},
 'METADATA_SPEC_VERSION': 'SINGLE_TABLE_V1'}

In [26]:
from sdv.single_table import CTGANSynthesizer

# Training options for CTGAN: rounding enforcement off, 500 epochs, and
# per-epoch loss logging (verbose).
ctgan_options = {
    "enforce_rounding": False,
    "epochs": 500,
    "verbose": True,
}

# Build the synthesizer from the detected metadata and the options above.
synthesizer = CTGANSynthesizer(metadata, **ctgan_options)

In [27]:
# Train CTGAN on the prepared frame. Because the synthesizer was created with
# verbose=True, one "Epoch N, Loss G / Loss D" line is printed per epoch.
synthesizer.fit(filtered_df)

Epoch 1, Loss G: -0.8084,Loss D: -0.0718
Epoch 2, Loss G: -0.2350,Loss D: -0.1026
Epoch 3, Loss G: -0.2307,Loss D: -0.2917
Epoch 4, Loss G: -0.4842,Loss D:  0.0581
Epoch 5, Loss G: -0.2016,Loss D: -0.0218
Epoch 6, Loss G: -0.1479,Loss D: -0.0682
Epoch 7, Loss G: -0.2917,Loss D:  0.1358
Epoch 8, Loss G: -0.2266,Loss D: -0.0024
Epoch 9, Loss G: -0.2839,Loss D: -0.1156
Epoch 10, Loss G: -0.2121,Loss D: -0.0454
Epoch 11, Loss G: -0.3863,Loss D:  0.0111
Epoch 12, Loss G: -0.4396,Loss D:  0.1468
Epoch 13, Loss G: -0.2234,Loss D: -0.0066
Epoch 14, Loss G: -0.2010,Loss D: -0.3021
Epoch 15, Loss G: -0.2528,Loss D: -0.1037
Epoch 16, Loss G: -0.3723,Loss D: -0.2191
Epoch 17, Loss G: -0.4907,Loss D: -0.3377
Epoch 18, Loss G: -0.5224,Loss D: -0.2225
Epoch 19, Loss G: -0.6432,Loss D:  0.1023
Epoch 20, Loss G: -0.4433,Loss D: -0.0576
Epoch 21, Loss G: -0.4735,Loss D: -0.1191
Epoch 22, Loss G: -0.4363,Loss D: -0.0389
Epoch 23, Loss G: -0.5754,Loss D: -0.1417
Epoch 24, Loss G: -0.4686,Loss D: -0.1519
E

In [28]:
# Persist the trained synthesizer. The target directory is created first so
# the save does not fail (after a long training run) when `saved_model/` does
# not exist yet, e.g. on a fresh checkout.
model_path = Path('saved_model/ctgan.pkl')
model_path.parent.mkdir(parents=True, exist_ok=True)
synthesizer.save(
    filepath=str(model_path)
)

In [None]:
# Reload the trained synthesizer from the same path the save cell writes to
# ('saved_model/ctgan.pkl'); no cell in this notebook creates
# 'my_synthesizer.pkl'.
# NOTE(review): the .pkl file is presumably a pickle -- only load model files
# from a trusted source.
load_synthesizer = CTGANSynthesizer.load(
    filepath='saved_model/ctgan.pkl'
)
#