## Data test import 

In [2]:
import pandas as pd
import json

# Load the JSON file
with open("data.json", "r") as file:
    data = json.load(file)

# Extract metadata
meta_data = data["_meta"]

# Convert the list of items into a DataFrame
df = pd.json_normalize(data["_items"])

# Display the first few rows
print(df.head())


# Save the DataFrame to a CSV filedf.to_csv("data.csv", index=True)

                        _id clusterID                 connectionTime  \
0  5e225fa2f9af8b5c26d2171a      0001  Wed, 01 Jan 2020 19:45:43 GMT   
1  5e23b149f9af8b5fe4b973cf      0001  Thu, 02 Jan 2020 13:08:54 GMT   
2  5e23b149f9af8b5fe4b973d0      0001  Thu, 02 Jan 2020 13:36:50 GMT   
3  5e23b149f9af8b5fe4b973d1      0001  Thu, 02 Jan 2020 13:56:35 GMT   
4  5e23b149f9af8b5fe4b973d2      0001  Thu, 02 Jan 2020 13:59:58 GMT   

                  disconnectTime               doneChargingTime  kWhDelivered  \
0  Wed, 01 Jan 2020 22:51:31 GMT  Wed, 01 Jan 2020 22:11:41 GMT        14.224   
1  Thu, 02 Jan 2020 19:11:15 GMT  Thu, 02 Jan 2020 17:31:35 GMT        25.016   
2  Thu, 02 Jan 2020 22:38:21 GMT  Thu, 02 Jan 2020 20:18:05 GMT        33.097   
3  Fri, 03 Jan 2020 00:39:22 GMT  Thu, 02 Jan 2020 16:35:06 GMT         6.521   
4  Thu, 02 Jan 2020 16:38:39 GMT  Thu, 02 Jan 2020 15:18:45 GMT         2.355   

                                sessionID siteID  spaceID    stationID  \
0  1_1

## Generative Adversarial Networks (GANs) for Synthetic EV Data

In [12]:
%pip install sdv 

Note: you may need to restart the kernel to use updated packages.


In [16]:
%pip install sdv

Note: you may need to restart the kernel to use updated packages.


In [23]:
from sdv.metadata import SingleTableMetadata
from sdv.single_table import GaussianCopulaSynthesizer

# Convert 'userInputs' column to string format
df['userInputs'] = df['userInputs'].apply(str)

# Create metadata for the DataFrame
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df)

# Initialize the synthesizer with the metadata
synthesizer = GaussianCopulaSynthesizer(metadata)

# Fit the synthesizer to the DataFrame
synthesizer.fit(
    data=df
)

synthetic_data = synthesizer.sample(
    num_rows=1000
)

synthetic_data.head()

# Save the synthetic data to a CSV file
synthetic_data.to_csv("synthetic_data.csv", index=False)


The 'SingleTableMetadata' is deprecated. Please use the new 'Metadata' class for synthesizers.


We strongly recommend saving the metadata using 'save_to_json' for replicability in future SDV versions.



In [25]:
from sdv.evaluation.single_table import run_diagnostic

diagnostic = run_diagnostic(
    real_data=df,
    synthetic_data=synthetic_data,
    metadata=metadata
)

Generating report ...

(1/2) Evaluating Data Validity: |██████████| 13/13 [00:00<00:00, 299.44it/s]|
Data Validity Score: 100.0%

(2/2) Evaluating Data Structure: |██████████| 1/1 [00:00<00:00, 78.99it/s]|
Data Structure Score: 100.0%

Overall Score (Average): 100.0%



In [34]:
from sdv.evaluation.single_table import evaluate_quality

quality_report = evaluate_quality(
  df,
  synthetic_data,
  metadata
)

quality_report.get_details(property_name='Column Shapes')


Generating report ...

(1/2) Evaluating Column Shapes: |██████████| 13/13 [00:00<00:00, 587.56it/s]|
Column Shapes Score: 82.85%

(2/2) Evaluating Column Pair Trends: |██████████| 78/78 [00:00<00:00, 1263.96it/s]|
Column Pair Trends Score: 75.54%

Overall Score (Average): 79.2%



Unnamed: 0,Column,Metric,Score
0,clusterID,TVComplement,1.0
1,connectionTime,KSComplement,0.61706
2,disconnectTime,KSComplement,0.64206
3,doneChargingTime,KSComplement,0.65006
4,kWhDelivered,KSComplement,0.890583
5,siteID,TVComplement,1.0
6,timezone,TVComplement,1.0


In [30]:
from sdv.evaluation.single_table import get_column_plot

fig = get_column_plot(
    real_data=df ,
    synthetic_data=synthetic_data,
    column_name='kWhDelivered',
    metadata=metadata
)

fig.show()

In [31]:
from sdv.evaluation.single_table import get_column_pair_plot

fig = get_column_pair_plot(
    real_data=df,
    synthetic_data=synthetic_data,
    column_names=['doneChargingTime', 'kWhDelivered'],
    metadata=metadata
)

fig.show()

In [32]:
synthesizer.save('my_synthesizer.pkl')

synthesizer = GaussianCopulaSynthesizer.load('my_synthesizer.pkl')
