In [None]:
# Install third-party packages via pip (none are imported directly in this notebook;
# presumably they are needed by the data_generation modules - verify against those modules).
# --quiet trims pip's output; --root-user-action=ignore silences pip's running-as-root warning.
!pip install --quiet --root-user-action=ignore faker swifter coqui-tts pydub

# Imports

In [None]:
# Standard-library imports
import json
import warnings

# Suppress every warning; this runs before the imports below so that warnings
# emitted while those modules load are hidden as well.
warnings.filterwarnings("ignore")

# Snowpark session accessor and the project-local generator classes
from snowflake.snowpark.context import get_active_session
from data_generation.data_generator import DataGenerator
from data_generation.text_to_speech import TextToSpeech

# Session handle reused by the cells that follow
session = get_active_session()

# Data Generation

The classes `DataGenerator` and `TextToSpeech` generate all data required for this demo.

In [None]:
# Structured Data
# Date window passed to the generator as keyword arguments
generation_window = {'start_date': '2024-01-01', 'end_date': '2025-07-31'}

data_generator = DataGenerator(session)
data_generator.load_configuration()
data_generator.generate_data(**generation_window)

In [None]:
# Unstructured Data
# Text-to-speech generator configured with a model identifier and a voices file
tts_generator = TextToSpeech(
    model='tts_models/multilingual/multi-dataset/xtts_v2',
    voices='data_generation/04_audio/configuration/voices.json'
)

# Load conversations from JSON. The context manager guarantees the file handle is
# closed, and the explicit encoding avoids depending on the platform's locale default.
with open('data_generation/04_audio/configuration/call_center_recordings.json', encoding='utf-8') as recordings_file:
    recordings = json.load(recordings_file)
output_folder = '/call_center_recordings'

# Create audio files from conversations
tts_generator.dict_to_speech_optimized(recordings, output_folder)

# Upload every file in the output folder to the Snowflake stage, uncompressed
session.file.put(local_file_name=f'{output_folder}/*', stage_location='@AUDIO/call_center_recordings', auto_compress=False)
# Refresh the stage so the uploaded files are registered in its directory metadata
session.sql('ALTER STAGE AUDIO REFRESH').collect()

## Structured Data
The data model of this demo consists of multiple tables and views listed here.

In [None]:
# Show a sample of each generated table exactly once.
# Dimension tables
data_generator.dim_dates.show(n=5)
data_generator.dim_suppliers.show()
data_generator.dim_product_hierarchy.show(n=5)
data_generator.dim_products.show(n=5)
data_generator.dim_platforms.show(n=5)
data_generator.dim_customers.show(n=5)
# Fact tables and customer reviews
data_generator.fact_transactions.show(n=5)
data_generator.fact_supplier_deliveries.show(n=5)
data_generator.fact_daily_stock_levels.show(n=5)
data_generator.customer_reviews.show(n=5)

## Unstructured Data

This demo provides multiple unstructured data sources, listed here.

In [None]:
-- Preview up to 10 entries from the directory table of the DOCUMENTS stage
SELECT *
FROM DIRECTORY(@DOCUMENTS)
LIMIT 10;

In [None]:
-- Preview up to 10 entries from the directory table of the AUDIO stage
SELECT *
FROM DIRECTORY(@AUDIO)
LIMIT 10;