# Data Storage: CSV and Parquet
This notebook demonstrates saving the processed holdings data to both CSV and Parquet formats and validating the reloaded data.

In [None]:
import pandas as pd
import os
import sys
sys.path.append('../src')
from config import DATA_DIR_RAW, DATA_DIR_PROCESSED

# Build a tiny two-column frame used to exercise the save/reload round trip
data = dict(col1=[1, 2], col2=[3, 4])
df_to_save = pd.DataFrame(data=data)

# --- 1. Save in Two Formats ---
# Save as CSV to the raw directory; index=False keeps the row index out of
# the file so no extra column appears when it is read back.
csv_path = os.path.join(DATA_DIR_RAW, 'sample.csv')
df_to_save.to_csv(csv_path, index=False)
print(f"Saved to CSV at: {csv_path}")

# Save as Parquet to the processed directory
parquet_path = os.path.join(DATA_DIR_PROCESSED, 'sample.parquet')
df_to_save.to_parquet(parquet_path, index=False)
print(f"Saved to Parquet at: {parquet_path}")

# --- 2. Reload and Validate ---
# Read both files back so they can be compared against the original frame.
reloaded_csv = pd.read_csv(csv_path)
reloaded_parquet = pd.read_parquet(parquet_path)

def validate_dfs(original_df, reloaded_df, name):
    """Print whether a reloaded DataFrame matches the original's shape and dtypes.

    Args:
        original_df: The DataFrame that was written to disk.
        reloaded_df: The DataFrame read back from disk.
        name: Label used in the printed report (e.g. "CSV").

    Returns:
        None. The result is reported via ``print`` only.
    """
    shape_match = original_df.shape == reloaded_df.shape
    # Comparing two dtype Series with different labels raises ValueError, so
    # check the column labels first and report a mismatch instead of crashing.
    same_columns = original_df.columns.equals(reloaded_df.columns)
    dtype_match = bool(
        same_columns and all(original_df.dtypes == reloaded_df.dtypes)
    )
    print(f"Validation for {name}:")
    print(f"  - Shapes match: {shape_match}")
    print(f"  - Dtypes match: {dtype_match}")

# Compare each reloaded frame against the frame that was originally saved.
validate_dfs(original_df=df_to_save, reloaded_df=reloaded_csv, name="CSV")
validate_dfs(original_df=df_to_save, reloaded_df=reloaded_parquet, name="Parquet")