1. Save in Two Formats

In [16]:
import pandas as pd

# Sample rows as [full model name, variant suffix]; the base model carries an
# empty-string suffix.
data = [
    ["iphone 17", ""],
    ["iphone 17 air", "air"],
    ["iphone 17 pro", "pro"],
]
df = pd.DataFrame(data=data, columns=["name", "value"])

# Last expression of the cell: renders the frame as the cell output.
df


Unnamed: 0,name,value
0,iphone 17,
1,iphone 17 air,air
2,iphone 17 pro,pro


In [23]:
from dotenv import load_dotenv
import pathlib, os

# Pull SOURCE_DIR / DATA_DIR_RAW / DATA_DIR_PROCESSED from a .env file (if any)
# into the process environment.
load_dotenv()
csv_filename = "phone_names.csv"
parquet_filename = "phone_names.parquet"

# Fail early with a readable message: pathlib.Path(None) would otherwise raise
# a TypeError that does not say which variable is unset.
_missing_env = [
    var_name
    for var_name in ("SOURCE_DIR", "DATA_DIR_RAW", "DATA_DIR_PROCESSED")
    if os.getenv(var_name) is None
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

SOU_DIR = pathlib.Path(os.environ["SOURCE_DIR"])
# NOTE: despite the *_DIR suffix, RAW_DIR and PRO_DIR are full *file* paths
# (directory + file name); the names are kept because later cells use them.
RAW_DIR = SOU_DIR / pathlib.Path(os.environ["DATA_DIR_RAW"]) / csv_filename
PRO_DIR = SOU_DIR / pathlib.Path(os.environ["DATA_DIR_PROCESSED"]) / parquet_filename

# Create the target folders so the notebook also runs on a fresh checkout.
for _target in (RAW_DIR, PRO_DIR):
    _target.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the row index out of the files, so a reload has the same
# columns as df.
df.to_csv(RAW_DIR, index=False)
df.to_parquet(PRO_DIR, index=False)

2. Reload

In [29]:
# Read each file back from the path it was saved to.
df_csv = pd.read_csv(filepath_or_buffer=RAW_DIR)
df_parquet = pd.read_parquet(path=PRO_DIR)

# Plain-text dump of both frames, CSV first. In the output recorded in this
# notebook the CSV reload shows NaN where the original had an empty string,
# while the Parquet reload keeps the empty string.
print(df_csv, df_parquet, sep="\n")

# Shape-only sanity checks: same number of rows and columns as the original.
assert df.shape == df_csv.shape, "csv not good"
assert df.shape == df_parquet.shape, "parque not good"

def validate_loaded(original: pd.DataFrame, reloaded: pd.DataFrame):
    """Run structural checks comparing a reloaded frame with its original.

    Args:
        original: The frame that was written out.
        reloaded: The frame that was read back.

    Returns:
        A dict with two boolean entries:
        ``'shape_equal'`` - both frames have the same (rows, columns) shape;
        ``'cols_present'`` - every column label of ``original`` also appears
        in ``reloaded`` (extra columns in ``reloaded`` are allowed).
        Cell values and dtypes are not compared.
    """
    same_shape = original.shape == reloaded.shape
    absent = [label for label in original.columns if label not in reloaded.columns]
    return {'shape_equal': same_shape, 'cols_present': not absent}

# Structural check of the CSV reload against the original frame; as the cell's
# last expression, the returned dict of checks is shown as the cell output.
# Only the CSV reload is checked here, not df_parquet.
validate_loaded(df, df_csv)

            name value
0      iphone 17   NaN
1  iphone 17 air   air
2  iphone 17 pro   pro
            name value
0      iphone 17      
1  iphone 17 air   air
2  iphone 17 pro   pro


{'shape_equal': True, 'cols_present': True}

3. Refactor to Utilities

In [31]:
def read_df(RAW_DIR, PRO_DIR):
    """Load the CSV and Parquet copies of a frame.

    Args:
        RAW_DIR: Path of the CSV file to read.
        PRO_DIR: Path of the Parquet file to read.

    Returns:
        A ``(csv_frame, parquet_frame)`` tuple; the CSV is read first.
    """
    return pd.read_csv(RAW_DIR), pd.read_parquet(PRO_DIR)

def write_df(df, RAW_DIR, PRO_DIR):
    """Save ``df`` as CSV and Parquet, then validate each file by reloading it.

    Args:
        df: Frame to persist.
        RAW_DIR: Destination path of the CSV file.
        PRO_DIR: Destination path of the Parquet file.

    Returns:
        A dict ``{'csv': checks, 'parquet': checks_or_None}`` where each
        ``checks`` is the dict returned by ``validate_loaded``. The
        ``'parquet'`` entry is ``None`` when the Parquet save failed.

    Raises:
        Whatever ``DataFrame.to_csv`` / ``pd.read_csv`` raise. A failing
        Parquet save is reported on stdout instead of being raised.
    """
    # index=False keeps the row index out of the file; otherwise a reload
    # gains an extra unnamed column and no longer matches df's shape.
    df.to_csv(RAW_DIR, index=False)
    # Validate what is actually on disk rather than whatever frames happen to
    # be lying around in the notebook's global namespace.
    results = {'csv': validate_loaded(df, pd.read_csv(RAW_DIR))}

    try:
        df.to_parquet(PRO_DIR, index=False)
    except Exception as e:
        # Best effort: a failed Parquet save should not abort the CSV save.
        print("Parquet save failed.")
        print("Error:", e)
        results['parquet'] = None
    else:
        results['parquet'] = validate_loaded(df, pd.read_parquet(PRO_DIR))

    return results

# Exercise the helper: persist df to the CSV path (RAW_DIR) and the Parquet
# path (PRO_DIR).
write_df(df, RAW_DIR, PRO_DIR)