In [None]:

import pandas as pd
from pathlib import Path
from sqlalchemy import create_engine

# -----------------------------
# DB config
# -----------------------------
DB_USER = "postgres"
DB_PASS = "1998"
DB_HOST = "localhost"
DB_PORT = "5432"
DB_NAME = "rossmann"
DB_SCHEMA = "raw"

DB_URL = f"postgresql+psycopg2://{DB_USER}:{DB_PASS}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
engine = create_engine(DB_URL)

# -----------------------------
# Paths (absolute, stable)
# -----------------------------
PROJECT_ROOT = Path("C:/Users/User/Documents/Projects/dbt_rossmann_anayltics").resolve()
DATA_CLEAN_PATH = PROJECT_ROOT / "data_clean"

sales_path = DATA_CLEAN_PATH / "sales_clean.csv"
stores_path = DATA_CLEAN_PATH / "stores_clean.csv"


In [5]:

# -----------------------------
# Load STORES
# -----------------------------
if not stores_path.exists():
    raise FileNotFoundError(f"Missing file: {stores_path}")

print(f"\nLoading {stores_path.name} -> {DB_SCHEMA}.stores_raw")
stores_df = pd.read_csv(stores_path)
print(stores_df.head())
print(stores_df.dtypes)

stores_df.to_sql(
    name="stores_raw",
    con=engine,
    schema=DB_SCHEMA,
    if_exists="replace",
    index=False,
    chunksize=50_000,
    method="multi",
)
print(f"Loaded {len(stores_df):,} rows into {DB_SCHEMA}.stores_raw")

print("\nDone.")



Loading stores_clean.csv -> raw.stores_raw
   Store StoreType Assortment  CompetitionDistance  CompetitionOpenSinceMonth  \
0      1         c          a               1270.0                        9.0   
1      2         a          a                570.0                       11.0   
2      3         a          a              14130.0                       12.0   
3      4         c          c                620.0                        9.0   
4      5         a          a              29910.0                        4.0   

   CompetitionOpenSinceYear  Promo2  Promo2SinceWeek  Promo2SinceYear  \
0                    2008.0       0              NaN              NaN   
1                    2007.0       1             13.0           2010.0   
2                    2006.0       1             14.0           2011.0   
3                    2009.0       0              NaN              NaN   
4                    2015.0       0              NaN              NaN   

     PromoInterval  
0        

In [6]:

# -----------------------------
# Load SALES
# -----------------------------
if not sales_path.exists():
    raise FileNotFoundError(f"Missing file: {sales_path}")

print(f"\nLoading {sales_path.name} -> {DB_SCHEMA}.sales_raw")
sales_df = pd.read_csv(sales_path)
print(sales_df.head())
print(sales_df.dtypes)

sales_df.to_sql(
    name="sales_raw",
    con=engine,
    schema=DB_SCHEMA,
    if_exists="replace",   # avoids duplicates while developing
    index=False,
    chunksize=100_000,
    method="multi",
)
print(f"Loaded {len(sales_df):,} rows into {DB_SCHEMA}.sales_raw")



Loading sales_clean.csv -> raw.sales_raw


  sales_df = pd.read_csv(sales_path)


   Store        Date  Sales  Customers  Open  Promo StateHoliday  \
0      1  2015-07-31   5263        555     1      1            0   
1      2  2015-07-31   6064        625     1      1            0   
2      3  2015-07-31   8314        821     1      1            0   
3      4  2015-07-31  13995       1498     1      1            0   
4      5  2015-07-31   4822        559     1      1            0   

   SchoolHoliday  
0              1  
1              1  
2              1  
3              1  
4              1  
Store             int64
Date             object
Sales             int64
Customers         int64
Open              int64
Promo             int64
StateHoliday     object
SchoolHoliday     int64
dtype: object
Loaded 1,017,209 rows into raw.sales_raw
