In [20]:
import pandas as pd
import numpy as np
import datetime
import warnings

# Ignore every warning for the rest of the session. Note that this is a
# blanket filter, so warnings raised by pandas/numpy are hidden as well.
warnings.simplefilter("ignore")

# Do not truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [21]:
# Company code for this run. It is interpolated into both S3 source paths
# and is written to the DataAreaId column of the store frame further down.
company = "mgl"

In [22]:
# S3 locations of the two raw store extracts for the selected company.
store_pos_path = (
    f"s3://mega-dev-lake/RawData/indie_on_develop/stores/{company}/store_branch.parquet"
)
store_dynamics_path = (
    f"s3://mega-dev-lake/RawData/D365/Store/{company}/year=2024/data.parquet"
)

# POS-side store branches and the D365 store extract (2024 partition).
df_store_pos = pd.read_parquet(store_pos_path)
df_store_dynamics = pd.read_parquet(store_dynamics_path)

In [23]:
# Keep only the POS columns needed for the join and decode the numeric status
# flag into a label. Building the frame with .assign() yields an independent
# DataFrame, so no column is ever assigned onto a slice of df_store_pos (the
# original pattern triggers pandas' SettingWithCopyWarning).
# Status values other than 1 or 0 are not in the mapping and become NaN.
df_store_pos_selected = df_store_pos[["id", "extCode", "status"]].assign(
    status=lambda frame: frame["status"].map({1: "Open", 0: "Close"})
)

In [24]:
# Columns taken from the D365 store extract.
store_dynamics_columns = [
    "MdsCode",
    "Whcode",
    "AxCode",
    "Brand",
    "StdName",
    "Sqm",
    "DeptStore",
    "MainChannel",
    "Channel",
    "Region",
    "Province",
    "City",
    "Address",
    "OpenStatus",
    "Sssg",
]

# Left join: every D365 store row is kept; the POS columns (id, extCode,
# status) are NaN for stores whose AxCode has no matching POS extCode.
df_store = df_store_dynamics[store_dynamics_columns].merge(
    df_store_pos_selected,
    how="left",
    left_on=["AxCode"],
    right_on=["extCode"],
)

# Constant / placeholder columns.
df_store["StoreCountry"] = "Indonesia"
# np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0
# and raises AttributeError there.
df_store["StoreDynamicsID"] = np.nan
df_store["StoreInvoiceCode"] = np.nan
df_store["DataAreaId"] = company

In [25]:
# Source column -> dimension column, listed in the output column order.
# Columns that keep their name map to themselves, so a single dict drives
# both the column selection and the rename.
dim_store_column_names = {
    "DataAreaId": "DataAreaId",
    "id": "StorePosID",
    "StoreDynamicsID": "StoreDynamicsID",
    "Whcode": "WarehouseKey",
    "status": "StorePosStatus",
    "OpenStatus": "StoreDynamicsStatus",
    "Brand": "BrandKey",
    "AxCode": "StoreCode",
    "StoreInvoiceCode": "StoreInvoiceCode",
    "MdsCode": "StoreMdsCode",
    "Sssg": "StoreSssg",
    "StdName": "StoreName",
    "MainChannel": "StoreMainChannel",
    "Channel": "StoreChannel",
    "StoreCountry": "StoreCountry",
    "Region": "StoreRegion",
    "Province": "StoreProvince",
    "City": "StoreCity",
    "Address": "StoreAddress",
    "Sqm": "StoreSqm",
}

dim_store = df_store[list(dim_store_column_names)].rename(
    columns=dim_store_column_names
)

In [26]:
# Decode the 1/0 flag into labels; values outside the mapping become NaN.
dim_store["StoreSssg"] = dim_store["StoreSssg"].map({1: "SSSG", 0: "Non SSSG"})

# Normalise the casing of the geographic text columns.
for geo_column in ("StoreCity", "StoreProvince", "StoreRegion"):
    dim_store[geo_column] = dim_store[geo_column].str.title()

# .str.title() lower-cases acronyms; restore them. regex=False makes the
# literal matching explicit regardless of the pandas version's default.
dim_store["StoreRegion"] = (
    dim_store["StoreRegion"]
    .str.replace("Ntt", "NTT", regex=False)
    .str.replace("Ntb", "NTB", regex=False)
    .str.replace("Dki", "DKI", regex=False)
)

# Exact-match corrections for province values (most keys look like names cut
# off after 10 characters). One lookup table replaces the ten copy-pasted
# .loc assignments.
province_fixes = {
    "Jawa Tenga": "Jawa Tengah",
    "Di Yogyaka": "DI Yogyakarta",
    "Dki Jakarta": "DKI Jakarta",
    "Sumatera S": "Sumatera Selatan",
    # Fixed: this Sulawesi value was previously mapped to "Sumatera Tengah"
    # (wrong island). NOTE(review): "Sulawesi T" could also stand for
    # "Sulawesi Tenggara" - confirm against the source system.
    "Sulawesi T": "Sulawesi Tengah",
    "Nusa Tengg": "Nusa Tenggara",
    "Sulawesi U": "Sulawesi Utara",
    "Bangka Bel": "Bangka Belitung",
    "Maluku Uta": "Maluku Utara",
    "Sulawesi S": "Sulawesi Selatan",
}
for truncated_name, full_name in province_fixes.items():
    dim_store.loc[dim_store["StoreProvince"] == truncated_name, "StoreProvince"] = (
        full_name
    )

# Drop the "Kota " prefix from city names.
dim_store["StoreCity"] = dim_store["StoreCity"].str.replace(
    "Kota ", "", regex=False
)

# Flatten multi-line addresses. Order matters: " \n" is collapsed to a single
# space first so the following "\n" -> ", " step does not produce " , ".
# Non-breaking spaces are removed last.
for needle, replacement in ((" \n", " "), ("\n", ", "), ("\xa0", "")):
    dim_store["StoreAddress"] = dim_store["StoreAddress"].str.replace(
        needle, replacement, regex=False
    )

In [27]:
# "Yes" where the Dynamics status label equals the POS status label, "No"
# otherwise (a missing value on either side never compares equal, so those
# rows are "No").
statuses_match = dim_store["StoreDynamicsStatus"] == dim_store["StorePosStatus"]
dim_store["StoreSyncStatus"] = np.where(statuses_match, "Yes", "No")

In [28]:
# Read the clock once so all audit columns describe the same instant.
# Calling datetime.now() separately for each column can straddle a second
# (CreateDate != UpdateDate) or midnight (StartDate on a different day).
load_time = datetime.datetime.now()
load_timestamp = pd.to_datetime(load_time.strftime("%Y-%m-%d %H:%M:%S"))

# Validity window of the row: starts at today's date (midnight), open-ended.
dim_store["StartDate"] = pd.to_datetime(load_time.strftime("%Y-%m-%d"))
dim_store["EndDate"] = pd.NaT

# Row audit timestamps, truncated to whole seconds.
dim_store["CreateDate"] = load_timestamp
dim_store["UpdateDate"] = load_timestamp


In [29]:
# Preview the first two rows of the assembled dimension table.
dim_store.head(2)

Unnamed: 0,DataAreaId,StorePosID,StoreDynamicsID,WarehouseKey,StorePosStatus,StoreDynamicsStatus,BrandKey,StoreCode,StoreInvoiceCode,StoreMdsCode,StoreSssg,StoreName,StoreMainChannel,StoreChannel,StoreCountry,StoreRegion,StoreProvince,StoreCity,StoreAddress,StoreSqm,StoreSyncStatus,StartDate,EndDate,CreateDate,UpdateDate
0,mgl,1221.0,,20062,Open,Close,Puma,C0001,,,Non SSSG,SHOWROOM PM EMPORIUM PLUIT MALL JAKARTA,Showroom,Showroom,Indonesia,,DKI Jakarta,Jakarta Utara,"JL. PLUIT SELATAN RAYA NO.15, PENJARINGAN, KEC...",0,No,2024-06-19,NaT,2024-06-19 13:09:05,2024-06-19 13:09:05
1,mgl,1223.0,,20063,Open,Close,Puma,C0002,,,SSSG,SHOWROOM PM MEGA MALL BATAM,Showroom,Showroom,Indonesia,Bali - NTT - NTB,Kepulauan,Batam,"JL. ENGKU PUTRI NO.1, TLK. TERING, KEC. BATAM ...",0,No,2024-06-19,NaT,2024-06-19 13:09:05,2024-06-19 13:09:05


In [30]:
# Numeric company key: 1 for DataAreaId 'mpr', 2 for every other value.
is_mpr = dim_store['DataAreaId'] == 'mpr'
dim_store['CompanyKey'] = np.where(is_mpr, 1, 2)

In [31]:
import re

In [32]:
def camel_to_snake(name):
    """
    Convert a camelCase / PascalCase column name to snake_case.

    Runs of capitals are kept together as one word, e.g. "StorePosID"
    becomes "store_pos_id".
    """
    # Pass 1: underscore before each capitalised word (capital + lowercase run).
    word_split = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
    # Pass 2: underscore between a lowercase letter/digit and a following capital.
    fully_split = re.sub('([a-z0-9])([A-Z])', r'\1_\2', word_split)
    return fully_split.lower()

In [33]:
# Rename every column of dim_store to snake_case, in place.
dim_store.columns = list(map(camel_to_snake, dim_store.columns))

In [36]:
import os
from urllib.parse import quote_plus

import sqlalchemy

# Credentials come from the environment so that no secret is stored in the
# notebook. DATAPROD_DB_PASSWORD is required (KeyError if it is not set);
# DATAPROD_DB_USER falls back to the previously used "postgres" account.
# NOTE(review): the password that used to be hardcoded here should be
# rotated, since it has been committed in plain text.
db_user = os.environ.get("DATAPROD_DB_USER", "postgres")
db_password = os.environ["DATAPROD_DB_PASSWORD"]

# quote_plus escapes characters (e.g. "@", "/", ":") that would otherwise
# break parsing of the connection URL.
engine = sqlalchemy.create_engine(
    f"postgresql+psycopg2://{quote_plus(db_user)}:{quote_plus(db_password)}"
    "@gateway-LB-0daa0ad89236a16a.elb.ap-southeast-3.amazonaws.com:5432/DataProd",
)

In [42]:
# Initial load of the dimension table into the database.
# if_exists='append' inserts into an existing dim_store table, so running
# this cell again inserts the same rows a second time.
dim_store.to_sql(
    name='dim_store',
    con=engine,
    if_exists='append',
    index=False,
)

157