## 1 - Read the product bulk file

In [2]:
import json, pandas as pd

In [8]:
def read_data(file_name):
    """Read a JSON Lines file and return every record it contains.

    Parameters
    ----------
    file_name : str or path-like
        Path to a file holding one JSON document per line.

    Returns
    -------
    list
        One parsed object per non-blank line, in file order. An empty
        file yields an empty list.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8 so parsing does not depend on the platform's locale
    # encoding; the context manager closes the file even if a line fails
    # to parse.
    with open(file_name, 'r', encoding='utf-8') as data:
        # Whitespace-only lines are skipped because json.loads rejects them.
        return [json.loads(line) for line in data if line.strip()]

## 2 - Parse the JSONL into a pandas DataFrame

In [None]:
# Load the JSONL export: raw records through read_data, plus a DataFrame through pandas
all_lines = read_data('bulk_products_data.jsonl')
df = pd.read_json("bulk_products_data.jsonl", lines=True)

# Derive entity type from the Shopify GID (the path segment after "shopify/")
if "id" in df.columns:
    gid_text = df["id"].astype(str)
    df["entityType"] = gid_text.str.extract(r"gid:\/\/shopify\/([^\/]+)\/")

# Collapse list-valued tags into one comma-separated string for display;
# non-list values pass through unchanged
if "tags" in df.columns:
    df["tags"] = df["tags"].apply(
        lambda tag_value: tag_value if not isinstance(tag_value, list) else ", ".join(tag_value)
    )

# Readable column order: the preferred columns that exist come first,
# every other column keeps its original relative position after them
preferred_cols = [
    "entityType",
    "id",
    "__parentId",
    "title",
    "sku",
    "price",
    "compareAtPrice",
    "inventoryQuantity",
    "handle",
    "vendor",
    "productType",
    "tags",
    "status",
    "createdAt",
    "updatedAt",
    "url",
    "altText",
]
existing_cols = [name for name in preferred_cols if name in df.columns]
ordered_df = df[
    existing_cols + [name for name in df.columns if name not in existing_cols]
]



In [5]:
# Row count per entity type
ordered_df["entityType"].value_counts()

entityType
ProductVariant    526
ProductImage      518
Product           517
Name: count, dtype: int64

In [7]:
# Product dimension: keep only the rows whose entity type is Product
dim_product = ordered_df.loc[ordered_df['entityType'].eq('Product')]

## Fact table

In [35]:
import pandas as pd
import json

# Read the orders export one JSON document per line.
# Explicit UTF-8 keeps parsing independent of the platform's locale encoding
# (pd.read_json, used for the products file, also defaults to UTF-8);
# whitespace-only lines are skipped because json.loads rejects them.
with open("bulk_orders_data.jsonl", "r", encoding="utf-8") as f:
    data = [json.loads(line) for line in f if line.strip()]

# Flatten nested objects into dot-separated columns (e.g. customer.id).
# NOTE: this rebinds `df`, which previously held the products frame.
df = pd.json_normalize(data, sep=".")


In [43]:
# Split the flattened columns into groups by their name prefix
customer_cols = [name for name in df.columns if name.startswith('customer')]
shippingAddress_cols = [name for name in df.columns if name.startswith('shippingAddress')]
billingAddress_cols = [name for name in df.columns if name.startswith('billingAddress')]

# Everything that matched none of the three prefixes belongs to the order group
order_cols = [
    name
    for name in df.columns
    if not name.startswith(('customer', 'shippingAddress', 'billingAddress'))
]



        

## Customer master

In [47]:
# Customer columns followed by the shipping and billing address columns
df.loc[:, [*customer_cols, *shippingAddress_cols, *billingAddress_cols]]

Unnamed: 0,customer.id,customer.firstName,customer.lastName,customer.email,shippingAddress.name,shippingAddress.address1,shippingAddress.address2,shippingAddress.city,shippingAddress.province,shippingAddress.country,shippingAddress.zip,shippingAddress.phone,billingAddress.name,billingAddress.address1,billingAddress.address2,billingAddress.city,billingAddress.province,billingAddress.country,billingAddress.zip,billingAddress.phone
0,gid://shopify/Customer/8243723305173,Russell,Winfield,Russel.winfield@example.com,Russell Winfield,105 Victoria St,,Toronto,Ontario,Canada,M5C 1N7,,Russell Winfield,105 Victoria St,,Toronto,,Canada,M5C1N7,
1,,,,,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,,,,,
3,gid://shopify/Customer/8244183892181,Vance,Donnelly,egnition_sample_2748@egnition.com,Chloe Glover,3884 Purus Street,,Phoenix,Arizona,Solomon Islands,3998,+67721863,Vance Donnelly,"P.O. Box 726, 7322 Tellus Avenue",,Houston,Texas,"Congo, The Democratic Republic Of The",923619,+39471628167
4,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,,,,,,,,,,,,,,,,,,,,
305,gid://shopify/Customer/8244180517077,Euna,Olson,egnition_sample_5934@egnition.com,Dandre Kozey,Ap #921-2770 Felis Rd.,,Oviedo,Principado de Asturias,Dominica,7755,+18094543125,Hillard Schinner,6253 Vitae Ave,,Mostoles,Madrid,Saint Martin,60201,+590590874153
306,,,,,,,,,,,,,,,,,,,,
307,,,,,,,,,,,,,,,,,,,,


## Fact table: transactions


In [49]:
# All columns outside the customer/address groups
df.loc[:, order_cols]

Unnamed: 0,id,name,createdAt,processedAt,currencyCode,tags,note,totalPriceSet.shopMoney.amount,totalPriceSet.shopMoney.currencyCode,title,...,fulfillableQuantity,fulfillmentStatus,__parentId,variant.id,variant.sku,variant.title,originalUnitPriceSet.shopMoney.amount,originalUnitPriceSet.shopMoney.currencyCode,discountedTotalSet.shopMoney.amount,discountedTotalSet.shopMoney.currencyCode
0,gid://shopify/Order/6182997917909,#1001,2025-08-05T02:24:33Z,2025-08-05T02:24:33Z,USD,[Multiple Fulfillments],,3163.4,USD,,...,,,,,,,,,,
1,gid://shopify/LineItem/14853623840981,,,,,,,,,The Complete Snowboard,...,1.0,unfulfilled,gid://shopify/Order/6182997917909,gid://shopify/ProductVariant/51816085127381,,Sunset,664.95,USD,664.95,USD
2,gid://shopify/LineItem/14853623873749,,,,,,,,,The 3p Fulfilled Snowboard,...,1.0,unfulfilled,gid://shopify/Order/6182997917909,gid://shopify/ProductVariant/51816085422293,sku-hosted-1,Default Title,2498.45,USD,2498.45,USD
3,gid://shopify/Order/6183358234837,#1002,2025-08-05T10:34:22Z,2025-08-05T10:34:22Z,USD,"[egnition-sample-data, toys]",,19.98,USD,,...,,,,,,,,,,
4,gid://shopify/LineItem/14854250037461,,,,,,,,,Monster High Dance the Fright Away - Clawdeen ...,...,1.0,unfulfilled,gid://shopify/Order/6183358234837,gid://shopify/ProductVariant/51816806318293,TOY394,Default Title,14.99,USD,14.99,USD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,gid://shopify/LineItem/14854291259605,,,,,,,,,Jurassic World RC Vehicle Gyrosphere RC,...,1.0,unfulfilled,gid://shopify/Order/6183381303509,gid://shopify/ProductVariant/51816789180629,TOY192,Default Title,34.99,USD,34.99,USD
305,gid://shopify/Order/6183381369045,#1101,2025-08-05T11:03:00Z,2025-08-05T11:03:00Z,USD,"[egnition-sample-data, toys]",,104.97,USD,,...,,,,,,,,,,
306,gid://shopify/LineItem/14854291292373,,,,,,,,,Barbie Pizza Chef Doll and Playset,...,1.0,unfulfilled,gid://shopify/Order/6183381369045,gid://shopify/ProductVariant/51816789016789,TOY188,Default Title,29.99,USD,29.99,USD
307,gid://shopify/LineItem/14854291325141,,,,,,,,,Jurassic World RC Vehicle Raptor Attack RC,...,1.0,unfulfilled,gid://shopify/Order/6183381369045,gid://shopify/ProductVariant/51816789410005,TOY198,Default Title,49.99,USD,49.99,USD
