# PRODUCT DIMENSION

In [1]:
import pandas as pd
from connection_script import connect_databases

In [2]:
# Open both database handles: db_op is the source read by the EXTRACT queries,
# db_etl is the target written to by the LOAD step.
db_op, db_etl = connect_databases()

## EXTRACT 

In [3]:
# Category / subcategory rows are ordered by their source IDs because the
# TRANSFORM cells derive the warehouse keys from row position (index + 1);
# without ORDER BY the engine is free to return rows in any order.
dim_product_category = pd.read_sql_query(
    "SELECT * FROM [Production].[ProductCategory] ORDER BY ProductCategoryID;",
    db_op,
)
dim_product_subcategory = pd.read_sql_query(
    "SELECT Name, ProductCategoryID FROM [Production].[ProductSubCategory] "
    "ORDER BY ProductSubcategoryID;",
    db_op,
)

# One query shape serves both product sets; only the membership operator
# differs. The filter is applied to t1.ProductID (the product itself) rather
# than t3.ProductID: t3 is LEFT JOINed, so a product without a photo row would
# yield NULL there and silently drop out of BOTH result sets.
product_query_template = """
    SELECT t1.*, t2.Name AS ModelName, t4.LargePhoto
    FROM [Production].[Product] AS t1
    LEFT JOIN [Production].[ProductModel] AS t2
    ON t1.ProductModelID = t2.ProductModelID
    LEFT JOIN [Production].[ProductProductPhoto] AS t3
    ON t1.ProductID = t3.ProductID
    LEFT JOIN [Production].[ProductPhoto] AS t4
    ON t3.ProductPhotoID = t4.ProductPhotoID
    WHERE t1.ProductID {membership} (
        SELECT ProductID FROM Production.ProductCostHistory
    )
"""

# "Static" products have no cost-history rows; "dynamic" products have some.
static_products = pd.read_sql_query(
    product_query_template.format(membership="NOT IN"), db_op
)
dynamic_products = pd.read_sql_query(
    product_query_template.format(membership="IN"), db_op
)

# Both history tables are read in the same (ProductID, StartDate) order so
# that any positional pairing done downstream is deterministic.
ProductsListPriceChanges = pd.read_sql_query(
    "SELECT * FROM [Production].[ProductListPriceHistory] ORDER BY ProductID, StartDate",
    db_op,
)

ProductsCostChanges = pd.read_sql_query(
    "SELECT * FROM [Production].[ProductCostHistory] ORDER BY ProductID, StartDate",
    db_op,
)

## TRANSFORM

### Product Category

In [4]:
# Build DimProductCategory: the 1-based row position becomes both the frame
# index (written as ProductCategoryKey in the LOAD step) and the
# ProductCategoryAlternateKey column.
dim_product_category = (
    dim_product_category
    .rename(columns={"Name": "EnglishProductCategoryName"})
    # Restart from 0 on a fresh frame so that re-running this cell never
    # shifts the keys a second time (and the extracted frame is not mutated).
    .reset_index(drop=True)
)
dim_product_category.index += 1
dim_product_category = dim_product_category.assign(
    ProductCategoryAlternateKey=list(dim_product_category.index)
)[["ProductCategoryAlternateKey", "EnglishProductCategoryName"]]
dim_product_category


Unnamed: 0,ProductCategoryAlternateKey,EnglishProductCategoryName
1,1,Bikes
2,2,Components
3,3,Clothing
4,4,Accessories


### Product Subcategory

In [5]:
# Build DimProductSubcategory: the 1-based row position becomes both the frame
# index (written as ProductSubcategoryKey in the LOAD step) and the
# ProductSubcategoryAlternateKey column; the source ProductCategoryID is
# carried over as ProductCategoryKey.
dim_product_subcategory = (
    dim_product_subcategory
    .rename(columns={"Name": "EnglishProductSubcategoryName",
                     "ProductCategoryID": "ProductCategoryKey"})
    # Restart from 0 on a fresh frame so that re-running this cell never
    # shifts the keys a second time (and the extracted frame is not mutated).
    .reset_index(drop=True)
)
dim_product_subcategory.index += 1
dim_product_subcategory = dim_product_subcategory.assign(
    ProductSubcategoryAlternateKey=list(dim_product_subcategory.index)
)[["ProductSubcategoryAlternateKey", "EnglishProductSubcategoryName", "ProductCategoryKey"]]
dim_product_subcategory

Unnamed: 0,ProductSubcategoryAlternateKey,EnglishProductSubcategoryName,ProductCategoryKey
1,1,Mountain Bikes,1
2,2,Road Bikes,1
3,3,Touring Bikes,1
4,4,Handlebars,2
5,5,Bottom Brackets,2
6,6,Brakes,2
7,7,Chains,2
8,8,Cranksets,2
9,9,Derailleurs,2
10,10,Forks,2


### Product

In [6]:
# Inspect the column names, non-null counts and dtypes of the products that
# have no cost history.
static_products.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 211 entries, 0 to 210
Data columns (total 27 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   ProductID              211 non-null    int64         
 1   Name                   211 non-null    object        
 2   ProductNumber          211 non-null    object        
 3   MakeFlag               211 non-null    bool          
 4   FinishedGoodsFlag      211 non-null    bool          
 5   Color                  13 non-null     object        
 6   SafetyStockLevel       211 non-null    int64         
 7   ReorderPoint           211 non-null    int64         
 8   StandardCost           211 non-null    float64       
 9   ListPrice              211 non-null    float64       
 10  Size                   2 non-null      object        
 11  SizeUnitMeasureCode    2 non-null      object        
 12  WeightUnitMeasureCode  9 non-null      object        
 13  Weigh

In [7]:

# Keep only the source columns that feed the product dimension.
# NOTE: dim_product is assigned again by the later pd.concat cell, which
# starts from the full static_products frame rather than from this subset.
dim_product = static_products.loc[:, [
    "ProductID",
    "ProductNumber",
    "ProductSubcategoryID",
    "WeightUnitMeasureCode",
    "SizeUnitMeasureCode",
    "Name",
    "FinishedGoodsFlag",
    "Color",
    "SafetyStockLevel",
    "ReorderPoint",
    "Size",
    # "SizeRange" is missing from the source data
    "Weight",
    "DaysToManufacture",
    "ProductLine",
    "Class",
    "Style",
    "ModelName",
    "LargePhoto",
    "StandardCost",
    "ListPrice",
]]

In [8]:
# Remove the current cost / price columns from the products that have history;
# a later cell merges StandardCost and ListPrice back in from the history rows.
dynamic_products = dynamic_products.drop(["StandardCost", "ListPrice"], axis="columns")

In [9]:
# Expose each history frame's row labels as an ordinary column so that the
# two frames can be merged on it.
ProductsCostChanges["custom_index"] = ProductsCostChanges.index.tolist()
ProductsListPriceChanges["custom_index"] = ProductsListPriceChanges.index.tolist()

In [10]:
# Pair every cost-history row with the list-price row for the same product
# and period. Joining on the business key (ProductID, StartDate) replaces the
# previous join on row position, which silently mispairs costs and prices as
# soon as the two queries return rows in a different order or count.
#   * how="left" keeps one output row per cost-history row.
#   * validate="one_to_one" raises instead of duplicating rows if the key is
#     ever not unique on either side.
# ProductID is renamed to ProductID_x because the next cell selects that name.
merge_cost_listprice = (
    ProductsCostChanges
    .merge(
        ProductsListPriceChanges,
        on=["ProductID", "StartDate"],
        how="left",
        validate="one_to_one",
    )
    .rename(columns={"ProductID": "ProductID_x"})
)

In [11]:
# Reduce the merged history to the product id plus the two monetary columns,
# restoring the plain ProductID name that the merge suffixed.
merge_cost_listprice = (
    merge_cost_listprice
    .loc[:, ["ProductID_x", "StandardCost", "ListPrice"]]
    .rename(columns={"ProductID_x": "ProductID"})
)
merge_cost_listprice

Unnamed: 0,ProductID,StandardCost,ListPrice
0,707,12.0278,33.6442
1,707,13.8782,33.6442
2,707,13.0863,34.9900
3,708,12.0278,33.6442
4,708,13.8782,33.6442
...,...,...,...
390,995,44.9506,101.2400
391,996,53.9416,121.4900
392,997,343.6496,539.9900
393,998,343.6496,539.9900


In [12]:
# Attach the historical cost / price pairs to their products; a product with
# several history rows appears once per history row in the result.
dynamic_products = pd.merge(dynamic_products, merge_cost_listprice, on="ProductID")
dynamic_products

Unnamed: 0,ProductID,Name,ProductNumber,MakeFlag,FinishedGoodsFlag,Color,SafetyStockLevel,ReorderPoint,Size,SizeUnitMeasureCode,...,ProductModelID,SellStartDate,SellEndDate,DiscontinuedDate,rowguid,ModifiedDate,ModelName,LargePhoto,StandardCost,ListPrice
0,707,"Sport-100 Helmet, Red",HL-U509-R,False,True,Red,4,3,,,...,33,2011-05-31,NaT,,2E1EF41A-C08A-4FF6-8ADA-BDE58B64A712,2014-02-08 10:01:36.827,Sport-100,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,12.0278,33.6442
1,707,"Sport-100 Helmet, Red",HL-U509-R,False,True,Red,4,3,,,...,33,2011-05-31,NaT,,2E1EF41A-C08A-4FF6-8ADA-BDE58B64A712,2014-02-08 10:01:36.827,Sport-100,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,13.8782,33.6442
2,707,"Sport-100 Helmet, Red",HL-U509-R,False,True,Red,4,3,,,...,33,2011-05-31,NaT,,2E1EF41A-C08A-4FF6-8ADA-BDE58B64A712,2014-02-08 10:01:36.827,Sport-100,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,13.0863,34.9900
3,708,"Sport-100 Helmet, Black",HL-U509,False,True,Black,4,3,,,...,33,2011-05-31,NaT,,A25A44FB-C2DE-4268-958F-110B8D7621E2,2014-02-08 10:01:36.827,Sport-100,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,12.0278,33.6442
4,708,"Sport-100 Helmet, Black",HL-U509,False,True,Black,4,3,,,...,33,2011-05-31,NaT,,A25A44FB-C2DE-4268-958F-110B8D7621E2,2014-02-08 10:01:36.827,Sport-100,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,13.8782,33.6442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,995,ML Bottom Bracket,BB-8107,True,True,,500,375,,,...,96,2013-05-30,NaT,,71AB847F-D091-42D6-B735-7B0C2D82FC84,2014-02-08 10:01:36.827,ML Bottom Bracket,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,44.9506,101.2400
391,996,HL Bottom Bracket,BB-9108,True,True,,500,375,,,...,97,2013-05-30,NaT,,230C47C5-08B2-4CE3-B706-69C0BDD62965,2014-02-08 10:01:36.827,HL Bottom Bracket,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,53.9416,121.4900
392,997,"Road-750 Black, 44",BK-R19B-44,True,True,Black,100,75,44,CM,...,31,2013-05-30,NaT,,44CE4802-409F-43AB-9B27-CA53421805BE,2014-02-08 10:01:36.827,Road-750,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x8avw\xf7...,343.6496,539.9900
393,998,"Road-750 Black, 48",BK-R19B-48,True,True,Black,100,75,48,CM,...,31,2013-05-30,NaT,,3DE9A212-1D49-40B6-B10A-F564D981DBDE,2014-02-08 10:01:36.827,Road-750,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x8avw\xf7...,343.6496,539.9900


In [13]:
# Stack the products without cost history on top of the products with history.
# This replaces the column subset assigned to dim_product in the earlier cell.
# Row labels are not renumbered here, so labels repeat across the two inputs;
# the index is rebuilt later, after sorting by ProductID.
dim_product = pd.concat([static_products, dynamic_products])

  dim_product = pd.concat([static_products, dynamic_products])


In [14]:
# Switch the source column names to the names used by the product dimension.
dim_product = dim_product.rename(
    columns=dict(
        ProductNumber="ProductAlternateKey",
        ProductSubcategoryID="ProductSubcategoryKey",
        Name="EnglishProductName",
    )
)

In [15]:
# Load one row per (product, culture) description from the source view.
# CultureID is trimmed in SQL so it can be compared directly with short codes
# such as 'en' or 'fr' in get_description.
products_descriptions = pd.read_sql_query("SELECT ProductID, TRIM(CultureID) AS CultureID, Description FROM [Production].[vProductAndDescription]", db_op)

In [16]:
# Preview the description rows (ProductID, CultureID, Description).
products_descriptions

Unnamed: 0,ProductID,CultureID,Description
0,994,en,Chromoly steel.
1,995,en,Aluminum alloy cups; large diameter spindle.
2,996,en,Aluminum alloy cups and a hollow axle.
3,984,en,"Suitable for any type of riding, on or off-roa..."
4,985,en,"Suitable for any type of riding, on or off-roa..."
...,...,...,...
1759,894,ar,تصميم عريض الوصلات.
1760,894,fr,Conception liaison large.
1761,894,th,การออกแบบให้มีจุดเชื่อมกว้าง
1762,894,he,עיצוב רחב-חוליות.


In [17]:
def get_description(product_id, language):
    """Look up the description of one product in one language.

    Searches the notebook-level ``products_descriptions`` frame for rows whose
    ``ProductID`` equals ``product_id`` and whose ``CultureID`` equals
    ``language``.

    Returns the ``Description`` of the first matching row, or ``None`` when
    no row matches.
    """
    same_product = products_descriptions['ProductID'] == product_id
    same_language = products_descriptions['CultureID'] == language
    matches = products_descriptions.loc[same_product & same_language, 'Description']
    if matches.empty:
        return None
    return matches.iloc[0]
    

In [18]:
# Preview the combined product rows before the descriptions are attached.
dim_product

Unnamed: 0,ProductID,EnglishProductName,ProductAlternateKey,MakeFlag,FinishedGoodsFlag,Color,SafetyStockLevel,ReorderPoint,StandardCost,ListPrice,...,Style,ProductSubcategoryKey,ProductModelID,SellStartDate,SellEndDate,DiscontinuedDate,rowguid,ModifiedDate,ModelName,LargePhoto
0,341,Flat Washer 1,FW-1000,False,False,,1000,750,0.0000,0.00,...,,,,2008-04-30,NaT,,A3F2FA3A-22E1-43D8-A131-A9B89C32D8EA,2014-02-08 10:01:36.827,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...
1,342,Flat Washer 6,FW-1200,False,False,,1000,750,0.0000,0.00,...,,,,2008-04-30,NaT,,331ADDEC-E9B9-4A7E-9324-42069C2DCDC4,2014-02-08 10:01:36.827,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...
2,343,Flat Washer 2,FW-1400,False,False,,1000,750,0.0000,0.00,...,,,,2008-04-30,NaT,,84A3473E-AE26-4A21-81B9-60BB418A79B2,2014-02-08 10:01:36.827,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...
3,344,Flat Washer 9,FW-3400,False,False,,1000,750,0.0000,0.00,...,,,,2008-04-30,NaT,,0AE4CE60-5242-48F5-ADA1-3013FF45F969,2014-02-08 10:01:36.827,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...
4,345,Flat Washer 4,FW-3800,False,False,,1000,750,0.0000,0.00,...,,,,2008-04-30,NaT,,2C1C58B4-234C-4B3A-8C8E-84524AC05EEA,2014-02-08 10:01:36.827,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
390,995,ML Bottom Bracket,BB-8107,True,True,,500,375,44.9506,101.24,...,,5.0,96.0,2013-05-30,NaT,,71AB847F-D091-42D6-B735-7B0C2D82FC84,2014-02-08 10:01:36.827,ML Bottom Bracket,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...
391,996,HL Bottom Bracket,BB-9108,True,True,,500,375,53.9416,121.49,...,,5.0,97.0,2013-05-30,NaT,,230C47C5-08B2-4CE3-B706-69C0BDD62965,2014-02-08 10:01:36.827,HL Bottom Bracket,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...
392,997,"Road-750 Black, 44",BK-R19B-44,True,True,Black,100,75,343.6496,539.99,...,U,2.0,31.0,2013-05-30,NaT,,44CE4802-409F-43AB-9B27-CA53421805BE,2014-02-08 10:01:36.827,Road-750,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x8avw\xf7...
393,998,"Road-750 Black, 48",BK-R19B-48,True,True,Black,100,75,343.6496,539.99,...,U,2.0,31.0,2013-05-30,NaT,,3DE9A212-1D49-40B6-B10A-F564D981DBDE,2014-02-08 10:01:36.827,Road-750,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x8avw\xf7...


In [19]:
# Build one description list per language, aligned with the row order of
# dim_product; get_description yields None where a product has no text in
# that language.
products_ids = list(dim_product['ProductID'])
english_descriptions = [get_description(pid, 'en') for pid in products_ids]
french_descriptions = [get_description(pid, 'fr') for pid in products_ids]
chinese_descriptions = [get_description(pid, 'zh-cht') for pid in products_ids]
arabic_descriptions = [get_description(pid, 'ar') for pid in products_ids]
hebrew_descriptions = [get_description(pid, 'he') for pid in products_ids]
thai_descriptions = [get_description(pid, 'th') for pid in products_ids]
# German, Japanese and Turkish descriptions are still missing.
    ## German, Japanese and Turkish are still missing

In [20]:
# Add one description column per language; each list holds one entry per
# dim_product row, in row order.
description_columns = {
    "EnglishDescription": english_descriptions,
    "FrenchDescription": french_descriptions,
    "ChineseDescription": chinese_descriptions,
    "ArabicDescription": arabic_descriptions,
    "HebrewDescription": hebrew_descriptions,
    "ThaiDescription": thai_descriptions,
}
dim_product = dim_product.assign(**description_columns)

In [21]:
# Keep the dimension columns in their final order. ProductID stays for now
# because the next cell sorts on it before dropping it.
dim_product = dim_product.loc[:, [
    "ProductID",
    "ProductAlternateKey",
    "ProductSubcategoryKey",
    "WeightUnitMeasureCode",
    "SizeUnitMeasureCode",
    "EnglishProductName",
    "StandardCost",
    "FinishedGoodsFlag",
    "Color",
    "SafetyStockLevel",
    "ReorderPoint",
    "ListPrice",
    "Size",
    # "SizeRange" is not available yet
    "Weight",
    "DaysToManufacture",
    "ProductLine",
    # "DealerPrice" is not available yet
    "Class",
    "Style",
    "ModelName",
    "LargePhoto",
    "EnglishDescription",
    "FrenchDescription",
    "ChineseDescription",
    "ArabicDescription",
    "HebrewDescription",
    "ThaiDescription",
]]

In [22]:
# Order the rows by source ProductID and number them 1..n; the resulting index
# is written as ProductKey in the LOAD step.
dim_product = (
    dim_product
    # A stable sort keeps the several cost/price rows of one product in the
    # order they already have; the default sort algorithm gives no such
    # guarantee, which would make the ProductKey assignment arbitrary.
    .sort_values("ProductID", kind="mergesort")
    # drop=True discards the old, repeating row labels outright instead of
    # materialising them as an "index" column that then has to be dropped.
    .reset_index(drop=True)
    .drop(columns=["ProductID"])
)
dim_product.index += 1
dim_product

Unnamed: 0,ProductAlternateKey,ProductSubcategoryKey,WeightUnitMeasureCode,SizeUnitMeasureCode,EnglishProductName,StandardCost,FinishedGoodsFlag,Color,SafetyStockLevel,ReorderPoint,...,Class,Style,ModelName,LargePhoto,EnglishDescription,FrenchDescription,ChineseDescription,ArabicDescription,HebrewDescription,ThaiDescription
1,AR-5381,,,,Adjustable Race,0.0000,False,,1000,750,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
2,BA-8327,,,,Bearing Ball,0.0000,False,,1000,750,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
3,BE-2349,,,,BB Ball Bearing,0.0000,False,,800,600,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
4,BE-2908,,,,Headset Ball Bearings,0.0000,False,,800,600,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
5,BL-2036,,,,Blade,0.0000,False,,800,600,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
602,BB-8107,5.0,G,,ML Bottom Bracket,44.9506,True,,500,375,...,M,,ML Bottom Bracket,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,Aluminum alloy cups; large diameter spindle.,Cuvettes en alliage d'aluminium ; axe de grand...,铝合金车圈；大直径脚蹬轴。,فناجين من سبيكة الألومنيوم؛ ذات محور دوران كبي...,כיסויים מסגסוגת אלומיניום; ציר רחב-קוטר.,ดุมอลูมิเนียมอัลลอยด์ แกนเพลาขนาดใหญ่
603,BB-9108,5.0,G,,HL Bottom Bracket,53.9416,True,,500,375,...,H,,HL Bottom Bracket,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,Aluminum alloy cups and a hollow axle.,Cuvettes en alliage d'aluminium et axe creux.,铝合金车圈和空心轴。,فناجين من سبيكة الألومنيوم ومحور أجوف.,כיסויים מסגסוגת אלומיניום וציר חלול.,ดุมอลูมิเนียมอัลลอยด์และเพลากลวง
604,BK-R19B-44,2.0,LB,CM,"Road-750 Black, 44",343.6496,True,Black,100,75,...,L,U,Road-750,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x8avw\xf7...,Entry level adult bike; offers a comfortable r...,Vélo d'adulte d'entrée de gamme ; permet une c...,入门级成人自行车；确保越野旅行或公路骑乘的舒适。快拆式车毂和轮缘。,إنها دراجة مناسبة للمبتدئين من البالغين؛ فهي ت...,"אופני מבוגרים למתחילים; מציעים רכיבה נוחה ""מחו...",จักรยานระดับเริ่มต้นสำหรับผู้ใหญ่ ให้ความสบายใ...
605,BK-R19B-48,2.0,LB,CM,"Road-750 Black, 48",343.6496,True,Black,100,75,...,L,U,Road-750,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x8avw\xf7...,Entry level adult bike; offers a comfortable r...,Vélo d'adulte d'entrée de gamme ; permet une c...,入门级成人自行车；确保越野旅行或公路骑乘的舒适。快拆式车毂和轮缘。,إنها دراجة مناسبة للمبتدئين من البالغين؛ فهي ت...,"אופני מבוגרים למתחילים; מציעים רכיבה נוחה ""מחו...",จักรยานระดับเริ่มต้นสำหรับผู้ใหญ่ ให้ความสบายใ...


## LOAD

In [23]:
# TODO: DimProduct does not match the size of the original dimension yet.

# Reconnect, then write each dimension to the target database. Existing tables
# are replaced and each frame's index is stored as the table's key column.
db_op, db_etl = connect_databases()
dim_product_category.to_sql(
    name='DimProductCategory',
    con=db_etl,
    if_exists='replace',
    index_label='ProductCategoryKey',
)
dim_product_subcategory.to_sql(
    name='DimProductSubcategory',
    con=db_etl,
    if_exists='replace',
    index_label='ProductSubcategoryKey',
)
dim_product.to_sql(
    name='DimProduct',
    con=db_etl,
    if_exists='replace',
    index_label='ProductKey',
)

606