# PRODUCT DIMENSION

In [1]:
import pandas as pd
from connection_script import connect_databases

In [2]:
db_op, db_etl = connect_databases()

## EXTRACT 

In [3]:
dim_product_category =  pd.read_sql_query("SELECT * FROM [Production].[ProductCategory];", db_op)
dim_product_subcategory =  pd.read_sql_query("SELECT Name, ProductCategoryID FROM [Production].[ProductSubCategory];", db_op)
#dim_product =  pd.read_sql_query(
"""
    SELECT t1.*, t2.Name AS ModelName, t4.LargePhoto
    FROM [Production].[Product] AS t1
    INNER JOIN [Production].[ProductModel] AS t2
    ON t1.ProductModelID = t2.ProductModelID
    INNER JOIN [Production].[ProductProductPhoto] AS t3
    ON t1.ProductID = t3.ProductID
    INNER JOIN [Production].[ProductPhoto] AS t4
    ON t3.ProductPhotoID = t4.ProductPhotoID;
"""
#, db_op)
dim_product =  pd.read_sql_query("""
    SELECT t1.*, t2.Name AS ModelName, t4.LargePhoto
    FROM [Production].[Product] AS t1
    LEFT JOIN [Production].[ProductModel] AS t2
    ON t1.ProductModelID = t2.ProductModelID
    LEFT JOIN [Production].[ProductProductPhoto] AS t3
    ON t1.ProductID = t3.ProductID
    LEFT JOIN [Production].[ProductPhoto] AS t4
    ON t3.ProductPhotoID = t4.ProductPhotoID;
""", db_op)



## TRANSFORM

### Product Category

In [4]:
dim_product_category.index+=1
dim_product_category = dim_product_category.rename(columns={"Name":"EnglishProductCategoryName"})
dim_product_category = dim_product_category.assign(ProductCategoryAlternateKey=list(dim_product_category.index))
dim_product_category = dim_product_category[["ProductCategoryAlternateKey", "EnglishProductCategoryName"]]
dim_product_category


Unnamed: 0,ProductCategoryAlternateKey,EnglishProductCategoryName
1,1,Bikes
2,2,Components
3,3,Clothing
4,4,Accessories


### Product Subcategory

In [5]:
dim_product_subcategory.index+=1
dim_product_subcategory = dim_product_subcategory.rename(columns={"Name":"EnglishProductSubcategoryName", "ProductCategoryID":"ProductCategoryKey"})
dim_product_subcategory = dim_product_subcategory.assign(ProductSubcategoryAlternateKey=list(dim_product_subcategory.index))
dim_product_subcategory = dim_product_subcategory[["ProductSubcategoryAlternateKey", "EnglishProductSubcategoryName", "ProductCategoryKey"]]
dim_product_subcategory

Unnamed: 0,ProductSubcategoryAlternateKey,EnglishProductSubcategoryName,ProductCategoryKey
1,1,Mountain Bikes,1
2,2,Road Bikes,1
3,3,Touring Bikes,1
4,4,Handlebars,2
5,5,Bottom Brackets,2
6,6,Brakes,2
7,7,Chains,2
8,8,Cranksets,2
9,9,Derailleurs,2
10,10,Forks,2


### Product

In [6]:
dim_product.info()
dim_product.index +=1

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 504 entries, 0 to 503
Data columns (total 27 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   ProductID              504 non-null    int64         
 1   Name                   504 non-null    object        
 2   ProductNumber          504 non-null    object        
 3   MakeFlag               504 non-null    bool          
 4   FinishedGoodsFlag      504 non-null    bool          
 5   Color                  256 non-null    object        
 6   SafetyStockLevel       504 non-null    int64         
 7   ReorderPoint           504 non-null    int64         
 8   StandardCost           504 non-null    float64       
 9   ListPrice              504 non-null    float64       
 10  Size                   211 non-null    object        
 11  SizeUnitMeasureCode    176 non-null    object        
 12  WeightUnitMeasureCode  205 non-null    object        
 13  Weigh

In [7]:

dim_product = dim_product[["ProductID",
                            "ProductNumber",
                           "ProductSubcategoryID",
                           "WeightUnitMeasureCode",
                           "SizeUnitMeasureCode",
                           "Name",
                           "StandardCost",
                          "FinishedGoodsFlag",
                          "Color",
                           "SafetyStockLevel",
                           "ReorderPoint",
                           "ListPrice",
                           "Size",
                           ##"SizeRange" missing
                           "Weight",
                           "DaysToManufacture",
                           "ProductLine",
                           "Class",
                           "Style",
                           "ModelName",
                           "LargePhoto"
                          ]]


dim_product = dim_product.rename(columns={"ProductNumber":"ProductAlternateKey",
                                          "ProductSubcategoryID":"ProductSubcategoryKey",
                                          "Name":"EnglishProductName"
                                         })
  
                                        

In [8]:
products_descriptions = pd.read_sql_query("SELECT ProductID, TRIM(CultureID) AS CultureID, Description FROM [Production].[vProductAndDescription]", db_op)

In [9]:
products_descriptions

Unnamed: 0,ProductID,CultureID,Description
0,994,en,Chromoly steel.
1,995,en,Aluminum alloy cups; large diameter spindle.
2,996,en,Aluminum alloy cups and a hollow axle.
3,984,en,"Suitable for any type of riding, on or off-roa..."
4,985,en,"Suitable for any type of riding, on or off-roa..."
...,...,...,...
1759,894,ar,تصميم عريض الوصلات.
1760,894,fr,Conception liaison large.
1761,894,th,การออกแบบให้มีจุดเชื่อมกว้าง
1762,894,he,עיצוב רחב-חוליות.


In [10]:
# Get description for a product in a specif language
def get_description(product_id, language):
    row_data = products_descriptions[(products_descriptions['ProductID']==product_id)&(products_descriptions['CultureID']==language)]
    if len(row_data) > 0:
        return row_data.iloc[0]['Description']
    else:
        return None
    

In [11]:
#dim_product['EnglishDescription'] = dim_product[['ProductID']].apply(lambda x:int(x), index=0)
products_ids = list(dim_product['ProductID'])
english_descriptions = []
french_descriptions = []
chinese_descriptions = []
arabic_descriptions = []
hebrew_descriptions = []
thai_descriptions = []
for p in products_ids:
    english_descriptions.append(get_description(p, 'en'))
    french_descriptions.append(get_description(p, 'fr'))
    chinese_descriptions.append(get_description(p, 'zh-cht'))
    arabic_descriptions.append(get_description(p, 'ar'))
    hebrew_descriptions.append(get_description(p, 'he'))
    thai_descriptions.append(get_description(p, 'th'))
    ## German, Japanese and Turkish are still missing

In [12]:
dim_product= dim_product.assign(
    EnglishDescription=english_descriptions,
    FrenchDescription=french_descriptions,
    ChineseDescription=chinese_descriptions,
    ArabicDescriptions=arabic_descriptions,
    HebrewDescription=hebrew_descriptions,
    ThaiDescriptions=thai_descriptions
)

In [13]:
dim_product

Unnamed: 0,ProductID,ProductAlternateKey,ProductSubcategoryKey,WeightUnitMeasureCode,SizeUnitMeasureCode,EnglishProductName,StandardCost,FinishedGoodsFlag,Color,SafetyStockLevel,...,Class,Style,ModelName,LargePhoto,EnglishDescription,FrenchDescription,ChineseDescription,ArabicDescriptions,HebrewDescription,ThaiDescriptions
1,1,AR-5381,,,,Adjustable Race,0.0000,False,,1000,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
2,2,BA-8327,,,,Bearing Ball,0.0000,False,,1000,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
3,3,BE-2349,,,,BB Ball Bearing,0.0000,False,,800,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
4,4,BE-2908,,,,Headset Ball Bearings,0.0000,False,,800,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
5,316,BL-2036,,,,Blade,0.0000,False,,800,...,,,,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x00\x00\x...,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500,796,BK-R89B-58,2.0,LB,CM,"Road-250 Black, 58",1554.9479,True,Black,100,...,H,U,Road-250,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\xc6\xc8\x...,"Alluminum-alloy frame provides a light, stiff ...",Cadre en alliage d'aluminium qui offre une con...,铝合金车架确保骑乘轻快、稳固，可用于室内比赛或参加俱乐部的越野活动。,يوفر الهيكل المصنوع من سبيكة ألومنيوم قيادة قو...,מסגרת מסגסוגת אלומיניום מספקת רכיבה קלה ומוצקה...,เฟรมอลูมิเนียมอัลลอยด์ ให้การขับขี่ที่เบา มั่น...
501,789,BK-R89R-44,2.0,LB,CM,"Road-250 Red, 44",1518.7864,True,Red,100,...,H,U,Road-250,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x9fcj\xca...,"Alluminum-alloy frame provides a light, stiff ...",Cadre en alliage d'aluminium qui offre une con...,铝合金车架确保骑乘轻快、稳固，可用于室内比赛或参加俱乐部的越野活动。,يوفر الهيكل المصنوع من سبيكة ألومنيوم قيادة قو...,מסגרת מסגסוגת אלומיניום מספקת רכיבה קלה ומוצקה...,เฟรมอลูมิเนียมอัลลอยด์ ให้การขับขี่ที่เบา มั่น...
502,790,BK-R89R-48,2.0,LB,CM,"Road-250 Red, 48",1518.7864,True,Red,100,...,H,U,Road-250,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x9fcj\xca...,"Alluminum-alloy frame provides a light, stiff ...",Cadre en alliage d'aluminium qui offre une con...,铝合金车架确保骑乘轻快、稳固，可用于室内比赛或参加俱乐部的越野活动。,يوفر الهيكل المصنوع من سبيكة ألومنيوم قيادة قو...,מסגרת מסגסוגת אלומיניום מספקת רכיבה קלה ומוצקה...,เฟรมอลูมิเนียมอัลลอยด์ ให้การขับขี่ที่เบา มั่น...
503,791,BK-R89R-52,2.0,LB,CM,"Road-250 Red, 52",1518.7864,True,Red,100,...,H,U,Road-250,b'GIF89a\xf0\x00\x95\x00\xf7\x00\x00\x9fcj\xca...,"Alluminum-alloy frame provides a light, stiff ...",Cadre en alliage d'aluminium qui offre une con...,铝合金车架确保骑乘轻快、稳固，可用于室内比赛或参加俱乐部的越野活动。,يوفر الهيكل المصنوع من سبيكة ألومنيوم قيادة قو...,מסגרת מסגסוגת אלומיניום מספקת רכיבה קלה ומוצקה...,เฟรมอลูมิเนียมอัลลอยด์ ให้การขับขี่ที่เบา มั่น...


In [14]:
dim_product = dim_product.drop(columns=["ProductID"])

## LOAD

In [15]:
# DimProduct does not match the original size yet!!!

db_op, db_etl = connect_databases()
dim_product_category.to_sql('DimProductCategory', db_etl, if_exists='replace', index_label='ProductCategoryKey')
dim_product_subcategory.to_sql('DimProductSubcategory', db_etl, if_exists='replace', index_label='ProductSubcategoryKey')
dim_product.to_sql('DimProduct', db_etl, if_exists='replace', index_label='ProductKey')

504