# PRODUCT DIMENSION

In [12]:
import pandas as pd
from connection_script import connect_databases

In [13]:
db_op, db_etl = connect_databases()

## EXTRACT 

In [14]:
dim_product_category =  pd.read_sql_query("SELECT Name FROM [Production].[ProductCategory];", db_op)
dim_product_subcategory =  pd.read_sql_query("SELECT Name, ProductCategoryID FROM [Production].[ProductSubCategory];", db_op)
dim_product =  pd.read_sql_query("""
    SELECT t1.*, t2.Name AS ModelName, t4.LargePhoto
    FROM [Production].[Product] AS t1
    INNER JOIN [Production].[ProductModel] AS t2
    ON t1.ProductModelID = t2.ProductModelID
    INNER JOIN [Production].[ProductProductPhoto] AS t3
    ON t1.ProductID = t3.ProductID
    INNER JOIN [Production].[ProductPhoto] AS t4
    ON t3.ProductPhotoID = t4.ProductPhotoID;
""", db_op)

## TRANSFORM

### Product Category

In [15]:
dim_product_category = dim_product_category.rename(columns={"Name":"EnglishProductCategoryName"})
dim_product_category = dim_product_category.assign(ProductCategoryAlternateKey=list(dim_product_category.index))
dim_product_category = dim_product_category[["ProductCategoryAlternateKey", "EnglishProductCategoryName"]]
dim_product_category

Unnamed: 0,ProductCategoryAlternateKey,EnglishProductCategoryName
0,0,Accessories
1,1,Bikes
2,2,Clothing
3,3,Components


### Product Subcategory

In [16]:
dim_product_subcategory = dim_product_subcategory.rename(columns={"Name":"EnglishProductSubcategoryName", "ProductCategoryID":"ProductCategoryKey"})
dim_product_subcategory = dim_product_subcategory.assign(ProductSubcategoryAlternateKey=list(dim_product_subcategory.index))
dim_product_subcategory = dim_product_subcategory[["ProductSubcategoryAlternateKey", "EnglishProductSubcategoryName", "ProductCategoryKey"]]
dim_product_subcategory

Unnamed: 0,ProductSubcategoryAlternateKey,EnglishProductSubcategoryName,ProductCategoryKey
0,0,Mountain Bikes,1
1,1,Road Bikes,1
2,2,Touring Bikes,1
3,3,Handlebars,2
4,4,Bottom Brackets,2
5,5,Brakes,2
6,6,Chains,2
7,7,Cranksets,2
8,8,Derailleurs,2
9,9,Forks,2


### Product

In [17]:
dim_product.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 295 entries, 0 to 294
Data columns (total 27 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   ProductID              295 non-null    int64         
 1   Name                   295 non-null    object        
 2   ProductNumber          295 non-null    object        
 3   MakeFlag               295 non-null    bool          
 4   FinishedGoodsFlag      295 non-null    bool          
 5   Color                  245 non-null    object        
 6   SafetyStockLevel       295 non-null    int64         
 7   ReorderPoint           295 non-null    int64         
 8   StandardCost           295 non-null    float64       
 9   ListPrice              295 non-null    float64       
 10  Size                   211 non-null    object        
 11  SizeUnitMeasureCode    176 non-null    object        
 12  WeightUnitMeasureCode  198 non-null    object        
 13  Weigh

In [18]:
dim_product = dim_product[["ProductNumber",
                           "ProductSubcategoryID",
                           "WeightUnitMeasureCode",
                           "SizeUnitMeasureCode",
                           "Name",
                           "StandardCost",
                          "FinishedGoodsFlag",
                          "Color",
                           "SafetyStockLevel",
                           "ReorderPoint",
                           "ListPrice",
                           "Size",
                           ##"SizeRange" missing
                           "Weight",
                           "DaysToManufacture",
                           "ProductLine",
                           "Class",
                           "Style",
                           "ModelName",
                           "LargePhoto"
                          ]]
"""
dim_product = dim_product.rename(columns={"ProductNumber":"ProductAlternateKey",
                                          "ProductSubcategoryID":"ProductSubcategoryKey",
                                          "Name":"EnglishProductName"
                                         })
                                         """

'\ndim_product = dim_product.rename(columns={"ProductNumber":"ProductAlternateKey",\n                                          "ProductSubcategoryID":"ProductSubcategoryKey",\n                                          "Name":"EnglishProductName"\n                                         })\n                                         '

In [22]:
products_descriptions = pd.read_sql_query("SELECT ProductID, CultureID, Description FROM [Production].[vProductAndDescription]", db_op)

In [23]:
products_descriptions

Unnamed: 0,ProductID,CultureID,Description
0,994,en,Chromoly steel.
1,995,en,Aluminum alloy cups; large diameter spindle.
2,996,en,Aluminum alloy cups and a hollow axle.
3,984,en,"Suitable for any type of riding, on or off-roa..."
4,985,en,"Suitable for any type of riding, on or off-roa..."
...,...,...,...
1759,894,ar,تصميم عريض الوصلات.
1760,894,fr,Conception liaison large.
1761,894,th,การออกแบบให้มีจุดเชื่อมกว้าง
1762,894,he,עיצוב רחב-חוליות.


In [46]:
def get_descriptions(product_id):
    descriptions = products_descriptions[products_descriptions['ProductID']==product_id]
    return {
        'en':descriptions.loc[descriptions['CultureID']=='en'],
        'fr':descriptions.loc[descriptions['CultureID']=='fr'],
        'zh-cht':descriptions.loc[descriptions['CultureID']=='zh-cht'],
        'ar':descriptions.loc[descriptions['CultureID']=='ar'],
        'he':descriptions.loc[descriptions['CultureID']=='he'],
        'th':descriptions.loc[descriptions['CultureID']=='th'],
        ## German, Japanese and Turkish are still missing
    }

In [48]:
get_descriptions(825)

{'en': Empty DataFrame
 Columns: [ProductID, CultureID, Description]
 Index: [],
 'fr': Empty DataFrame
 Columns: [ProductID, CultureID, Description]
 Index: [],
 'zh-cht':       ProductID CultureID  Description
 1751        825    zh-cht  高性能的山地车备用轮。,
 'ar': Empty DataFrame
 Columns: [ProductID, CultureID, Description]
 Index: [],
 'he': Empty DataFrame
 Columns: [ProductID, CultureID, Description]
 Index: [],
 'th': Empty DataFrame
 Columns: [ProductID, CultureID, Description]
 Index: []}