# RESELLER DIMENSION

In [1]:
import pandas as pd
from connection_script import connect_databases

In [2]:
db_op, db_etl = connect_databases()

## Extraction

In [3]:

store = pd.read_sql_query('SELECT * FROM [Sales].[Store]', db_op)
store_demographics = pd.read_sql_query('SELECT * FROM [Sales].[vStoreWithDemographics]', db_op)
person_phone = pd.read_sql_query('SELECT * FROM [Person].[PersonPhone]', db_op)
sales_person = pd.read_sql_query('SELECT * FROM [Sales].[SalesPerson]', db_op)
dim_geography = pd.read_sql_query('SELECT * FROM "DimGeography"', db_etl)
address = pd.read_sql_query('SELECT AddressID, AddressLine1, AddressLine2 FROM [Person].[Address]', db_op)
business_entity_address = pd.read_sql_query('SELECT * FROM [Person].[BusinessEntityAddress]', db_op)

## TRANSFORM

In [4]:
#Merging data frames
merged_df = store.merge(store_demographics, on='BusinessEntityID', how='inner')
merged_df = merged_df.merge(person_phone, left_on='SalesPersonID', right_on='BusinessEntityID', how='inner')
merged_df = merged_df.merge(sales_person, left_on='SalesPersonID', right_on='BusinessEntityID', how='inner')
merged_df = merged_df.merge(dim_geography, left_on='TerritoryID', right_on='GeographyKey', how='inner')
merged_df = merged_df.merge(business_entity_address, left_on='BusinessEntityID', right_on='BusinessEntityID', how='inner', suffixes=('_store', '_address'))
merged_df = merged_df.merge(address, left_on='AddressID', right_on='AddressID', how='inner')
merged_df.columns

Index(['BusinessEntityID_x', 'Name_x', 'SalesPersonID', 'Demographics',
       'rowguid_x', 'ModifiedDate_x', 'Name_y', 'AnnualSales', 'AnnualRevenue',
       'BankName', 'BusinessType', 'YearOpened', 'Specialty', 'SquareFeet',
       'Brands', 'Internet', 'NumberEmployees', 'BusinessEntityID_y',
       'PhoneNumber', 'PhoneNumberTypeID', 'ModifiedDate_y',
       'BusinessEntityID', 'TerritoryID', 'SalesQuota', 'Bonus',
       'CommissionPct', 'SalesYTD', 'SalesLastYear', 'rowguid_y',
       'ModifiedDate_store', 'GeographyKey', 'City', 'StateProvinceCode',
       'StateProvinceName', 'CountryRegionCode', 'EnglishCountryRegionName',
       'PostalCode', 'SalesTerritoryID', 'AddressID', 'AddressTypeID',
       'rowguid', 'ModifiedDate_address', 'AddressLine1', 'AddressLine2'],
      dtype='object')

In [5]:
def generateAlternateKey(ind):
    l = len(str(ind))
    zeros = 8 - l
    alternateKey = 'AW' + '0'*zeros + str(ind)
    return alternateKey


In [6]:
merged_df = merged_df.reset_index(drop=False)
merged_df['index'] = merged_df['index'] + 1
merged_df.set_index('index', inplace=True)
merged_df['ResellerAlternateKey'] = merged_df.index.to_series().apply(generateAlternateKey)
merged_df[['BusinessEntityID', 'ResellerAlternateKey']]

Unnamed: 0_level_0,BusinessEntityID,ResellerAlternateKey
index,Unnamed: 1_level_1,Unnamed: 2_level_1
1,279,AW00000001
2,276,AW00000002
3,277,AW00000003
4,275,AW00000004
5,286,AW00000005
...,...,...
697,282,AW00000697
698,281,AW00000698
699,277,AW00000699
700,277,AW00000700


In [7]:
# Column Selection
result = merged_df[['ResellerAlternateKey', 'GeographyKey', 'PhoneNumber', 'BusinessType', 'Name_x', 'NumberEmployees', 'Specialty', 'AddressLine1', 'AddressLine2', 
           'AnnualSales', 'BankName', 'AnnualRevenue', 'YearOpened']]

#Renaming
# Create a copy of the DataFrame to avoid SettingWithCopyWarning
result = result.copy()

result = result.rename(columns={'PhoneNumber': 'Phone',
                        'Name_x': 'ResellerName',
                        'Specialty': 'ProductLine'})

result

Unnamed: 0_level_0,ResellerAlternateKey,GeographyKey,Phone,BusinessType,ResellerName,NumberEmployees,ProductLine,AddressLine1,AddressLine2,AnnualSales,BankName,AnnualRevenue,YearOpened
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,AW00000001,5,664-555-0112,BM,Next-Door Bike Store,13,Mountain,8291 Crossbow Way,,800000.0,United Security,80000.0,1996
2,AW00000002,4,883-555-0116,BM,Professional Sales and Service,14,Touring,2487 Riverside Drive,,800000.0,International Bank,80000.0,1991
3,AW00000003,3,517-555-0117,BM,Riders Company,15,Road,80 Sunview Terrace,,800000.0,Primary Bank & Reserve,80000.0,1999
4,AW00000004,2,257-555-0154,BM,The Bike Mechanics,16,Mountain,8154 Via Mexico,,800000.0,International Security,80000.0,1994
5,AW00000005,9,1 (11) 500 555-0190,BM,Nationwide Supply,17,Touring,34 Waterloo Road,,800000.0,Guardian Bank,80000.0,1987
...,...,...,...,...,...,...,...,...,...,...,...,...,...
697,AW00000697,6,185-555-0169,BM,Retreat Inn,8,Road,9100 Sheppard Avenue North,,300000.0,Primary Bank & Reserve,30000.0,1982
698,AW00000698,4,330-555-0120,BM,Technical Parts Manufacturing,5,Touring,5725 Glaze Drive,,300000.0,International Security,30000.0,1976
699,AW00000699,3,517-555-0117,BM,Totes & Baskets Company,2,Road,80 Sunview Terrace,,300000.0,Guardian Bank,30000.0,1970
700,AW00000700,3,517-555-0117,BM,World of Bikes,17,Mountain,80 Sunview Terrace,,800000.0,Primary Bank & Reserve,80000.0,1997


## LOAD

In [8]:
db_op, db_etl = connect_databases()
result.to_sql('DimReseller', db_etl, if_exists='replace', index_label='ResellerKey')

701

In [9]:
# Testing table creation
dim_reseller = pd.read_sql_query('SELECT * FROM "DimReseller"', db_etl)
dim_reseller

Unnamed: 0,ResellerKey,ResellerAlternateKey,GeographyKey,Phone,BusinessType,ResellerName,NumberEmployees,ProductLine,AddressLine1,AddressLine2,AnnualSales,BankName,AnnualRevenue,YearOpened
0,1,AW00000001,5,664-555-0112,BM,Next-Door Bike Store,13,Mountain,8291 Crossbow Way,,800000.0,United Security,80000.0,1996
1,2,AW00000002,4,883-555-0116,BM,Professional Sales and Service,14,Touring,2487 Riverside Drive,,800000.0,International Bank,80000.0,1991
2,3,AW00000003,3,517-555-0117,BM,Riders Company,15,Road,80 Sunview Terrace,,800000.0,Primary Bank & Reserve,80000.0,1999
3,4,AW00000004,2,257-555-0154,BM,The Bike Mechanics,16,Mountain,8154 Via Mexico,,800000.0,International Security,80000.0,1994
4,5,AW00000005,9,1 (11) 500 555-0190,BM,Nationwide Supply,17,Touring,34 Waterloo Road,,800000.0,Guardian Bank,80000.0,1987
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,697,AW00000697,6,185-555-0169,BM,Retreat Inn,8,Road,9100 Sheppard Avenue North,,300000.0,Primary Bank & Reserve,30000.0,1982
697,698,AW00000698,4,330-555-0120,BM,Technical Parts Manufacturing,5,Touring,5725 Glaze Drive,,300000.0,International Security,30000.0,1976
698,699,AW00000699,3,517-555-0117,BM,Totes & Baskets Company,2,Road,80 Sunview Terrace,,300000.0,Guardian Bank,30000.0,1970
699,700,AW00000700,3,517-555-0117,BM,World of Bikes,17,Mountain,80 Sunview Terrace,,800000.0,Primary Bank & Reserve,80000.0,1997
