# Import libraries

In [1]:
import pandas as pd
import pyodbc

# Initialize connection parameters

In [108]:
DB = {'servername': 'MSI',
      'source': 'AdventureWorks2019',
      'destination':'DW',
      'user':'user1',
      'pass':'123456'}

# Create connection

In [196]:
#SourceDB
sourceConn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' + DB['servername'] + ';DATABASE=' + DB['source'] +';UID='+ DB['user']+';PWD='+DB['pass']+ ';Trusted_Connection=yes')

#DestinationDB
destConn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER=' + DB['servername'] + ';DATABASE='+DB['destination'] +';UID='+ DB['user']+';PWD='+DB['pass']+ ';Trusted_Connection=yes')
cursor=destConn.cursor()

# Extract data from source DB to pandas DataFrames

In [63]:
Tables= [('Purchasing.ShipMethod','ShipMethodID'),('Production.Product','ProductID'),('Purchasing.Vendor','BusinessEntityID'),('Purchasing.PurchaseOrderHeader','PurchaseOrderID'),
         ('Purchasing.PurchaseOrderDetail',['PurchaseOrderID','PurchaseOrderDetailID']), 
         ('Person.Person','BusinessEntityID'),('Sales.SalesTerritory','TerritoryID'),
         ('Person.CountryRegion','CountryRegionCode'),('Sales.SalesOrderHeader','SalesOrderID'),('Sales.SalesOrderDetail',['SalesOrderID','SalesOrderDetailID'])
        ]

In [64]:
dfs={}
for item in Tables:
    dfs[item[0]]=pd.read_sql(f'SELECT * FROM {item[0]}',sourceConn,index_col=item[1])
dfs['HumanResources.Employee']=pd.read_sql('SELECT BusinessEntityID FROM HumanResources.Employee',sourceConn,index_col='BusinessEntityID')




# Transform staged data

In [65]:
dw={}

## Extract essential columns for data warehouse

In [66]:
#columns to extract in each table
Columns= [('Purchasing.ShipMethod','Name'),('Production.Product',['Name','ProductLine']),('Purchasing.Vendor',['Name','CreditRating']),
         ('Purchasing.PurchaseOrderHeader',['Status','EmployeeID','VendorID','ShipMethodID','OrderDate','ShipDate']),
         ('Purchasing.PurchaseOrderDetail',['DueDate','OrderQty','ProductID','LineTotal','ReceivedQty','RejectedQty','StockedQty']), 
         ('Person.Person',['FirstName','MiddleName','LastName','Suffix']),('Sales.SalesTerritory',['Name','CountryRegionCode','Group']),
         ('Person.CountryRegion','Name'),
         ('Sales.SalesOrderHeader',['OrderDate','DueDate','ShipDate','Status','OnlineOrderFlag','TerritoryID','ShipMethodID']),
         ('Sales.SalesOrderDetail',['OrderQty','ProductID','LineTotal'])
        ]

In [67]:
# df=pd.DataFrame(dfs['Production.Product'][['Name','Color','Size','Class','Style']])
# df

In [68]:
dfst={}
for item in Columns:
    dfst[item[0]]=dfs[item[0]][item[1]]
dfst['HumanResources.Employee']=dfs['HumanResources.Employee']

## Transform  Data for Dimensions

### DimEmpolyee

In [69]:
dfst['Person.Person']['Name']=dfst['Person.Person']['FirstName'].map(str)+' '+dfst['Person.Person']['MiddleName'].map(str)+'. '+dfst['Person.Person']['LastName'].map(str)
dfst['HumanResources.Employee']=pd.merge(dfst['HumanResources.Employee'],dfst['Person.Person'],how='left',on='BusinessEntityID')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfst['Person.Person']['Name']=dfst['Person.Person']['FirstName'].map(str)+' '+dfst['Person.Person']['MiddleName'].map(str)+' '+dfst['Person.Person']['LastName'].map(str)


In [112]:
dw['DimEmployee']=pd.DataFrame(dfst['HumanResources.Employee']['Name']).rename_axis('EmployeeID')
dw['DimEmployee']['Name']=dw['DimEmployee']['Name'].str.replace( "'","''")
dw['DimEmployee']

Unnamed: 0_level_0,Name
EmployeeID,Unnamed: 1_level_1
263,Jean E Trenary
78,Reuben H D''sa
242,Deborah E Poe
125,Matthias T Berndt
278,Garrett R Vargas
...,...
82,Jack T Creasey
157,Linda A Randall
95,Jim H Scardelis
215,Mark L Harrington


### DimProduct

In [70]:
dfst['Production.Product']

Unnamed: 0_level_0,Name,ProductLine
ProductID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Adjustable Race,
2,Bearing Ball,
3,BB Ball Bearing,
4,Headset Ball Bearings,
316,Blade,
...,...,...
995,ML Bottom Bracket,
996,HL Bottom Bracket,
997,"Road-750 Black, 44",R
998,"Road-750 Black, 48",R


In [71]:
mapper={None:'Accessory','R':'Road','M':'Mountain','T':'Touring','S':'Standard'}
dfst['Production.Product']['ProductLine']=dfst['Production.Product']['ProductLine'].replace(mapper,regex=True)
dfst['Production.Product']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfst['Production.Product']['ProductLine']=dfst['Production.Product']['ProductLine'].replace(mapper,regex=True)


Unnamed: 0_level_0,Name,ProductLine
ProductID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Adjustable Race,Accessory
2,Bearing Ball,Accessory
3,BB Ball Bearing,Accessory
4,Headset Ball Bearings,Accessory
316,Blade,Accessory
...,...,...
995,ML Bottom Bracket,Accessory
996,HL Bottom Bracket,Accessory
997,"Road-750 Black, 44",Road
998,"Road-750 Black, 48",Road


In [135]:
dw['DimProduct']=pd.DataFrame(dfst['Production.Product'])
dw['DimProduct']['Name']=dw['DimProduct']['Name'].str.replace( "'","''")
dw['DimProduct']

Unnamed: 0_level_0,Name,ProductLine
ProductID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Adjustable Race,Accessory
2,Bearing Ball,Accessory
3,BB Ball Bearing,Accessory
4,Headset Ball Bearings,Accessory
316,Blade,Accessory
...,...,...
995,ML Bottom Bracket,Accessory
996,HL Bottom Bracket,Accessory
997,"Road-750 Black, 44",Road
998,"Road-750 Black, 48",Road


### DimDate

In [73]:
df=pd.concat([dfst['Purchasing.PurchaseOrderHeader']['OrderDate'],dfst['Purchasing.PurchaseOrderDetail']['DueDate'],dfst['Purchasing.PurchaseOrderHeader']['ShipDate'],
dfst['Sales.SalesOrderHeader']['OrderDate'],dfst['Sales.SalesOrderHeader']['DueDate'],dfst['Sales.SalesOrderHeader']['ShipDate']],ignore_index=True,axis=0)

In [74]:
df=df.drop_duplicates().sort_values()
df=pd.DataFrame({'FullDate':df})
df

Unnamed: 0,FullDate
0,2011-04-16
12857,2011-04-25
4,2011-04-30
12861,2011-05-09
4017,2011-05-14
...,...
12733,2014-08-16
12763,2014-08-17
4001,2014-09-22
16858,2014-10-17


In [75]:
df['DayOfWeek']=df['FullDate'].dt.day_name()
df


Unnamed: 0,FullDate,DayOfWeek
0,2011-04-16,Saturday
12857,2011-04-25,Monday
4,2011-04-30,Saturday
12861,2011-05-09,Monday
4017,2011-05-14,Saturday
...,...,...
12733,2014-08-16,Saturday
12763,2014-08-17,Sunday
4001,2014-09-22,Monday
16858,2014-10-17,Friday


In [173]:
df['FullDate'].dtype

dtype('<M8[ns]')

In [76]:
dw['DimDate']=pd.DataFrame(df).set_index('FullDate')
dw['DimDate']

Unnamed: 0_level_0,DayOfWeek
FullDate,Unnamed: 1_level_1
2011-04-16,Saturday
2011-04-25,Monday
2011-04-30,Saturday
2011-05-09,Monday
2011-05-14,Saturday
...,...
2014-08-16,Saturday
2014-08-17,Sunday
2014-09-22,Monday
2014-10-17,Friday


### DimShipMethod

In [77]:
dw['DimShipMethod']=pd.DataFrame(dfst['Purchasing.ShipMethod'])

In [78]:
dw['DimShipMethod']

Unnamed: 0_level_0,Name
ShipMethodID,Unnamed: 1_level_1
1,XRQ - TRUCK GROUND
2,ZY - EXPRESS
3,OVERSEAS - DELUXE
4,OVERNIGHT J-FAST
5,CARGO TRANSPORT 5


### DimVendor

In [79]:
dfst['Purchasing.Vendor']

Unnamed: 0_level_0,Name,CreditRating
BusinessEntityID,Unnamed: 1_level_1,Unnamed: 2_level_1
1492,Australia Bike Retailer,1
1494,Allenson Cycles,2
1496,Advanced Bicycles,1
1498,"Trikes, Inc.",2
1500,Morgan Bike Accessories,1
...,...,...
1690,Bloomington Multisport,1
1692,Carlson Specialties,2
1694,"Compete, Inc.",1
1696,Chicago City Saddles,1


In [80]:
dfst['Purchasing.Vendor']['CreditRating']=dfst['Purchasing.Vendor']['CreditRating'].map({1:'Superior',2:'Excellent',3:'Above Average',4:'Average',5:'Below Average'})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfst['Purchasing.Vendor']['CreditRating']=dfst['Purchasing.Vendor']['CreditRating'].map({1:'Superior',2:'Excellent',3:'Above Average',4:'Average',5:'Below Average'})


In [142]:
dw['DimVendor']=pd.DataFrame(dfst['Purchasing.Vendor']).rename_axis('VendorID')
dw['DimVendor']['Name']=dw['DimVendor']['Name'].str.replace( "'","''")
dw['DimVendor']

Unnamed: 0_level_0,Name,CreditRating
VendorID,Unnamed: 1_level_1,Unnamed: 2_level_1
1492,Australia Bike Retailer,Superior
1494,Allenson Cycles,Excellent
1496,Advanced Bicycles,Superior
1498,"Trikes, Inc.",Excellent
1500,Morgan Bike Accessories,Superior
...,...,...
1690,Bloomington Multisport,Superior
1692,Carlson Specialties,Excellent
1694,"Compete, Inc.",Superior
1696,Chicago City Saddles,Superior


### DimTerritory

In [81]:
dfst['Sales.SalesTerritory']

Unnamed: 0_level_0,Name,CountryRegionCode,Group
TerritoryID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Northwest,US,North America
2,Northeast,US,North America
3,Central,US,North America
4,Southwest,US,North America
5,Southeast,US,North America
6,Canada,CA,North America
7,France,FR,Europe
8,Germany,DE,Europe
9,Australia,AU,Pacific
10,United Kingdom,GB,Europe


In [82]:
pd.DataFrame(dfst['Person.CountryRegion'])

Unnamed: 0_level_0,Name
CountryRegionCode,Unnamed: 1_level_1
AD,Andorra
AE,United Arab Emirates
AF,Afghanistan
AG,Antigua and Barbuda
AI,Anguilla
...,...
YE,Yemen
YT,Mayotte
ZA,South Africa
ZM,Zambia


In [83]:
df4=dfst['Sales.SalesTerritory'].merge(dfst['Person.CountryRegion'],on='CountryRegionCode').set_index(dfst['Sales.SalesTerritory'].index).rename(columns={'Name_y':'Country','Name_x':'Region'})
df4

Unnamed: 0_level_0,Region,CountryRegionCode,Group,Country
TerritoryID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,Northwest,US,North America,United States
2,Northeast,US,North America,United States
3,Central,US,North America,United States
4,Southwest,US,North America,United States
5,Southeast,US,North America,United States
6,Canada,CA,North America,Canada
7,France,FR,Europe,France
8,Germany,DE,Europe,Germany
9,Australia,AU,Pacific,Australia
10,United Kingdom,GB,Europe,United Kingdom


In [84]:
mask=df4['Region'].map(str)!=df4['Country'].map(str) 
mask2=df4['Region'].map(str)==df4['Country'].map(str) 
df4['CountryRegion']=df4.loc[mask,'Region'].map(str)+' '+df4['Country'].map(str) 
df4.loc[ mask2,'CountryRegion']=df4['Country'].map(str) 
df4

Unnamed: 0_level_0,Region,CountryRegionCode,Group,Country,CountryRegion
TerritoryID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,Northwest,US,North America,United States,Northwest United States
2,Northeast,US,North America,United States,Northeast United States
3,Central,US,North America,United States,Central United States
4,Southwest,US,North America,United States,Southwest United States
5,Southeast,US,North America,United States,Southeast United States
6,Canada,CA,North America,Canada,Canada
7,France,FR,Europe,France,France
8,Germany,DE,Europe,Germany,Germany
9,Australia,AU,Pacific,Australia,Australia
10,United Kingdom,GB,Europe,United Kingdom,United Kingdom


In [85]:
dw['DimTerritory']=pd.DataFrame(df4[['Group','Country','CountryRegion']]).rename(columns={'Group':'TerritoryGroup'})
dw['DimTerritory']

Unnamed: 0_level_0,TerritoryGroup,Country,CountryRegion
TerritoryID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,North America,United States,Northwest United States
2,North America,United States,Northeast United States
3,North America,United States,Central United States
4,North America,United States,Southwest United States
5,North America,United States,Southeast United States
6,North America,Canada,Canada
7,Europe,France,France
8,Europe,Germany,Germany
9,Pacific,Australia,Australia
10,Europe,United Kingdom,United Kingdom


### DimPurchasingStatus

In [86]:
dw['DimPurchasingStatus']=pd.DataFrame({'PurchasingStatusID':[1,2,3,4],'Name':['Pending','Approved','Rejected','Completed']}).set_index('PurchasingStatusID')
dw['DimPurchasingStatus']

Unnamed: 0_level_0,Name
PurchasingStatusID,Unnamed: 1_level_1
1,Pending
2,Approved
3,Rejected
4,Completed


### DimSalesStatus

In [87]:
dw['DimSalesStatus']=pd.DataFrame({'SalesStatusID':[1,2,3,4,5,6],'Name':['In process','Approved','Backordered','Rejected','Shipped','Cancelled']}).set_index('SalesStatusID')
dw['DimSalesStatus']

Unnamed: 0_level_0,Name
SalesStatusID,Unnamed: 1_level_1
1,In process
2,Approved
3,Backordered
4,Rejected
5,Shipped
6,Cancelled


### DimOnlineOrderFlag

In [88]:
dw['DimOnlineOrderFlag']=pd.DataFrame({'OnlineOrderFlagID':[0,1],'Name':['Sale person','Online']}).set_index('OnlineOrderFlagID')
dw['DimOnlineOrderFlag']

Unnamed: 0_level_0,Name
OnlineOrderFlagID,Unnamed: 1_level_1
0,Sale person
1,Online


## Transform data for Facts

### FactPurchaseOrder

In [89]:
dfst['Purchasing.PurchaseOrderHeader']

Unnamed: 0_level_0,Status,EmployeeID,VendorID,ShipMethodID,OrderDate,ShipDate
PurchaseOrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,4,258,1580,3,2011-04-16,2011-04-25
2,1,254,1496,5,2011-04-16,2011-04-25
3,4,257,1494,2,2011-04-16,2011-04-25
4,3,261,1650,5,2011-04-16,2011-04-25
5,4,251,1654,4,2011-04-30,2011-05-09
...,...,...,...,...,...,...
4008,2,258,1676,3,2014-04-22,2014-05-17
4009,2,261,1546,3,2013-11-09,2013-12-04
4010,2,260,1574,3,2013-11-09,2013-12-04
4011,2,254,1546,3,2014-06-24,2014-07-19


In [90]:
dfst['Purchasing.PurchaseOrderDetail']

Unnamed: 0_level_0,Unnamed: 1_level_0,DueDate,OrderQty,ProductID,LineTotal,ReceivedQty,RejectedQty,StockedQty
PurchaseOrderID,PurchaseOrderDetailID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1,2011-04-30,4,1,201.0400,3.0,0.0,3.0
2,2,2011-04-30,3,359,135.3600,3.0,0.0,3.0
2,3,2011-04-30,3,360,136.7415,3.0,0.0,3.0
3,4,2011-04-30,550,530,8847.3000,550.0,0.0,550.0
4,5,2011-04-30,3,4,171.0765,2.0,1.0,1.0
...,...,...,...,...,...,...,...,...
4011,8841,2014-07-24,1000,880,20560.0000,1000.0,0.0,1000.0
4012,8842,2014-07-24,6000,881,249420.0000,6000.0,0.0,6000.0
4012,8843,2014-07-24,6000,882,249420.0000,6000.0,0.0,6000.0
4012,8844,2014-07-24,6000,883,249420.0000,6000.0,0.0,6000.0


In [91]:
dw['FactPurchaseOrders']=dfst['Purchasing.PurchaseOrderDetail'].join(dfst['Purchasing.PurchaseOrderHeader'].reindex(dfst['Purchasing.PurchaseOrderDetail'].index, level=0))

In [182]:
column_order=[ 'ProductID', 'StatusID', 'EmployeeID', 'VendorID','ShipMethodID', 'OrderDate','DueDate',
           'ShipDate', 'LineTotal', 'OrderQty','ReceivedQty','RejectedQty', 'StockedQty']
dw['FactPurchaseOrders'].rename(columns={'Status':'StatusID'},inplace=True)
dw['FactPurchaseOrders']=dw['FactPurchaseOrders'].reindex(columns=column_order)
dw['FactPurchaseOrders']

Unnamed: 0_level_0,Unnamed: 1_level_0,ProductID,StatusID,EmployeeID,VendorID,ShipMethodID,OrderDate,DueDate,ShipDate,LineTotal,OrderQty,ReceivedQty,RejectedQty,StockedQty
PurchaseOrderID,PurchaseOrderDetailID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,1,1,4,258,1580,3,2011-04-16,2011-04-30,2011-04-25,201.0400,4,3.0,0.0,3.0
2,2,359,1,254,1496,5,2011-04-16,2011-04-30,2011-04-25,135.3600,3,3.0,0.0,3.0
2,3,360,1,254,1496,5,2011-04-16,2011-04-30,2011-04-25,136.7415,3,3.0,0.0,3.0
3,4,530,4,257,1494,2,2011-04-16,2011-04-30,2011-04-25,8847.3000,550,550.0,0.0,550.0
4,5,4,3,261,1650,5,2011-04-16,2011-04-30,2011-04-25,171.0765,3,2.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4011,8841,880,2,254,1546,3,2014-06-24,2014-07-24,2014-07-19,20560.0000,1000,1000.0,0.0,1000.0
4012,8842,881,2,254,1636,3,2014-06-24,2014-07-24,2014-07-19,249420.0000,6000,6000.0,0.0,6000.0
4012,8843,882,2,254,1636,3,2014-06-24,2014-07-24,2014-07-19,249420.0000,6000,6000.0,0.0,6000.0
4012,8844,883,2,254,1636,3,2014-06-24,2014-07-24,2014-07-19,249420.0000,6000,6000.0,0.0,6000.0


### FactSalesOrder

In [93]:
dfst['Sales.SalesOrderHeader']

Unnamed: 0_level_0,OrderDate,DueDate,ShipDate,Status,OnlineOrderFlag,TerritoryID,ShipMethodID
SalesOrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
43659,2011-05-31,2011-06-12,2011-06-07,5,False,5,5
43660,2011-05-31,2011-06-12,2011-06-07,5,False,5,5
43661,2011-05-31,2011-06-12,2011-06-07,5,False,6,5
43662,2011-05-31,2011-06-12,2011-06-07,5,False,6,5
43663,2011-05-31,2011-06-12,2011-06-07,5,False,4,5
...,...,...,...,...,...,...,...
75119,2014-06-30,2014-07-12,2014-07-07,5,True,1,1
75120,2014-06-30,2014-07-12,2014-07-07,5,True,6,1
75121,2014-06-30,2014-07-12,2014-07-07,5,True,6,1
75122,2014-06-30,2014-07-12,2014-07-07,5,True,6,1


In [94]:
dfst['Sales.SalesOrderDetail']

Unnamed: 0_level_0,Unnamed: 1_level_0,OrderQty,ProductID,LineTotal
SalesOrderID,SalesOrderDetailID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
43659,1,1,776,2024.994
43659,2,3,777,6074.982
43659,3,1,778,2024.994
43659,4,1,771,2039.994
43659,5,1,772,2039.994
...,...,...,...,...
75122,121313,1,878,21.980
75122,121314,1,712,8.990
75123,121315,1,878,21.980
75123,121316,1,879,159.000


In [181]:
column_order=['ProductID', 'StatusID', 'TerritoryID','ShipMethodID', 'OnlineOrderFlagID', 'OrderDate', 'DueDate',
           'ShipDate', 'LineTotal','OrderQty']
dw['FactSalesOrders']=dfst['Sales.SalesOrderDetail'].join(dfst['Sales.SalesOrderHeader'].reindex(dfst['Sales.SalesOrderDetail'].index, level=0))
dw['FactSalesOrders']['OnlineOrderFlag']=dw['FactSalesOrders']['OnlineOrderFlag'].map({False:0,True:1})
dw['FactSalesOrders'].rename(columns={'Status':'StatusID','OnlineOrderFlag':'OnlineOrderFlagID'},inplace=True)
dw['FactSalesOrders']=dw['FactSalesOrders'].reindex(columns=column_order)
dw['FactSalesOrders']

Unnamed: 0_level_0,Unnamed: 1_level_0,ProductID,StatusID,TerritoryID,ShipMethodID,OnlineOrderFlagID,OrderDate,DueDate,ShipDate,LineTotal,OrderQty
SalesOrderID,SalesOrderDetailID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
43659,1,776,5,5,5,0,2011-05-31,2011-06-12,2011-06-07,2024.994,1
43659,2,777,5,5,5,0,2011-05-31,2011-06-12,2011-06-07,6074.982,3
43659,3,778,5,5,5,0,2011-05-31,2011-06-12,2011-06-07,2024.994,1
43659,4,771,5,5,5,0,2011-05-31,2011-06-12,2011-06-07,2039.994,1
43659,5,772,5,5,5,0,2011-05-31,2011-06-12,2011-06-07,2039.994,1
...,...,...,...,...,...,...,...,...,...,...,...
75122,121313,878,5,6,1,1,2014-06-30,2014-07-12,2014-07-07,21.980,1
75122,121314,712,5,6,1,1,2014-06-30,2014-07-12,2014-07-07,8.990,1
75123,121315,878,5,6,1,1,2014-06-30,2014-07-12,2014-07-07,21.980,1
75123,121316,879,5,6,1,1,2014-06-30,2014-07-12,2014-07-07,159.000,1


### Create views for DimDate

In [209]:
cursor.execute(''' CREATE VIEW [dbo].[PurchaseOrderDate] AS
                   SELECT DISTINCT FullDate,DayOfWeek FROM dbo.DimDate AS dd
                   INNER JOIN dbo.FactPurchaseOrders as fpo
                   on dd.FullDate = fpo.OrderDate
''')

cursor.execute(''' CREATE VIEW [dbo].[PurchaseDueDate] AS
                   SELECT DISTINCT FullDate,DayOfWeek FROM dbo.DimDate AS dd
                   INNER JOIN dbo.FactPurchaseOrders as fpo
                   on dd.FullDate = fpo.DueDate
''')

cursor.execute(''' CREATE VIEW [dbo].[PurchaseShipDate] AS
                   SELECT DISTINCT FullDate,DayOfWeek FROM dbo.DimDate AS dd
                   INNER JOIN dbo.FactPurchaseOrders as fpo
                   on dd.FullDate = fpo.ShipDate
''')


cursor.commit()

In [210]:
cursor.execute(''' CREATE VIEW [dbo].[SalesOrderDate] AS
                   SELECT DISTINCT FullDate,DayOfWeek FROM dbo.DimDate AS dd
                   INNER JOIN dbo.FactSalesOrders as spo
                   on dd.FullDate = spo.OrderDate
''')

cursor.execute(''' CREATE VIEW [dbo].[SalesDueDate] AS
                   SELECT DISTINCT FullDate,DayOfWeek FROM dbo.DimDate AS dd
                   INNER JOIN dbo.FactSalesOrders as spo
                   on dd.FullDate = spo.DueDate
''')


cursor.execute(''' CREATE VIEW [dbo].[SalesShipDate] AS
                   SELECT DISTINCT FullDate,DayOfWeek FROM dbo.DimDate AS dd
                   INNER JOIN dbo.FactSalesOrders as spo
                   on dd.FullDate = spo.ShipDate
''')
cursor.commit()

# Load transformed data to Data Warehouse

In [163]:
DWTables=[('DimEmployee',['EmployeeID','Name']),('DimProduct',['ProductID','Name','ProductLine']),('DimDate',['FullDate','DayOfWeek']),
          ('DimShipMethod',['ShipMethodID','Name']),('DimVendor',['VendorID','Name','CreditRating']),('DimTerritory',['TerritoryID','TerritoryGroup','Country','CountryRegion']),
          ('DimPurchasingStatus',['PurchasingStatusID','Name']),('DimSalesStatus',['SalesStatusID','Name']),('DimOnlineOrderFlag',['OnlineOrderFlagID','Name']),
          ('FactPurchaseOrders',['PurchaseOrderID','PurchaseOrderDetailID', 'ProductID', 'StatusID', 'EmployeeID', 'VendorID','ShipMethodID', 'OrderDate','DueDate',
           'ShipDate', 'LineTotal', 'OrderQty','ReceivedQty','RejectedQty', 'StockedQty']),
          ('FactSalesOrders',['SalesOrderID','SalesOrderDetailID', 'ProductID', 'StatusID', 'TerritoryID','ShipMethodID', 'OnlineOrderFlagID', 'OrderDate', 'DueDate',
           'ShipDate', 'LineTotal','OrderQty'])]

In [185]:
for item in DWTables:
    for table_name, column_name in [item]:
        print(table_name)
        param=','.join(column_name)
        for index, row in dw[table_name].iterrows():
            value="'"+pd.Series(index).astype(str).str.cat(sep="','")+"'"+','+"'"+str(pd.Series(row).astype(str).str.cat(sep="','"))+"'"
            
            string=f'''INSERT INTO dbo.{table_name} ({param}) values ({value})'''
            
            cursor.execute(string)

cursor.commit()

DimEmployee
DimProduct
DimDate
DimShipMethod
DimVendor
DimTerritory
DimPurchasingStatus
DimSalesStatus
DimOnlineOrderFlag
FactPurchaseOrders
FactSalesOrders


In [None]:
for index, row in dw['FactPurchaseOrders'].iterrows():
      # print(row)
      # print(str(row.str.cat(sep="','")))
      value="'"+pd.Series(index).astype(str).str.cat(sep="','")+"'"+','+"'"+str(row.astype(str).str.cat(sep="','"))+"'"
      print(value)
      string=f'''INSERT INTO dbo.DimDate (FullDAte,DayOfWeek) values ({value})'''
      # cursor.execute(string)      
# cursor.commit()
