### Python Dependencies and SQL Connection

In [1]:
import pyodbc
import pandas as pd
import os

In [9]:
# Define server and database names for connection
sql_server = os.environ.get('SQL_EXPRESS_SERVER') + '\SQLEXPRESS'
database_name = 'AdventureWorks2019'
cxn_str = f"""
DRIVER={{ODBC Driver 17 for SQL SERVER}};
SERVER={sql_server};
DATABASE={database_name};
Trusted_Connection=yes;
MARS_CONNECTION=yes;
"""

cxn = pyodbc.connect(cxn_str)
cursor = cxn.cursor()

### AW Database Query

In [10]:
query = """SELECT 
                demo.BusinessEntityID, 
                state.Name, 
                address.StateProvinceID, 
                state.StateProvinceCode, 
                CAST(BirthDate AS date) AS BirthDate, 
                MaritalStatus, 
                YearlyIncome, 
                Gender, 
                TotalChildren, 
                NumberCarsOwned, 
                Education, 
                Occupation, 
                City, 
                state.StateProvinceCode AS StateCode, 
                state.Name AS State, 
                state.CountryRegionCode, 
                country.Name AS Country, 
                address.PostalCode, 
                cat.Name AS Category, 
                subcat.Name AS Subcategory,  
                SUM(detail.OrderQty) AS OrderQty, 
                SUM(header.TotalDue) AS TotalDue 
        FROM Sales.SalesOrderHeader header 
        JOIN Person.Address address 
                ON address.AddressID = header.ShipToAddressID 
        JOIN Person.StateProvince state 
                ON state.StateProvinceID = address.StateProvinceID 
        JOIN Person.CountryRegion country 
                ON country.CountryRegionCode = state.CountryRegionCode 
        JOIN Sales.vPersonDemographics demo 
                ON demo.BusinessEntityID = header.CustomerID 
        JOIN Person.BusinessEntityAddress ent
                ON ent.BusinessEntityID = demo.BusinessEntityID 
        JOIN Sales.SalesOrderDetail detail
                ON detail.SalesOrderID = header.SalesOrderID 
        JOIN Production.Product prod
                ON prod.ProductID = detail.ProductID 
        JOIN Production.ProductSubcategory subcat
                ON subcat.ProductSubcategoryID = prod.ProductSubcategoryID 
        JOIN Production.ProductCategory cat
                ON cat.ProductCategoryID = subcat.ProductCategoryID 
        GROUP BY 
                demo.BusinessEntityID, 
                state.Name, 
                address.StateProvinceID, 
                state.StateProvinceCode, 
                CAST(BirthDate AS date), 
                MaritalStatus, 
                YearlyIncome, 
                Gender, 
                TotalChildren, 
                NumberCarsOwned, 
                Education, 
                Occupation, 
                City, 
                state.StateProvinceCode, 
                state.Name, 
                state.CountryRegionCode, 
                country.Name, 
                address.PostalCode, 
                cat.Name, 
                subcat.Name 
        ORDER BY state.Name"""

combined_df = pd.read_sql(query, cxn) 
print(combined_df.shape)
combined_df.head()

(27556, 22)


Unnamed: 0,BusinessEntityID,Name,StateProvinceID,StateProvinceCode,BirthDate,MaritalStatus,YearlyIncome,Gender,TotalChildren,NumberCarsOwned,...,City,StateCode,State,CountryRegionCode,Country,PostalCode,Category,Subcategory,OrderQty,TotalDue
0,11533,Alabama,3,AL,1974-04-21,S,25001-50000,F,0,1,...,Birmingham,AL,Alabama,US,United States,35203,Accessories,Tires and Tubes,2,41.2055
1,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Accessories,Bike Racks,1,2725.2173
2,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Accessories,Tires and Tubes,3,8175.6519
3,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Bikes,Mountain Bikes,1,2725.2173
4,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Bikes,Touring Bikes,1,2634.3974


In [11]:
combined_df.dropna(how='any')

Unnamed: 0,BusinessEntityID,Name,StateProvinceID,StateProvinceCode,BirthDate,MaritalStatus,YearlyIncome,Gender,TotalChildren,NumberCarsOwned,...,City,StateCode,State,CountryRegionCode,Country,PostalCode,Category,Subcategory,OrderQty,TotalDue
0,11533,Alabama,3,AL,1974-04-21,S,25001-50000,F,0,1,...,Birmingham,AL,Alabama,US,United States,35203,Accessories,Tires and Tubes,2,41.2055
1,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Accessories,Bike Racks,1,2725.2173
2,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Accessories,Tires and Tubes,3,8175.6519
3,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Bikes,Mountain Bikes,1,2725.2173
4,15175,Alberta,1,AB,1969-08-16,M,75001-100000,F,1,0,...,Calgary,AB,Alberta,CA,Canada,T2P 2G8,Bikes,Touring Bikes,1,2634.3974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27551,20573,Yveline,164,78,1979-04-05,S,0-25000,F,4,2,...,Versailles,78,Yveline,FR,France,78000,Bikes,Mountain Bikes,1,2668.5640
27552,20582,Yveline,164,78,1969-07-02,S,25001-50000,F,0,0,...,Versailles,78,Yveline,FR,France,78000,Accessories,Helmets,1,157.4073
27553,20582,Yveline,164,78,1969-07-02,S,25001-50000,F,0,0,...,Versailles,78,Yveline,FR,France,78000,Accessories,Tires and Tubes,2,314.8146
27554,20582,Yveline,164,78,1969-07-02,S,25001-50000,F,0,0,...,Versailles,78,Yveline,FR,France,78000,Clothing,Gloves,1,157.4073


### Export CSV

In [None]:
# 3.0 -- Our dataframe is now ready for export
combined_df.to_csv('adventureWorks_tentables.csv')