# Project

### Importando as bibliotecas

In [1]:
import sys
sys.path.append('..')
import pandas as pd
import pyodbc
from sqlalchemy import create_engine
import params.consts as consts

### Conectando com o banco de dados

In [2]:
driver = 'SQL+Server' # Passando o nome do programa a ser utilizado (com + onde é espaço)
server = 'GalaxyBook2-360' # Passando o nome do servidor/usuário. Para descobrir, basta digitar no cmd hostname
database = 'AdventureWorksDW2022' # Passando o nome do banco de dados a ser consultado

engine = create_engine(f'mssql+pyodbc://{server}/{database}?driver={driver}') # Fazendo a conexão com o banco de dados

print('Conexão realizada com sucesso.') # Confirmando que a conexão foi realizada com sucesso

Conexão realizada com sucesso.


### Consultando o banco de dados

In [3]:
sql_code = ''' 
    SELECT TOP (100)
        fis.SalesOrderNumber AS 'order_number',
        fis.OrderDate AS 'order_date',
        dpc.EnglishProductCategoryName AS 'category',
        fis.CustomerKey AS 'customer_id',
        dc.FirstName + ' ' + dc.LastName AS 'name',
        REPLACE(REPLACE(dc.Gender, 'M','Male'), 'F', 'Female') AS 'gender',
        dg.EnglishCountryRegionName AS 'country',
        fis.OrderQuantity AS 'order_quantity',
        fis.SalesAmount AS 'sales_amount',
        fis.TotalProductCost AS 'total_product_cost',
        fis.SalesAmount - fis.TotalProductCost AS 'sales_profit'
    FROM
        FactInternetSales fis
    INNER JOIN DimProduct dp ON fis.ProductKey = dp.ProductKey
        INNER JOIN DimProductSubcategory dps ON dp.ProductSubcategoryKey = dps.ProductSubcategoryKey
            INNER JOIN DimProductCategory dpc ON dps.ProductCategoryKey = dpc.ProductCategoryKey
    INNER JOIN DimCustomer dc ON fis.CustomerKey = dc.CustomerKey
        INNER JOIN DimGeography dg ON dc.GeographyKey = dg.GeographyKey
    WHERE YEAR(fis.OrderDate) = 2013
''' # Escrevendo o código em SQL para consultar os dados

df = pd.read_sql(sql_code, engine) # Consultando o banco de dados e criando um dataset com os dados extraídos

### Visualizando o dataset

In [4]:
df.head() # Exibindo o início do dataset

Unnamed: 0,order_number,order_date,category,customer_id,name,gender,country,order_quantity,sales_amount,total_product_cost,sales_profit
0,SO51250,2013-01-01,Clothing,19360,Joanna Hernandez,Female,United Kingdom,1,53.99,41.5723,12.4177
1,SO51259,2013-01-01,Bikes,11433,Maurice Shan,Male,France,1,2384.07,1481.9379,902.1321
2,SO51259,2013-01-01,Accessories,11433,Maurice Shan,Male,France,1,8.99,3.3623,5.6277
3,SO51259,2013-01-01,Accessories,11433,Maurice Shan,Male,France,1,4.99,1.8663,3.1237
4,SO51259,2013-01-01,Accessories,11433,Maurice Shan,Male,France,1,34.99,13.0863,21.9037


In [5]:
df.tail() # Exibindo o fim do dataset

Unnamed: 0,order_number,order_date,category,customer_id,name,gender,country,order_quantity,sales_amount,total_product_cost,sales_profit
95,SO51365,2013-01-08,Accessories,11156,Maria Roberts,Female,United States,1,54.99,20.5663,34.4237
96,SO51360,2013-01-08,Bikes,11388,Joseph Martin,Male,United Kingdom,1,2319.99,1265.6195,1054.3705
97,SO51358,2013-01-07,Accessories,17833,Monique Suarez,Female,Germany,1,2.29,0.8565,1.4335
98,SO51358,2013-01-07,Accessories,17833,Monique Suarez,Female,Germany,1,4.99,1.8663,3.1237
99,SO51358,2013-01-07,Accessories,17833,Monique Suarez,Female,Germany,1,28.99,10.8423,18.1477


### Salvando o dataset

In [6]:
df.to_csv(consts.DATASET, index=False) # Salvando o dataset em formato csv