In [1]:
import pandas as pd
import sqlalchemy as sa
from sqlalchemy import Table, MetaData, Column, Integer
import os

In [2]:
username = os.getenv('AZURE_MSDS432_USERNAME')
password = os.getenv('AZURE_MSDS432_PASSWORD')
host = 'mysqlserver-432.database.windows.net'
database = 'mySampleDatabase'
authentication = "SqlPassword"
conn_string = sa.engine.url.URL(
     "mssql+pyodbc",
     username=username,
     password=password,
     host=host,
     port=1433,
     database=database,
     query={"driver": "ODBC Driver 18 for SQL Server", "authentication": authentication}
 )

In [3]:
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

In [4]:
query = """
select schema_name(t.schema_id) as schema_name,
       t.name as table_name,
       t.create_date,
       t.modify_date
from sys.tables t
order by schema_name,
         table_name;
"""

tables = pd.read_sql_query(query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
1,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
2,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
3,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-19 22:05:23.503
4,dbo,Rel_Demographics,2022-05-17 22:34:37.987,2022-05-19 22:21:30.853
5,dbo,Rel_Eviction_Notices,2022-05-17 21:59:47.223,2022-05-19 23:24:13.557
6,dbo,Rel_Location,2022-05-17 22:25:35.210,2022-05-25 12:56:24.520
7,dbo,SF_Addresses_With_Units_Raw,2022-05-19 01:46:22.717,2022-05-19 01:46:22.793
8,dbo,SF_Buyout_Agreements_Raw,2022-05-16 00:27:00.917,2022-05-16 00:27:00.943
9,dbo,SF_Eviction_Notices_Raw,2022-05-16 00:41:20.143,2022-05-16 00:41:20.190


### DIM_Eviction_Reason Table

In [47]:
## Drop the table
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

engine.execute("DROP TABLE dbo.FACT_SanFrancisco;")

<sqlalchemy.engine.result.ResultProxy at 0x7fa43cce6dc0>

In [23]:
create_DIM_Eviction_Reason_table_query = """
CREATE TABLE DIM_Eviction_Reason
(
    Non_Payment                                          BIT,
    Breach                                               BIT,
    Nuisance                                             BIT,
    Illegal_Use                                          BIT,
    Failure_to_Sign_Renewal                              BIT,
    Access_Denial                                        BIT,
    Unapproved_Subtenant                                 BIT,
    Owner_Move_In                                        BIT,
    Demolition                                           BIT,
    Capital_Improvement                                  BIT,
    Substantial_Rehab                                    BIT,
    Ellis_Act_WithDrawal                                 BIT,
    Condo_Conversion                                     BIT,
    Roommate_Same_Unit                                   BIT,
    Other_Cause                                          BIT,
    Late_Payments                                        BIT,
    Lead_Remediation                                     BIT,
    Development                                          BIT,
    Good_Samaritan_Ends                                  BIT,
    Reason_Key                                           VARCHAR(432) PRIMARY KEY
)
"""

In [24]:
## Create the table
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

engine.execute(create_DIM_Eviction_Reason_table_query)

<sqlalchemy.engine.result.ResultProxy at 0x7fa43c519430>

In [25]:
tables = pd.read_sql_query(query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
1,dbo,DIM_Eviction_Reason,2022-05-26 01:34:00.140,2022-05-26 01:34:00.140
2,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
3,dbo,FACT_SanFrancisco,2022-05-25 23:57:41.150,2022-05-25 23:57:41.150
4,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
5,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-26 00:44:10.810
6,dbo,Rel_Demographics,2022-05-17 22:34:37.987,2022-05-19 22:21:30.853
7,dbo,Rel_Eviction_Notices,2022-05-17 21:59:47.223,2022-05-19 23:24:13.557
8,dbo,Rel_Location,2022-05-17 22:25:35.210,2022-05-25 12:56:24.520
9,dbo,SF_Addresses_With_Units_Raw,2022-05-19 01:46:22.717,2022-05-19 01:46:22.793


### BR_Reason Table

In [44]:
create_BR_Reason_table_query = """
CREATE TABLE BR_Reason
(
    Reason_GroupKey                                     VARCHAR(432) PRIMARY KEY,
    Reason_Key                                          VARCHAR(432) FOREIGN KEY REFERENCES DIM_Eviction_Reason(Reason_Key)
)
"""

In [45]:
## Create the table
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

engine.execute(create_BR_Reason_table_query)

<sqlalchemy.engine.result.ResultProxy at 0x7fa43ccf4700>

In [46]:
tables = pd.read_sql_query(query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BR_Reason,2022-05-26 01:45:35.133,2022-05-26 01:45:35.133
1,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
2,dbo,Date,2022-05-26 01:44:54.340,2022-05-26 01:44:54.340
3,dbo,DIM_Eviction_Reason,2022-05-26 01:34:00.140,2022-05-26 01:45:35.137
4,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
5,dbo,FACT_SanFrancisco,2022-05-26 01:42:34.680,2022-05-26 01:42:34.680
6,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
7,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-26 01:39:29.083
8,dbo,Rel_Demographics,2022-05-17 22:34:37.987,2022-05-19 22:21:30.853
9,dbo,Rel_Eviction_Notices,2022-05-17 21:59:47.223,2022-05-19 23:24:13.557


### Date Table

In [40]:
create_Date_table_query = """
CREATE TABLE Date
(
    Date_Key                                            VARCHAR(432) PRIMARY KEY,
    Date_Type                                           VARCHAR(max) NOT NULL,
    Day_Name                                            VARCHAR(max),
    Day_Of_Week                                         INT,
    Month                                               INT,
    Month_Name                                          VARCHAR(max),
    Month_Year                                          DATE,
    Year                                                INT,
    Year_Month                                          DATE,
)
"""

In [41]:
## Create the table
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

engine.execute(create_Date_table_query)

<sqlalchemy.engine.result.ResultProxy at 0x7fa43c596370>

In [42]:
tables = pd.read_sql_query(query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BR_Reason,2022-05-26 01:35:07.260,2022-05-26 01:35:07.260
1,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
2,dbo,Date,2022-05-26 01:44:54.340,2022-05-26 01:44:54.340
3,dbo,DIM_Eviction_Reason,2022-05-26 01:34:00.140,2022-05-26 01:35:07.263
4,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
5,dbo,FACT_SanFrancisco,2022-05-26 01:42:34.680,2022-05-26 01:42:34.680
6,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
7,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-26 01:39:29.083
8,dbo,Rel_Demographics,2022-05-17 22:34:37.987,2022-05-19 22:21:30.853
9,dbo,Rel_Eviction_Notices,2022-05-17 21:59:47.223,2022-05-19 23:24:13.557


### DIM_Demographics

In [51]:
create_DIM_Demographics_table_query = """
CREATE TABLE DIM_Demographics
(
    People_per_Sq_Mile                                  INT,
    Avg_Income_Per_Household                            INT,
    Population                                          INT,
    Zip_Code                                            VARCHAR(max),
    Avg_Population_Age                                  INT,
    City                                                VARCHAR(max),
    Latitude_Longitude                                  VARCHAR(max)
)
"""

In [52]:
## Create the table
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

engine.execute(create_DIM_Demographics_table_query)

<sqlalchemy.engine.result.ResultProxy at 0x7fa43ccd6700>

In [53]:
tables = pd.read_sql_query(query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BR_Reason,2022-05-26 01:45:35.133,2022-05-26 01:45:35.133
1,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
2,dbo,Date,2022-05-26 01:44:54.340,2022-05-26 01:44:54.340
3,dbo,DIM_Demographics,2022-05-26 01:59:27.683,2022-05-26 01:59:27.683
4,dbo,DIM_District,2022-05-26 01:55:06.617,2022-05-26 01:55:06.617
5,dbo,DIM_Eviction_Reason,2022-05-26 01:34:00.140,2022-05-26 01:45:35.137
6,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
7,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
8,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-26 01:39:29.083
9,dbo,Rel_Demographics,2022-05-17 22:34:37.987,2022-05-19 22:21:30.853


### DIM_District

In [48]:
create_DIM_District_table_query = """
CREATE TABLE DIM_District
(
    Location_Key                                         VARCHAR(432) PRIMARY KEY,
    Block_Level_Address                                  VARCHAR(max),
    Zip_Code                                             INT,
    Supervisor_District                                  INT,
    City                                                 VARCHAR(max),
    State                                                VARCHAR(max),
    Location                                             VARCHAR(max),
    Shape                                                VARCHAR(max)
)
"""

In [49]:
## Create the table
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

engine.execute(create_DIM_District_table_query)

<sqlalchemy.engine.result.ResultProxy at 0x7fa43ccc6f70>

In [50]:
tables = pd.read_sql_query(query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BR_Reason,2022-05-26 01:45:35.133,2022-05-26 01:45:35.133
1,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
2,dbo,Date,2022-05-26 01:44:54.340,2022-05-26 01:44:54.340
3,dbo,DIM_District,2022-05-26 01:55:06.617,2022-05-26 01:55:06.617
4,dbo,DIM_Eviction_Reason,2022-05-26 01:34:00.140,2022-05-26 01:45:35.137
5,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
6,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
7,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-26 01:39:29.083
8,dbo,Rel_Demographics,2022-05-17 22:34:37.987,2022-05-19 22:21:30.853
9,dbo,Rel_Eviction_Notices,2022-05-17 21:59:47.223,2022-05-19 23:24:13.557


## Create FACT table

In [54]:
create_fact_table_query = """
CREATE TABLE FACT_SanFrancisco
(
    Count_BuyoutID                                       INT,
    Eviction_Key                                         VARCHAR(432),
    Location_Key                                         VARCHAR(432) FOREIGN KEY REFERENCES DIM_District(Location_Key),
    Date_Key                                             VARCHAR(432) FOREIGN KEY REFERENCES Date(Date_Key),
    Reason_GroupKey                                      VARCHAR(432) FOREIGN KEY REFERENCES BR_Reason(Reason_GroupKey),
    Count_EvictionID                                     INT,
    Count_Buyout                                         INT,
    Count_PreBuyout                                      INT
)
"""

In [55]:
## Create the table
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

engine.execute(create_fact_table_query)

<sqlalchemy.engine.result.ResultProxy at 0x7fa43ccd6760>

In [56]:
tables = pd.read_sql_query(query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BR_Reason,2022-05-26 01:45:35.133,2022-05-26 01:59:52.133
1,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
2,dbo,Date,2022-05-26 01:44:54.340,2022-05-26 01:59:52.133
3,dbo,DIM_Demographics,2022-05-26 01:59:27.683,2022-05-26 01:59:27.683
4,dbo,DIM_District,2022-05-26 01:55:06.617,2022-05-26 01:59:52.133
5,dbo,DIM_Eviction_Reason,2022-05-26 01:34:00.140,2022-05-26 01:45:35.137
6,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
7,dbo,FACT_SanFrancisco,2022-05-26 01:59:52.130,2022-05-26 01:59:52.130
8,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
9,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-26 01:39:29.083
