In [13]:
import pandas as pd
from tqdm import tqdm
import os

import sqlalchemy as sa
from sqlalchemy import Table, MetaData, Column, Integer, text

### Connect to the DB

Get the data related to the location tables

In [14]:
## You must have this driver installed on your local machine

username = os.getenv('AZURE_MSDS432_USERNAME')
password = os.getenv('AZURE_MSDS432_PASSWORD')
host = 'mysqlserver-432.database.windows.net'
database = 'mySampleDatabase'
authentication = "SqlPassword"
conn_string = sa.engine.url.URL(
     "mssql+pyodbc",
     username=username,
     password=password,
     host=host,
     port=1433,
     database=database,
     query={"driver": "ODBC Driver 18 for SQL Server", "authentication": authentication}
 )

engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()

In [15]:
get_evictions = """
SELECT *
FROM SF_Eviction_Notices_Raw;
"""

get_buyout_agreements = """
SELECT *
FROM SF_Buyout_Agreements_Raw;
"""

evictions = pd.read_sql_query(get_evictions, connection)
buyouts = pd.read_sql_query(get_buyout_agreements, connection)

In [16]:
evictions.head()

Unnamed: 0,id,eviction_id,address,city,state,zip,file_date,non_payment,breach,nuisance,...,other_cause,late_payments,lead_remediation,development,good_samaritan_ends,supervisor_district,neighborhood,shape,constraints_date,client_location
0,0,M220284,2000 Block Of Broadway Street,San Francisco,CA,94123,2022-02-18T00:00:00.000,False,True,False,...,False,False,False,False,False,2,Pacific Heights,"{'type': 'Point', 'coordinates': [-122.430824,...",,
1,1,M220262,500 Block Of Beale Street,San Francisco,CA,94105,2022-02-14T00:00:00.000,False,True,False,...,False,False,False,False,False,6,Financial District/South Beach,"{'type': 'Point', 'coordinates': [-122.38909, ...",,
2,2,M220049,2000 Block Of Broadway Street,San Francisco,CA,94123,2022-01-07T00:00:00.000,False,True,False,...,False,False,False,False,False,2,Pacific Heights,"{'type': 'Point', 'coordinates': [-122.430824,...",,
3,3,M211708,100 Block Of Bartlett Street,San Francisco,CA,94115,2021-11-12T00:00:00.000,False,False,True,...,False,False,False,False,False,9,Mission,"{'type': 'Point', 'coordinates': [-122.41979, ...",,
4,4,M211587,700 Block Of Fillmore Street,San Francisco,CA,94134,2021-10-25T00:00:00.000,False,False,True,...,False,False,False,False,False,5,Hayes Valley,"{'type': 'Point', 'coordinates': [-122.431305,...",,


In [17]:
buyouts.head()

Unnamed: 0,id,case_number,pre_buyout_disclosure_declaration_date,buyout_agreement_date,buyout_amount,number_of_tenants,address,zip_code,supervisor_district,analysis_neighborhood,point,geocoding_confidence,unknown_amount,other_consideration
0,0,B153174,2015-11-16T00:00:00.000,2016-03-28T00:00:00.000,55000.0,2.0,6439 (aka 6443) California Street,,,,,,,
1,1,B171997,2017-08-04T00:00:00.000,,,,676 10th Avenue,94118.0,1.0,Inner Richmond,"{'type': 'Point', 'coordinates': [-122.4681713...",100.0,,
2,2,B153129,2015-11-12T00:00:00.000,2016-02-19T00:00:00.000,35000.0,1.0,733 Fillmore Street,94117.0,5.0,Hayes Valley,"{'type': 'Point', 'coordinates': [-122.4313123...",100.0,,
3,3,B161104,2016-03-21T00:00:00.000,,,,877 Shotwell Street,94110.0,9.0,Mission,"{'type': 'Point', 'coordinates': [-122.4153385...",100.0,,
4,4,B172057,2017-08-09T00:00:00.000,2018-08-24T00:00:00.000,15000.0,1.0,759 06th Avenue,94118.0,1.0,Inner Richmond,"{'type': 'Point', 'coordinates': [-122.4638491...",100.0,,


### Get the target column names and types

In [18]:
engine = sa.create_engine(conn_string, pool_timeout=60)
connection = engine.connect()
get_tables_query = """
select schema_name(t.schema_id) as schema_name,
       t.name as table_name,
       t.create_date,
       t.modify_date
from sys.tables t
order by schema_name,
         table_name;
"""

tables = pd.read_sql_query(get_tables_query, connection)
tables

Unnamed: 0,schema_name,table_name,create_date,modify_date
0,dbo,BuildVersion,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
1,dbo,ErrorLog,2022-05-15 20:04:00.113,2022-05-15 20:04:01.767
2,dbo,Rel_Building,2022-05-17 22:49:05.513,2022-05-19 22:05:23.507
3,dbo,Rel_BuyoutAgreements,2022-05-17 22:47:28.627,2022-05-19 22:05:23.503
4,dbo,Rel_Demographics,2022-05-17 22:34:37.987,2022-05-19 22:21:30.853
5,dbo,Rel_Eviction_Notices,2022-05-17 21:59:47.223,2022-05-19 23:24:13.557
6,dbo,Rel_Location,2022-05-17 22:25:35.210,2022-05-19 23:24:13.557
7,dbo,SF_Addresses_With_Units_Raw,2022-05-19 01:46:22.717,2022-05-19 01:46:22.793
8,dbo,SF_Buyout_Agreements_Raw,2022-05-16 00:27:00.917,2022-05-16 00:27:00.943
9,dbo,SF_Eviction_Notices_Raw,2022-05-16 00:41:20.143,2022-05-16 00:41:20.190


In [19]:
location_table_target_names = """
select 
    col.column_id as id,
    col.name,
    t.name as data_type,
    col.max_length,
    col.precision,
    col.is_nullable
from sys.tables as tab
    inner join sys.columns as col
        on tab.object_id = col.object_id
    left join sys.types as t
    on col.user_type_id = t.user_type_id
where tab.name = 'Rel_Location'
order by tab.name, column_id;
"""

location_table_fields = pd.read_sql_query(location_table_target_names, connection)
location_table_fields

Unnamed: 0,id,name,data_type,max_length,precision,is_nullable
0,1,location_id,uniqueidentifier,16,0,False
1,2,block_level_address,varchar,1,0,False
2,3,zipcode,int,4,10,False
3,4,current_supervisor_districts,tinyint,1,3,True
4,5,analysis_neighborhoods,tinyint,1,3,True
5,6,city,varchar,1,0,False
6,7,state,varchar,1,0,False
7,8,supervisor_district,tinyint,1,3,False
8,9,neighborhoods_analysis_boundaries,varchar,1,0,True
9,10,location,geography,-1,0,True


In [21]:
buyouts.columns

Index(['id', 'case_number', 'pre_buyout_disclosure_declaration_date',
       'buyout_agreement_date', 'buyout_amount', 'number_of_tenants',
       'address', 'zip_code', 'supervisor_district', 'analysis_neighborhood',
       'point', 'geocoding_confidence', 'unknown_amount',
       'other_consideration'],
      dtype='object')

In [20]:
evictions.columns

Index(['id', 'eviction_id', 'address', 'city', 'state', 'zip', 'file_date',
       'non_payment', 'breach', 'nuisance', 'illegal_use',
       'failure_to_sign_renewal', 'access_denial', 'unapproved_subtenant',
       'owner_move_in', 'demolition', 'capital_improvement',
       'substantial_rehab', 'ellis_act_withdrawal', 'condo_conversion',
       'roommate_same_unit', 'other_cause', 'late_payments',
       'lead_remediation', 'development', 'good_samaritan_ends',
       'supervisor_district', 'neighborhood', 'shape', 'constraints_date',
       'client_location'],
      dtype='object')

## Select only the columns needed for the location table

In [30]:
evictions_columns = ['address', 'zip', 'city', 'state', 'supervisor_district', 'client_location', 'shape']
evictions_grouped = evictions.groupby(evictions_columns).count().reset_index()

buyouts_columns = ['address', 'zip_code', 'analysis_neighborhood', 'supervisor_district']
buyouts_grouped = buyouts.groupby(buyouts_columns).count().reset_index()

In [31]:
evictions_grouped = evictions_grouped[evictions_columns]
buyouts_grouped = buyouts_grouped[buyouts_columns]

In [32]:
## Columns needed for location data

## missing (get from addresses with units): neighborhoods_analysis_boundaries, sf_find_neighborhoods,
## current_police_districts, cbd_bid_gbd_boundaries, central_market_tenderloin_boundary,
## areas_of_vulnerability, central_market_tenderloin_boundary_polygon, fix_it_zones, neighborhoods

## FK: demographic_id

evictions_grouped_renamed = evictions_grouped.rename(columns={                     
    'address': 'block_level_address',
    'zip': 'zipcode',
    'city': 'city',
    'state': 'state',
    'supervisor_district': 'supervisor_district',
    'client_location': 'location',
    'shape': 'shape'
})


buyouts_grouped_renamed = buyouts_grouped.rename(columns={ 
    'address': 'block_level_address',
    'zip': 'zipcode',
    'analysis_neighborhood': 'analysis_neighborhoods',
    'supervisor_district': 'supervisor_district'
})

In [33]:
buyouts_grouped_renamed

Unnamed: 0,block_level_address,zip_code,analysis_neighborhoods,supervisor_district
0,1 Baker Street,94117,Haight Ashbury,5
1,1 Cielito Drive,94134,Visitacion Valley,10
2,1 Dawson Place,94108,Nob Hill,3
3,1 Homestead Street,94114,Noe Valley,8
4,1 Mallorca Way,94123,Marina,2
...,...,...,...,...
4561,991 Greenwich Street,94133,Russian Hill,3
4562,994 Le Conte Avenue,94124,Bayview Hunters Point,10
4563,996 Carolina Street,94107,Potrero Hill,10
4564,997 Clayton Street,94117,Haight Ashbury,5
