In [1085]:
import pandas as pd
from sqlalchemy import create_engine
import re

In [1086]:
# Extracting building permit data
buildingPermits = pd.read_csv('clearedpermits2016.csv',low_memory=False)
buildingPermitsActive = pd.read_csv('building-permits-active-permits.csv', low_memory=False)
buildingPermits2017 = pd.read_csv('Cleared Building Permits since 2017.csv', low_memory=False)

# Extracting demolition data
demolition = pd.read_csv('Rental Demolition and Replacement.csv',low_memory=False)

In [1087]:
#standardizing dataframes to prepare for merge
buildingPermitsActive.drop(['_id', 'REVISION_NUM'], axis=1, inplace=True)
buildingPermits.drop(['REVISION_NUM'], axis=1, inplace=True)
buildingPermits2017.drop(['_id', 'REVISION_NUM'], axis=1, inplace=True)

buildingPermitsActive['COMPLETED_DATE'] = pd.to_datetime(buildingPermitsActive['COMPLETED_DATE'])
buildingPermits['COMPLETED_DATE'] = pd.to_datetime(buildingPermits['COMPLETED_DATE'])
buildingPermits2017['COMPLETED_DATE'] = pd.to_datetime(buildingPermits2017['COMPLETED_DATE'])

#merge the datasets
combined_buildingPermits = pd.concat([buildingPermits, buildingPermitsActive, buildingPermits2017], ignore_index=True)

In [1088]:
# Print a Series with the data type of each column
print(combined_buildingPermits.dtypes)

PERMIT_NUM                                object
PERMIT_TYPE                               object
STRUCTURE_TYPE                            object
WORK                                      object
STREET_NUM                                object
STREET_NAME                               object
STREET_TYPE                               object
STREET_DIRECTION                          object
POSTAL                                    object
GEO_ID                                   float64
WARD_GRID                                 object
APPLICATION_DATE                          object
ISSUED_DATE                               object
COMPLETED_DATE                    datetime64[ns]
STATUS                                    object
DESCRIPTION                               object
CURRENT_USE                               object
PROPOSED_USE                              object
DWELLING_UNITS_CREATED                   float64
DWELLING_UNITS_LOST                      float64
EST_CONST_COST      

In [1089]:
#dropping columns that are not needed
columns_to_drop = ['CURRENT_USE', 'PROPOSED_USE', 'DWELLING_UNITS_CREATED', 'DWELLING_UNITS_LOST']
start_col = 'ASSEMBLY'
end_col = 'BUILDER_NAME'

start_col_index = combined_buildingPermits.columns.get_loc(start_col)
end_col_index = combined_buildingPermits.columns.get_loc(end_col) + 1 

# extend the list with the range of columns
columns_to_drop.extend(combined_buildingPermits.columns[start_col_index:end_col_index])

# drop the columns
combined_buildingPermits.drop(columns=columns_to_drop, axis=1, inplace=True)


In [1090]:
# Print a Series with the data type of each column
print(combined_buildingPermits.dtypes)

PERMIT_NUM                  object
PERMIT_TYPE                 object
STRUCTURE_TYPE              object
WORK                        object
STREET_NUM                  object
STREET_NAME                 object
STREET_TYPE                 object
STREET_DIRECTION            object
POSTAL                      object
GEO_ID                     float64
WARD_GRID                   object
APPLICATION_DATE            object
ISSUED_DATE                 object
COMPLETED_DATE      datetime64[ns]
STATUS                      object
DESCRIPTION                 object
EST_CONST_COST              object
dtype: object


In [1091]:
# analyzing which structure types, statuses, and work types are useful in analysis

print("STRUCTURE TYPES:")
unique_structure_types = combined_buildingPermits['STRUCTURE_TYPE'].unique()
print(unique_structure_types)

print("WORK TYPES:")
unique_work_types = combined_buildingPermits['WORK'].unique()
print(unique_work_types)



STRUCTURE TYPES:
['SFD - Detached' 'Office' 'SFD: P/D/F/E/R Drains' 'SFD - Semi-Detached'
 'Group A & B' 'Restaurant Greater Than 30 Seats'
 'Industrial Warehouse/Hazardous Building' 'Commercial/Industrial Use'
 ' Mixed Comm/Res ' 'Electromagnetic Locks' 'Apartment Building'
 'HVAC Alt. add on Sys. or Ductwork Alt.' 'Nursing Home Facility'
 'Multiple Unit Building' 'Home for the Aged' 'Retaining Wall'
 'SFD Garages' 'Retail Store' 'Open Public Swimming Pool'
 'Mixed Use/Res w Non Res' 'Piping(all other bldgs):Outside Water..'
 'Elementary School' 'Secondary School'
 'P/D/F/E/R Drains: all other buildings' 'Non-Residential Building'
 'Hospital' 'Hair, Barber and Other Salon'
 'Parking Garage Repairs (all other)' 'SFD - Townhouse'
 'Restaurant 30 Seats or Less ' 'Grandstand' 'Residential Porches'
 'Residential Decks' 'Repair Garage'
 'College/Trade/Tech School/Training Cent.' 'Place of Worship'
 'Converted House' 'Industrial Manufacturing Plant' 'Duplex/Semi-Detached'
 'Medical/Dental Of

In [1092]:
# removing statuses that aren't needed in analysis
statuses_to_remove = [
    "Pending Cancellation",
    "Application Withdrawn",
    "Superseded",
    "Refused",
    "Abandoned",
    "Not Accepted",
    "VIOLATION",
    "Work Suspended",
    "Refusal Notice",
    "Cancelled"
]

combined_buildingPermits = combined_buildingPermits[~combined_buildingPermits['STATUS'].isin(statuses_to_remove)]


In [1093]:
# dropping STRUCTURE_TYPES not useful to analysis
# STRUCTURE TYPE
not_included = [
    'Office', 'Industrial Processing Plant', 'Gas Station/Car Wash/Repair Garage', 'Personal Service Shop', 'Industrial Manufacturing Plant', 'SFD: P/D/F/E/R Drains', 'Industrial Warehouse/Hazardous Building', 
    'Electromagnetic Locks', 'HVAC Alt. add on Sys. or Ductwork Alt.', 'Nursing Home Facility',
    'Home for the Aged', 'Retaining Wall', 'Industrial', 'SFD Garages', 'Piping(all other bldgs):Outside Water..',
    'P/D/F/E/R Drains: all other buildings', 'Laboratory', 'Water and Sewage Pumping Stations', 'Warehouse', 'Parking Garage Repairs (all other)', 'Grandstand',
    'Residential Porches', 'Residential Decks', 'Repair Garage', 'Converted House', 'Industrial - Shell',
    'Laundromat', 'Third Party', 'Storage Room', 'Convent/Monastery', 'Police Station with Detention', 'Manufacturing - MMPF ',
    'Manufacturing - MMPF', 'Undertaking Premises', 'Crematorium/Cemetary Structure', 'Jails/Detention Facility', 
    'Self-Service Storage Building', 'Triplex/Semi-Detached', 'Courtroom', 'Distillery', 'Car Dealership', 'Power Plant',
    'Dry Cleaning/Laundry Plant', 'Printing Plant', 'Dry Cleaning Depot', 'Long Term Care Facility', 'Tree  Declaration Form',
    'Police Station with No Detention', 'Live/Work Unit', 'Unknown', 'Group D & E', 'SFD Access. Structures',
    'Fire Alarms', 'Standpipes', 'Piping(SF) Water Serv., Sanitary/Storm', 'Fireplaces', 'Registration and Discharge  of Unsafe Order', 'Farm Building',
 'MGO Memo To ', 'HVAC Alt. Boiler/Furn Rplmt. or A/C', 'Exterior Storage Tank', 'Canopy w/o enclosure', 'Sprinklers',
    'Underpinning', 'Spray Painting Operation', 'Group F (< 230 m2)', 'Piping(SF):Repair/Rplmt/Add. Pool Drain',
    'HVAC: Special Ventilation System', 'Basements - Finishing - in Dwellings/TH', 'Mixed Comm/Inst./Res',
    'SFD/TH HVAC', 'Balcony Repairs', 'Repairs/Re-cladding Walls, Re-roofing', 'Temporary Buildings',
    'Trailers', 'Parking Garage Repairs (slab)', 'Sales Pavilions', 'Mixed Industrial Use',
    'SFD/TH Heat. Vent. only', 'Commercial/Institutional Use', 'Industrial/Institutional Use', 'Tent',
    'Mixed Assembly Use', 'Window Replacements (except SFD)', 'Communication Tower', 'SFD/TH Boiler/Furn. Replac.',
    'Residential Carports', 'Group F (> 230m2)', 'Portable Classroom',
    'Re-roofing with structural work', 'Multiple-Use Building', 'Mixed Institutional', 'Fire Doors Retrofit', 'Piping(all other bldgs):Inside San/Storm', 'Exhibition Hall(With Sales)', 'Exhibition Hall(Without Sales)', 'Mixed Inst/Res', 'Other School',
    'House', 'Mixed Comm/Inst/Ind/Res', 'Air Supported Stuctures', 'Home Office', 'Mixed Ind/Comm/Res',
    'Industrial Chemical Plant', 'Municipal Shelter', 'Penthouse/Mechanical Room', 'Lecture Hall', 'Subdivision', 'Public Health', 'ZR - Licensing LPR Notice',
    'HAP Folder', "ZR - Examiner's Notice", 'Tree Declaration Form', 'Municipal Road Damage Deposit Form',
    'Demolition Permit Application Checklist',
    'Parks Levy Appraisal Request', 'Registration and Discharge of Unsafe Order', 'ZR Folder - Planning Source',
    'MGO Memo To', 'Search Titles', 'HP Property DM Folder', 'Sump Pump Program', 'Supermarket',
    'Laneway / Rear Yard Suite', 'Toronto Fire Notifications', 'Laneway / Rear Yard Suites',
    'Television Studio(with audience)', 'HVAC for other Group C', 'Backflow Prevention Devices',
    'Manholes, Catch Basin, Interceptors, Smp', 'SFD/TH A/C Unit Addition', 'Tent (permits for certified)',
    'Balcony Guards'
]

combined_buildingPermits = combined_buildingPermits[~combined_buildingPermits['STRUCTURE_TYPE'].isin(not_included)]


In [1094]:
# removing work types not needed in analysis
no_work_types = [
    'Install/Alter Plumbing - only', 'Demolition', 'Partial Demolition',
    'Partial Permit - Shoring', ' Fixtures/Roof Drains: SFD', 'Addition(s) ', 'Porch', 'Garage', 'Deck',
    'Install/Alter HVAC - only', 'Alter:  Add on /Ductwork', 'Balcony/Guard Repairs',
    'Building Permit Related(PS)', 'Window Replacement', 'Re-Roofing/Re-Cladding', 'Special Ventilation System',
    'Other Proposal', 'Carport', 'Change of Use', 'HVAC: Groups  D & E', ' Fixtures/Roof Drains: Other ', 'Finishing Basements',
    'Fixtures/Roof Drains: Other', 'Certified Portables', 'Non-Certified Portables', 'MGO 565 Remediation',
    'Multiple Projects', 'New Laneway / Rear Yard Suite', ' Backflow Prevention Devices (Water only)',
    'Fixtures/Roof Drains: SFD', 'Septic System:  Sewage System',
    'Communication Tower', 'City Planning',
    'Building Permit Related(MS)',
    'Partial Permit - Foundation',
    'Electromagnetic Locks',
    'Alter: Add on /Ductwork',
    'Other(BA)',
    'HVAC: Parking Garages',
    'Canopy',
    'Other(SR)',
    'Fire Damage',
    'Walk-Out Stair',
    'HVAC: Groups A & B',
    'Piping: SFD/Semi',
    'Fire Alarm',
    'Air Conditioning: SFD/Semi/TH',
    'Garage Repair/Reconstruction',
    'Piping: Other Buildings',
    'Sprinklers',
    'Manholes/Catch Basins/Sumps/Interceptors',
    'Underpinning',
    'HVAC: Groups D & E',
    'Partial Permit - Structural Framing',
    'Certified Tents',
    'Other(DS)',
    'Building Permit Related (DR)',
    'Heat/Ventilation: SFD/Semi/TH',
    'Canopy w/o Enclosure',
    'Sales Pavilions',
    'Interior Demolition',
    'Site Service',
    'Septic System: Sewage System',
    'Other(PS)',
    'Boiler/Furnace: SFD/Semi/TH',
    'Other(FS)',
    'Other(MS)',
    'Inside and Outside Drains',
    'Other Temporary Tents',
    'Building Permit Related(FS)',
    'Other(TS)',
    'Alter: Boiler/Furnace/AC Replacement',
    'Shoring',
    'Backflow Prevention Devices (Water only)',
    'Temporary Structures',
    'HVAC: SFD/Semi/TH',
    'Retaining Wall',
    'HVAC: Other Group C Buildings',
    'Emergency Lighting',
    'Solar Domestic Hot Water (Res)',
    'Sign Building Permit Related',
    'Crane Runway',
    'Alternative Solution',
    'Solar Collector',
    'Standpipes',
    'Party Wall Admin Permits',
    'Back Water Valve (Sewer only)',
    'HVAC: Group F > 230 Sq M',
    'Pool Fence Enclosure',
    'Fire Doors Retrofit',
    'Exterior Tank & Support',
    'Fireplace/Wood Stoves',
    'Trailers',
    'HVAC: Group F up to 230 Sq M',
    'Material Evaluation',
    'Unknown',
    'HVAC',
    'Install/Alter Plumbing & HVAC only',
    'Accessory Structure',
    'Partial Permit - Other',
    'Addition',
    'Satellite Dish',
    'Pedestrian Bridge',
    'Holding Tank: Sewage System',
    'HVAC: Laboratories',
    'Ceilings (Add or Replace)',
    'Other'
]

combined_buildingPermits = combined_buildingPermits[~combined_buildingPermits['WORK'].isin(no_work_types)]


In [1095]:
print("STRUCTURE TYPES:")
unique_structure_types = combined_buildingPermits['STRUCTURE_TYPE'].unique()
print(unique_structure_types)

STRUCTURE TYPES:
['SFD - Detached' 'SFD - Semi-Detached' 'Group A & B'
 'Restaurant Greater Than 30 Seats' ' Mixed Comm/Res '
 'Commercial/Industrial Use' 'Multiple Unit Building'
 'Open Public Swimming Pool' 'Mixed Use/Res w Non Res' 'Elementary School'
 'Secondary School' 'Hospital' 'Apartment Building'
 'Hair, Barber and Other Salon' 'SFD - Townhouse' 'Place of Worship'
 'Retail Store' 'Medical/Dental Office' 'Restaurant 30 Seats or Less '
 'Parking Garage' 'Bank' 'College/Trade/Tech School/Training Cent.'
 'Multiple Use/Non Residential' nan 'Museum' 'Other' 'Motel/Hotel'
 'Performing Arts Centre' 'Fitness Centre' 'Club'
 'Rental and Service Establishment' 'Community Hall' 'Stacked Townhouses'
 'Recreational' 'Motion Picture Theatre' 'Library' 'University' 'Triplex'
 'Transit Station,Subway, Bus Terminal'
 'Child Care Facility/DayCare Centre' 'Duplex/Semi-Detached'
 '3+ Unit - Semi-detached' 'Apartment Hotel' 'Retail Mall/Plaza'
 'Art Gallery' 'Duplex ' '2 Unit - Detached' '2 Unit -

In [1096]:
Amenities_Structure_Types_Sample = [
    'Performing Arts Centre',
    'Fitness Centre',
    'Club',
    'Motel/Hotel'
    'Library',
    'Art Gallery',
    'Hair, Barber and Other Salon',
    'Restaurant 30 Seats or Less',
    'Museum',
    'Retail Mall/Plaza',
    'Indoor Swimming Pool',
    'Television Studio(no audience)',
    'Gymnasium',
    'Amusement Park Structure',
    'Dance Hall',
    'Auditorium',
    'Stadium',
    'Radio Station',
    'Recreational'
]


Transporation_Structure_Types_Sample = ['Transit Station,Subway, Bus Terminal']

Social_Development = [
    'Elementary School', 
    'College/Trade/Tech School/Training Cent.', 
    'Place of Worship', 
    'University', 
    'Secondary School', 
    'Hospital', 
    'Student Residence', 
    'Child Care Facility/DayCare Centre'
]

real_estate_structure_types = [
    'SFD - Detached',
    'SFD - Semi-Detached',
    'Group A & B',
    'Mixed Comm/Res',
    'Multiple Unit Building',
    'Mixed Use/Res w Non Res',
    'Apartment Building',
    'Parking Garage',
    'Multiple Use/Non Residential',
    'Stacked Townhouses',
    'Triplex',
    'Duplex/Semi-Detached',
    '3+ Unit - Semi-detached',
    'Apartment Hotel',
    'Duplex',
    '2 Unit - Detached',
    '2 Unit - Semi-detached',
    'Boarding/Lodging House',
    '2 Unit - Townhouse',
    '3+ Unit - Detached',
    'Non-Residential Building',
    'Other (New Housing)',
    '3+ Unit - Townhouse'
]




In [1097]:
print("WORK TYPES:")
unique_work_types = combined_buildingPermits['WORK'].unique()
print(unique_work_types)


WORK TYPES:
['Addition to Existing Building'
 'Addition/Alteration to Existing Building' 'New Building'
 'Alteration to Existing Building' 'Interior Alterations'
 'Accessory Building(s)' nan 'Second Suite (New)' 'New Building-Certified'
 'New Building - Lead' 'MGO Remediation' 'New Building Certified - Lead'
 'Green Roof' 'New Building - By Renovation']


In [1098]:
# handling null values
columns_with_null = combined_buildingPermits.isnull().any()
print(columns_with_null)

PERMIT_NUM          False
PERMIT_TYPE         False
STRUCTURE_TYPE       True
WORK                 True
STREET_NUM           True
STREET_NAME          True
STREET_TYPE         False
STREET_DIRECTION    False
POSTAL              False
GEO_ID               True
WARD_GRID            True
APPLICATION_DATE    False
ISSUED_DATE          True
COMPLETED_DATE       True
STATUS              False
DESCRIPTION          True
EST_CONST_COST       True
dtype: bool


In [1099]:
# dont include any records where Structure_Type is null
combined_buildingPermits = combined_buildingPermits.dropna(subset=['STRUCTURE_TYPE'])

In [1100]:
# dont include any records where Application_Date or Issued_Date is null
combined_buildingPermits = combined_buildingPermits.dropna(subset=['APPLICATION_DATE'])
combined_buildingPermits = combined_buildingPermits.dropna(subset=['ISSUED_DATE'])

In [1101]:
# handling duplicate values for Permit_Num
print("number of rows before removing duplicates:", len(combined_buildingPermits))

# Calculate the number of duplicate Permit_Num values 
number_of_duplicates = combined_buildingPermits['PERMIT_NUM'].duplicated().sum()
print("duplicate permit ids:", number_of_duplicates)

# Remove duplicates by keeping only the first occurrence of each 'Permit_Num'
combined_buildingPermits = combined_buildingPermits.drop_duplicates(subset='PERMIT_NUM', keep='first')

print("number of rows after removing duplicates:", len(combined_buildingPermits))





number of rows before removing duplicates: 90480
duplicate permit ids: 16710
number of rows after removing duplicates: 73770


In [1102]:
# filtering to find sample data that paints a more concentrated image for gentrification analysis
combined_buildingPermits['EST_CONST_COST'] = combined_buildingPermits['EST_CONST_COST'].str.replace(',', '')  # Remove commas
combined_buildingPermits['EST_CONST_COST'] = pd.to_numeric(combined_buildingPermits['EST_CONST_COST'], errors='coerce')
combined_buildingPermits['EST_CONST_COST'] = combined_buildingPermits['EST_CONST_COST'].fillna(0).astype(int)


amenities_BuildingPermits = combined_buildingPermits[combined_buildingPermits['STRUCTURE_TYPE'].isin(Amenities_Structure_Types_Sample)&
    (combined_buildingPermits['EST_CONST_COST'] > 500)]
print(len(amenities_BuildingPermits))

transporation_BuildingPermits = combined_buildingPermits[combined_buildingPermits['STRUCTURE_TYPE'].isin(Transporation_Structure_Types_Sample)&
    (combined_buildingPermits['EST_CONST_COST'] > 500)]
print(len(transporation_BuildingPermits))

social_development_BuildingPermits = combined_buildingPermits[combined_buildingPermits['STRUCTURE_TYPE'].isin(Social_Development)&
    (combined_buildingPermits['EST_CONST_COST'] > 500)]
print(len(social_development_BuildingPermits))

realestate_BuildingPermits = combined_buildingPermits[
    combined_buildingPermits['STRUCTURE_TYPE'].isin(real_estate_structure_types) &
    (combined_buildingPermits['EST_CONST_COST'] > 500000)
]
print(len(realestate_BuildingPermits))

710
379
3026
7782


In [1103]:
# exporting samples to csv
# at this point, our team member added the ward ids to the files and now we are bringing them back in

# Export amenities_BuildingPermits to CSV
# amenities_BuildingPermits.to_csv('amenities_sample.csv', index=False)

# Export transporation_BuildingPermits to CSV
# transporation_BuildingPermits.to_csv('transporation_sample.csv', index=False)

# Export social_development_BuildingPermits to CSV
# social_development_BuildingPermits.to_csv('social_development_sample.csv', index=False)

# Export realestate_BuildingPermits to CSV
# realestate_BuildingPermits.to_csv('realestate_sample.csv', index=False)

# bringing in the files with the ward id added
realestate = pd.read_csv('RealEstate_Sample_Wards.csv', low_memory=False)
transporation = pd.read_csv('Amenities_Sample_Ward.csv', low_memory=False)
amenities = pd.read_csv('Transportation_Sample_Wards.csv', low_memory=False)

combined_buildingPermits = pd.concat([realestate, amenities, transporation], ignore_index=True)

# change name
combined_buildingPermits.rename(columns={'Ward_Index': 'Ward_ID'}, inplace=True)

# Remove records where 'Ward_ID' is empty 
combined_buildingPermits = combined_buildingPermits[combined_buildingPermits['Ward_ID'].notna() & (combined_buildingPermits['Ward_ID'] != '')]

# Convert 'Ward_ID' to an integer
combined_buildingPermits['Ward_ID'] = combined_buildingPermits['Ward_ID'].astype(int)
combined_buildingPermits.drop(['Ward'], axis=1, inplace=True)

print(combined_buildingPermits.dtypes)
print(len(combined_buildingPermits))


Permit_Num           object
Permit_Type          object
Structure_Type       object
Work                 object
Street_Num           object
Street_Name          object
Street_Type          object
Street_Direction     object
Postal               object
Geo_Id              float64
Ward_Grid            object
Application_Date     object
Issued_Date          object
Completed_Date       object
Status               object
Description          object
Est_Const_Cost        int64
Address              object
Coordinates          object
Ward_ID               int32
dtype: object
8762


In [1104]:

# changing date types
combined_buildingPermits['Issued_Date'] = pd.to_datetime(combined_buildingPermits['Issued_Date'])
combined_buildingPermits['Application_Date'] = pd.to_datetime(combined_buildingPermits['Application_Date'])
combined_buildingPermits['Completed_Date'] = pd.to_datetime(combined_buildingPermits['Completed_Date'])

# changing int types
combined_buildingPermits['Est_Const_Cost'] = combined_buildingPermits['Est_Const_Cost'].fillna(0).astype(int)
combined_buildingPermits['Geo_Id'] = pd.to_numeric(combined_buildingPermits['Geo_Id'], errors='coerce').astype('Int64')

# changing string types
combined_buildingPermits['Permit_Num'] = combined_buildingPermits['Permit_Num'].astype(str)
combined_buildingPermits['Permit_Type'] = combined_buildingPermits['Permit_Type'].astype(str)
combined_buildingPermits['Work'] = combined_buildingPermits['Work'].astype(str)
combined_buildingPermits['Street_Name'] = combined_buildingPermits['Street_Name'].astype(str)
combined_buildingPermits['Street_Type'] = combined_buildingPermits['Street_Type'].astype(str)
combined_buildingPermits['Street_Num'] = combined_buildingPermits['Street_Num'].astype(str)
combined_buildingPermits['Street_Direction'] = combined_buildingPermits['Street_Direction'].astype(str)
combined_buildingPermits['Postal'] = combined_buildingPermits['Postal'].astype(str)
combined_buildingPermits['Ward_Grid'] = combined_buildingPermits['Ward_Grid'].astype(str)
combined_buildingPermits['Description'] = combined_buildingPermits['Description'].astype(str)
combined_buildingPermits['Status'] = combined_buildingPermits['Status'].astype(str)

print(combined_buildingPermits.dtypes)


Permit_Num                  object
Permit_Type                 object
Structure_Type              object
Work                        object
Street_Num                  object
Street_Name                 object
Street_Type                 object
Street_Direction            object
Postal                      object
Geo_Id                       Int64
Ward_Grid                   object
Application_Date    datetime64[ns]
Issued_Date         datetime64[ns]
Completed_Date      datetime64[ns]
Status                      object
Description                 object
Est_Const_Cost               int32
Address                     object
Coordinates                 object
Ward_ID                      int32
dtype: object


In [1105]:
# generating surrogate keys
combined_buildingPermits['Permit_Key'] = range(1, len(combined_buildingPermits) + 1)

# Move 'Permit_Key' to the first position
cols = ['Permit_Key'] + [col for col in combined_buildingPermits.columns if col != 'Permit_Key']

# Reorder the DataFrame
combined_buildingPermits = combined_buildingPermits[cols]

In [1106]:
combined_buildingPermits[:10]

Unnamed: 0,Permit_Key,Permit_Num,Permit_Type,Structure_Type,Work,Street_Num,Street_Name,Street_Type,Street_Direction,Postal,...,Ward_Grid,Application_Date,Issued_Date,Completed_Date,Status,Description,Est_Const_Cost,Address,Coordinates,Ward_ID
0,1,01 185413 BLD,Building Additions/Alterations,Parking Garage,Interior Alterations,20,THE QUEENSWAY,,,M6R,...,S1404,2001-10-12,2001-11-08,2016-01-26,Closed,Demolish and reconstruct existing steel/concre...,1200000,"20,THE QUEENSWAY, M6R",POINT (-79.448028 43.639437),13
1,2,04 167768 BLD,Building Additions/Alterations,Apartment Building,Interior Alterations,600-604,ROGERS,RD,,M6M,...,W1209,2004-08-20,2004-11-09,2016-12-15,Closed,Conversion of existing ground floor commercial...,650000,"600-604,ROGERS,RD M6M",POINT (-79.472849 43.682202),22
2,3,06 176865 BLD,New Houses,SFD - Detached,New Building,12,THE BRIDLE,PATH,,M2L,...,N2504,2006-09-19,2006-10-06,2016-10-07,Closed,Construct a new two storey single family dwelling,700000,"12,THE BRIDLE,PATH M2L",POINT (-79.380299 43.739285),11
3,4,07 132494 BLD,New Houses,Stacked Townhouses,New Building,651,WARDEN,AVE,,M1L,...,E3507,2007-03-30,2010-06-16,2016-09-21,Closed,to construct stacked townhouses Block E (12 un...,2000000,"651,WARDEN,AVE M1L",POINT (-79.276472 43.701708),5
4,5,07 244088 BLD,New Houses,SFD - Detached,New Building,514,VESTA,DR,,M5P,...,S2103,2007-07-17,2007-08-31,2016-01-11,Closed,Proposal to construct a new 2 sty SFD. See PA...,1500000,"514,VESTA,DR M5P",POINT (-79.420906 43.701265),12
5,6,07 266299 BLD,New Houses,SFD - Detached,New Building,84,KINGSWAY,CRES,,M8X,...,W0503,2007-10-01,2007-11-19,2016-04-27,Closed,erect a new two storey dwelling as approved by...,600000,"84,KINGSWAY,CRES M8X",POINT (-79.503256 43.658356),15
6,7,08 173856 B05,New Houses,Stacked Townhouses,New Building,2230,GERRARD,ST,E,###,...,S3203,2008-06-23,2009-05-13,2016-07-27,Closed,(LEAD FILE FOR STACKED TOWNHOUSE UNITS) - BUIL...,2000000,"2230,GERRARD,ST E ###",POINT (-79.298143 43.68495),4
7,8,08 173856 B07,New Houses,Stacked Townhouses,New Building,2230,GERRARD,ST,E,###,...,S3203,2008-06-23,2009-05-13,2016-07-27,Closed,(LEAD FILE FOR STACKED TOWNHOUSE UNITS) - BUIL...,2000000,"2230,GERRARD,ST E ###",POINT (-79.298143 43.68495),4
8,9,08 173856 B08,New Houses,Stacked Townhouses,New Building,2230,GERRARD,ST,E,###,...,S3203,2008-06-23,2009-05-13,2016-10-18,Closed,(LEAD FILE FOR STACKED TOWNHOUSE UNITS) - BUIL...,2000000,"2230,GERRARD,ST E ###",POINT (-79.298143 43.68495),4
9,10,08 173856 B09,New Houses,Stacked Townhouses,New Building,2230,GERRARD,ST,E,###,...,S3203,2008-06-23,2009-05-13,2016-09-21,Closed,(LEAD FILE FOR STACKED TOWNHOUSE UNITS) - BUIL...,2000000,"2230,GERRARD,ST E ###",POINT (-79.298143 43.68495),4


In [1107]:
# bringing in the new demolition file with the correct ward ids after the geo spatial analysis
demolition = pd.read_csv('DemolitionDimension_Sample.csv',low_memory=False)
demolition.rename(columns={'Ward_Index': 'Ward_ID'}, inplace=True)


# Remove records where 'Ward_ID' is empty 
demolition = demolition[demolition['Ward_ID'].notna() & (demolition['Ward_ID'] != '')]

# Convert 'Ward_ID' to an integer
demolition['Ward_ID'] = demolition['Ward_ID'].astype(int)
demolition.drop(['Ward'], axis=1, inplace=True)


# generating surrogate keys
demolition['Demolition_Key'] = demolition.index + 1

# Move 'Permit_Key' to the first position
cols = ['Demolition_Key'] + [col for col in demolition.columns if col != 'Demolition_Key']

# Reorder the DataFrame
demolition = demolition[cols]
demolition = demolition.drop(columns=['(Post 2018) Ward'])

print(demolition.dtypes)

Demolition_Key                             int64
IBMS Address                              object
Address of Existing Rental Building       object
RH File Number                            object
City Council Approval Date                object
Link to Staff Report                      object
Type                                      object
Total Rental Homes for Demolition          int64
Affordable Rental Homes for Demolition     int64
Mid-Range Rental Homes for Demolition      int64
High-End Rental Homes for Demolition       int64
Total Rental Homes Replaced               object
Affordable Rental Homes Replaced           int64
Mid-Range Rental Homes Replaced            int64
High-End Rental Homes Replaced             int64
Coordinates                               object
Ward_ID                                    int32
dtype: object


In [1108]:

# changing date types
demolition['City Council Approval Date'] = pd.to_datetime(demolition['City Council Approval Date'])

# changing string types
demolition['IBMS Address'] = demolition['IBMS Address'].astype(str)
demolition['Address of Existing Rental Building'] = demolition['Address of Existing Rental Building'].astype(str)
demolition['RH File Number'] = demolition['RH File Number'].astype(str)
demolition['Type'] = demolition['Type'].astype(str)

# changing int types
columns = [
 
    'Affordable Rental Homes for Demolition', 
    'Mid-Range Rental Homes for Demolition', 
    'High-End Rental Homes for Demolition', 
    'Affordable Rental Homes Replaced', 
    'Mid-Range Rental Homes Replaced', 
    'High-End Rental Homes Replaced'
]

for column in columns:
    demolition[column] = pd.to_numeric(demolition[column], errors='coerce').fillna(0).astype('Int64')



In [1109]:
def simplify_address(address):
    # define street abbreviations
    abbreviations = {'St': 'ST', 'Ave': 'AVE', 'Rd': 'RD', 'Blvd': 'BLVD', 'Dr': 'DR'}
    
    # find the first number in a range because we only need to map one address to one ward, and if there are multiple numbers, we can safely 
    # keep just one because one street is in one ward
    match = re.match(r"(\d+)[-\s]*\d*\s+(.*)", address)
    if match:
        first_number, street_name = match.groups()
        street_name_parts = street_name.title().split()
        final_parts = [abbreviations.get(part, part) for part in street_name_parts]
        simplified_address = f"{first_number} {' '.join(final_parts)}"
    else:
        simplified_address

    return simplified_address  # Convert the entire address to uppercase

# Apply the function to the IBMS Address column and change the format of the addresses to be the proper capitalization 
demolition['IBMS Address'] = demolition['IBMS Address'].apply(simplify_address)
demolition['IBMS Address'] = demolition['IBMS Address'].astype(str).str.title()




In [1110]:
print(demolition.dtypes)

Demolition_Key                                     int64
IBMS Address                                      object
Address of Existing Rental Building               object
RH File Number                                    object
City Council Approval Date                datetime64[ns]
Link to Staff Report                              object
Type                                              object
Total Rental Homes for Demolition                  int64
Affordable Rental Homes for Demolition             Int64
Mid-Range Rental Homes for Demolition              Int64
High-End Rental Homes for Demolition               Int64
Total Rental Homes Replaced                       object
Affordable Rental Homes Replaced                   Int64
Mid-Range Rental Homes Replaced                    Int64
High-End Rental Homes Replaced                     Int64
Coordinates                                       object
Ward_ID                                            int32
dtype: object


In [1111]:
# determining start and ends for date dimension
earliest_date_completed = combined_buildingPermits['Completed_Date'].min()
latest_date_completed = combined_buildingPermits['Completed_Date'].max()

earliest_date_issued = combined_buildingPermits['Issued_Date'].min()
latest_date_issued = combined_buildingPermits['Issued_Date'].max()

earliest_date_applied = combined_buildingPermits['Application_Date'].min()
latest_date_applied = combined_buildingPermits['Application_Date'].max()

earliest_date_approval = demolition['City Council Approval Date'].min()
latest_date_approval = demolition['City Council Approval Date'].max()

print(f"Earliest Completed Date: {earliest_date_completed}")
print(f"Latest Completed Date: {latest_date_completed}")
print(f"Earliest Issued Date: {earliest_date_issued}")
print(f"Latest Issued Date: {latest_date_issued}")
print(f"Earliest Applied Date: {earliest_date_applied}")
print(f"Latest Applied Date: {latest_date_applied}")
print(f"Earliest Approved Date: {earliest_date_approval}")
print(f"Latest Approved Date: {latest_date_approval}")


Earliest Completed Date: 2016-01-04 00:00:00
Latest Completed Date: 2024-03-07 00:00:00
Earliest Issued Date: 1999-12-02 00:00:00
Latest Issued Date: 2024-03-07 00:00:00
Earliest Applied Date: 1999-10-04 00:00:00
Latest Applied Date: 2024-02-20 00:00:00
Earliest Approved Date: 2017-01-31 00:00:00
Latest Approved Date: 2023-12-13 00:00:00


In [1112]:
# generate date range with minimum date and maximum date to cover the entire range
start_date = '1999-10-04'
end_date = '2024-03-07'

dates = pd.date_range(start=start_date, end=end_date)

In [1113]:
DateDimension = pd.DataFrame(dates, columns=['Date'])

# Extract date parts
DateDimension['Year'] = DateDimension['Date'].dt.year
DateDimension['Month'] = DateDimension['Date'].dt.month
DateDimension['Day'] = DateDimension['Date'].dt.day
DateDimension['Quarter'] = DateDimension['Date'].dt.quarter
DateDimension['WeekOfYear'] = DateDimension['Date'].dt.isocalendar().week
DateDimension['DayOfWeek'] = DateDimension['Date'].dt.dayofweek
DateDimension['DayName'] = DateDimension['Date'].dt.day_name()
DateDimension['IsWeekend'] = DateDimension['DayOfWeek'].isin([5, 6]).astype(int)

In [1114]:
# generating surrogate keys
DateDimension['Date_Key'] = DateDimension.index + 1

# Move 'Permit_Key' to the first position
cols = ['Date_Key'] + [col for col in DateDimension.columns if col != 'Date_Key']

# Reorder the DataFrame
DateDimension = DateDimension[cols]

In [1115]:
DateDimension[:10]

Unnamed: 0,Date_Key,Date,Year,Month,Day,Quarter,WeekOfYear,DayOfWeek,DayName,IsWeekend
0,1,1999-10-04,1999,10,4,4,40,0,Monday,0
1,2,1999-10-05,1999,10,5,4,40,1,Tuesday,0
2,3,1999-10-06,1999,10,6,4,40,2,Wednesday,0
3,4,1999-10-07,1999,10,7,4,40,3,Thursday,0
4,5,1999-10-08,1999,10,8,4,40,4,Friday,0
5,6,1999-10-09,1999,10,9,4,40,5,Saturday,1
6,7,1999-10-10,1999,10,10,4,40,6,Sunday,1
7,8,1999-10-11,1999,10,11,4,41,0,Monday,0
8,9,1999-10-12,1999,10,12,4,41,1,Tuesday,0
9,10,1999-10-13,1999,10,13,4,41,2,Wednesday,0


In [1116]:
# Create a date to Date_Key mapping
date_to_date_key = pd.Series(DateDimension['Date_Key'].values, index=DateDimension['Date']).to_dict()

# Map each date in combined_buildingPermits to a Date_Key
combined_buildingPermits['Application_Date_Key'] = combined_buildingPermits['Application_Date'].map(date_to_date_key).astype('Int64')
combined_buildingPermits['Issued_Date_Key'] = combined_buildingPermits['Issued_Date'].map(date_to_date_key).astype('Int64')
combined_buildingPermits['Completed_Date_Key'] = combined_buildingPermits['Completed_Date'].map(date_to_date_key).astype('Int64')

# Map Approval_Date in demolition_dimension to a Date_Key
demolition['Approval_Date_Key'] = demolition['City Council Approval Date'].map(date_to_date_key)

# changing to int
demolition['Total Rental Homes Replaced'] = pd.to_numeric(demolition['Total Rental Homes Replaced'], errors='coerce')
demolition['Total Rental Homes Replaced'] = demolition['Total Rental Homes Replaced'].fillna(0)
demolition['Total Rental Homes Replaced'] = demolition['Total Rental Homes Replaced'].astype(int)

In [1117]:
print(demolition.dtypes)

Demolition_Key                                     int64
IBMS Address                                      object
Address of Existing Rental Building               object
RH File Number                                    object
City Council Approval Date                datetime64[ns]
Link to Staff Report                              object
Type                                              object
Total Rental Homes for Demolition                  int64
Affordable Rental Homes for Demolition             Int64
Mid-Range Rental Homes for Demolition              Int64
High-End Rental Homes for Demolition               Int64
Total Rental Homes Replaced                        int32
Affordable Rental Homes Replaced                   Int64
Mid-Range Rental Homes Replaced                    Int64
High-End Rental Homes Replaced                     Int64
Coordinates                                       object
Ward_ID                                            int32
Approval_Date_Key              

In [1118]:
# creating the demolition fact table
demolition_fact_table = demolition[['Ward_ID', 'Demolition_Key', 'Approval_Date_Key', 'Total Rental Homes for Demolition', 'Total Rental Homes Replaced']].copy()



In [1119]:
# creating the development fact table
development_fact_table = combined_buildingPermits[['Permit_Key', 'Ward_ID', 'Est_Const_Cost', 'Application_Date_Key', 'Issued_Date_Key', 'Completed_Date_Key']].copy()

# Print the first 25 rows of the development_fact_table, ensuring for each Permit_key, there is associated keys for each date field
print(development_fact_table[:20])

    Permit_Key  Ward_ID  Est_Const_Cost  Application_Date_Key  \
0            1       13         1200000                   740   
1            2       22          650000                  1783   
2            3       11          700000                  2543   
3            4        5         2000000                  2735   
4            5       12         1500000                  2844   
5            6       15          600000                  2920   
6            7        4         2000000                  3186   
7            8        4         2000000                  3186   
8            9        4         2000000                  3186   
9           10        4         2000000                  3186   
10          11        4         2000000                  3186   
11          12        4         2000000                  3186   
12          13        4         2000000                  3186   
13          14       24       190000000                  3229   
14          15       12  

In [1120]:

print(demolition.dtypes)

Demolition_Key                                     int64
IBMS Address                                      object
Address of Existing Rental Building               object
RH File Number                                    object
City Council Approval Date                datetime64[ns]
Link to Staff Report                              object
Type                                              object
Total Rental Homes for Demolition                  int64
Affordable Rental Homes for Demolition             Int64
Mid-Range Rental Homes for Demolition              Int64
High-End Rental Homes for Demolition               Int64
Total Rental Homes Replaced                        int32
Affordable Rental Homes Replaced                   Int64
Mid-Range Rental Homes Replaced                    Int64
High-End Rental Homes Replaced                     Int64
Coordinates                                       object
Ward_ID                                            int32
Approval_Date_Key              

In [1121]:
# drop unnecessary columns from demolition, they are now stored in the fact table
demolition = demolition.drop(columns=['Ward_ID', 'Total Rental Homes for Demolition', 'Total Rental Homes Replaced'])
print(demolition.dtypes)

Demolition_Key                                     int64
IBMS Address                                      object
Address of Existing Rental Building               object
RH File Number                                    object
City Council Approval Date                datetime64[ns]
Link to Staff Report                              object
Type                                              object
Affordable Rental Homes for Demolition             Int64
Mid-Range Rental Homes for Demolition              Int64
High-End Rental Homes for Demolition               Int64
Affordable Rental Homes Replaced                   Int64
Mid-Range Rental Homes Replaced                    Int64
High-End Rental Homes Replaced                     Int64
Coordinates                                       object
Approval_Date_Key                                  int64
dtype: object


In [1122]:
combined_buildingPermits = combined_buildingPermits.drop(columns=['Ward_ID', 'Est_Const_Cost'])
print(combined_buildingPermits.dtypes)

Permit_Key                       int64
Permit_Num                      object
Permit_Type                     object
Structure_Type                  object
Work                            object
Street_Num                      object
Street_Name                     object
Street_Type                     object
Street_Direction                object
Postal                          object
Geo_Id                           Int64
Ward_Grid                       object
Application_Date        datetime64[ns]
Issued_Date             datetime64[ns]
Completed_Date          datetime64[ns]
Status                          object
Description                     object
Address                         object
Coordinates                     object
Application_Date_Key             Int64
Issued_Date_Key                  Int64
Completed_Date_Key               Int64
dtype: object


In [1123]:
# create a mapping from Date_Key to actual Date
date_key_to_date = pd.Series(DateDimension['Date'].values, index=DateDimension['Date_Key']).to_dict()

# calculate the dif in days between Application Date and Issued Date 
development_fact_table['Application_to_Issuance_Duration'] = (
    development_fact_table['Issued_Date_Key'].map(date_key_to_date).sub(
    development_fact_table['Application_Date_Key'].map(date_key_to_date))
).dt.days

print(development_fact_table[:20])


    Permit_Key  Ward_ID  Est_Const_Cost  Application_Date_Key  \
0            1       13         1200000                   740   
1            2       22          650000                  1783   
2            3       11          700000                  2543   
3            4        5         2000000                  2735   
4            5       12         1500000                  2844   
5            6       15          600000                  2920   
6            7        4         2000000                  3186   
7            8        4         2000000                  3186   
8            9        4         2000000                  3186   
9           10        4         2000000                  3186   
10          11        4         2000000                  3186   
11          12        4         2000000                  3186   
12          13        4         2000000                  3186   
13          14       24       190000000                  3229   
14          15       12  

In [1124]:
temp_df = development_fact_table.copy()

# use the mapping to add the year 
temp_df['Year'] = temp_df['Application_Date_Key'].map(date_key_to_date).dt.year

# calculate the average duration by year
avg_duration_by_year = temp_df.groupby('Year')['Application_to_Issuance_Duration'].mean().reset_index(name='Avg_App_to_Issuance_Duration_by_Year')

# merge this average back with the fact table based on the application year 
development_fact_table['Application_Year_Temp'] = development_fact_table['Application_Date_Key'].map(date_key_to_date).dt.year

# merge with year column
development_fact_table = pd.merge(development_fact_table,
                     avg_duration_by_year,
                     left_on='Application_Year_Temp',
                     right_on='Year',
                     how='left')

# drop the temp columns
development_fact_table.drop(['Application_Year_Temp', 'Year'], axis=1, inplace=True)
development_fact_table['Avg_App_to_Issuance_Duration_by_Year'] = development_fact_table['Avg_App_to_Issuance_Duration_by_Year'].round(2)




In [1125]:
# calculate average estimated construction cost by ward
avg_cost_by_ward = development_fact_table.groupby('Ward_ID')['Est_Const_Cost'].mean().reset_index(name='Avg_Est_Const_Cost_by_Ward')

# merge this average cost with the fact table
development_fact_table = pd.merge(
    development_fact_table,
    avg_cost_by_ward,
    on='Ward_ID',
    how='left'
)
print(development_fact_table.head())


   Permit_Key  Ward_ID  Est_Const_Cost  Application_Date_Key  Issued_Date_Key  \
0           1       13         1200000                   740              767   
1           2       22          650000                  1783             1864   
2           3       11          700000                  2543             2560   
3           4        5         2000000                  2735             3909   
4           5       12         1500000                  2844             2889   

   Completed_Date_Key  Application_to_Issuance_Duration  \
0                5959                                27   
1                6283                                81   
2                6214                                17   
3                6198                              1174   
4                5944                                45   

   Avg_App_to_Issuance_Duration_by_Year  Avg_Est_Const_Cost_by_Ward  
0                                202.94                7.795655e+06  
1                 

In [1126]:
temp_df = development_fact_table.copy()

# add the year based on the Application Date
temp_df['Application_Year'] = temp_df['Application_Date_Key'].map(date_key_to_date).dt.year

# average estimated construction cost by year
avg_cost_by_year = temp_df.groupby('Application_Year')['Est_Const_Cost'].mean().reset_index(name='Avg_Est_Const_Cost_by_Year')

development_fact_table['Application_Year'] = development_fact_table['Application_Date_Key'].map(date_key_to_date).dt.year

# merge
development_fact_table = pd.merge(
    development_fact_table,
    avg_cost_by_year,
    on='Application_Year',
    how='left'
)

# drop 'Application_Year' 
development_fact_table.drop(['Application_Year'], axis=1, inplace=True)

# round to two decimal places
development_fact_table['Avg_Est_Const_Cost_by_Year'] = development_fact_table['Avg_Est_Const_Cost_by_Year'].round(2)

# print the results
print(development_fact_table.head())

   Permit_Key  Ward_ID  Est_Const_Cost  Application_Date_Key  Issued_Date_Key  \
0           1       13         1200000                   740              767   
1           2       22          650000                  1783             1864   
2           3       11          700000                  2543             2560   
3           4        5         2000000                  2735             3909   
4           5       12         1500000                  2844             2889   

   Completed_Date_Key  Application_to_Issuance_Duration  \
0                5959                                27   
1                6283                                81   
2                6214                                17   
3                6198                              1174   
4                5944                                45   

   Avg_App_to_Issuance_Duration_by_Year  Avg_Est_Const_Cost_by_Ward  \
0                                202.94                7.795655e+06   
1               

In [1127]:
print(demolition.dtypes)

Demolition_Key                                     int64
IBMS Address                                      object
Address of Existing Rental Building               object
RH File Number                                    object
City Council Approval Date                datetime64[ns]
Link to Staff Report                              object
Type                                              object
Affordable Rental Homes for Demolition             Int64
Mid-Range Rental Homes for Demolition              Int64
High-End Rental Homes for Demolition               Int64
Affordable Rental Homes Replaced                   Int64
Mid-Range Rental Homes Replaced                    Int64
High-End Rental Homes Replaced                     Int64
Coordinates                                       object
Approval_Date_Key                                  int64
dtype: object


In [1128]:

print(demolition_fact_table.dtypes)

Ward_ID                              int32
Demolition_Key                       int64
Approval_Date_Key                    int64
Total Rental Homes for Demolition    int64
Total Rental Homes Replaced          int32
dtype: object


In [1129]:
# merge the demolition fact table with the DateDimension table to add the year
demolition_fact_table = demolition_fact_table.merge(
    DateDimension[['Date_Key', 'Year']],
    left_on='Approval_Date_Key',
    right_on='Date_Key',
    how='left'
)

# add the total demolitions per year 
demolition_fact_table['Total Demolitions Yearly'] = demolition_fact_table.groupby('Year')['Total Rental Homes for Demolition'].transform('sum')

# add the total demolitions per ward 
demolition_fact_table['Total Demolitions Per Ward'] = demolition_fact_table.groupby('Ward_ID')['Total Rental Homes for Demolition'].transform('sum')

# drop not needed columns and change types
demolition_fact_table.drop(columns=['Year', 'Date_Key'], inplace=True)
demolition_fact_table['Approval_Date_Key'] = demolition_fact_table['Approval_Date_Key'].astype('Int64')


# replacement rate fact
demolition_fact_table['Replacement_Rate'] = (
    demolition_fact_table['Total Rental Homes Replaced'] / 
    demolition_fact_table['Total Rental Homes for Demolition']
) * 100

total_demolished_replaced_by_ward = demolition_fact_table.groupby('Ward_ID').agg({
    'Total Rental Homes for Demolition': 'sum',
    'Total Rental Homes Replaced': 'sum'
}).reset_index()

# calculate the replacement rate by ward
total_demolished_replaced_by_ward['Ward_Replacement_Rate'] = (
    total_demolished_replaced_by_ward['Total Rental Homes Replaced'] / 
    total_demolished_replaced_by_ward['Total Rental Homes for Demolition']
) * 100



In [1130]:
# confirm facts added
print(demolition_fact_table.dtypes) 

Ward_ID                                int32
Demolition_Key                         int64
Approval_Date_Key                      Int64
Total Rental Homes for Demolition      int64
Total Rental Homes Replaced            int32
Total Demolitions Yearly               int64
Total Demolitions Per Ward             int64
Replacement_Rate                     float64
dtype: object


In [1131]:
# pushing all updated dimension and fact tables to postgreSQL database

# connection to PostgreSQL database
engine = create_engine("postgresql+psycopg2://postgres:Bucnuoa!@localhost:5432/main")

# load dataframe to database
development_fact_table.to_sql("development_fact_table", engine, if_exists="append", index=False)
combined_buildingPermits.to_sql("BuildingPermitsDimension", engine, if_exists="append", index=False)
DateDimension.to_sql("DateDimension", engine, if_exists="append", index=False)
demolition_fact_table.to_sql("demolition_fact_table", engine, if_exists="append", index=False)
demolition.to_sql("DemolitionDimension", engine, if_exists="append", index=False)

102