## Imports

In [1]:
import pandas as pd
import numpy as np

## Notes:

Data link: https://datahub-miamigis.opendata.arcgis.com/datasets/MiamiGIS::building-permits-since-2014/explore?location=25.782311%2C-80.231660%2C13.00

Date data pulled: 2023-06-26 (the 3-, 5-, and 10-year cutoffs used below are measured back from this date)

## Pandas display options

In [2]:
# Show every column when a DataFrame is displayed instead of eliding the middle ones.
pd.options.display.max_columns = None

## Data read-in

In [3]:
# Load the raw permit export into `df`; the path is resolved relative to the
# working directory the notebook is run from.
df = pd.read_csv(filepath_or_buffer='Building_Permits_Since_2014.csv')

## Clean & Filter Data

In [4]:
## Keep only the rows whose 'ScopeofWork' is one of:
## REMODELING/REPAIRS, NEW CONSTRUCTION, ADDITION AND REMODELING
df_filtered = df[df['ScopeofWork'].isin([
    'REMODELING/REPAIRS',
    'NEW CONSTRUCTION',
    'ADDITION AND REMODELING',
])]

In [5]:
## Drop every permit whose status description is exactly 'Expired'
df_filtered = df_filtered.loc[df_filtered['BuildingPermitStatusDescription'].ne('Expired')]

## Create 3-, 5-, and 10-year and all-time reports

In [6]:
# Parse "PlanAcceptedDate" into datetimes so it can be compared against date cutoffs.
# `assign` returns a new frame rather than writing a column into `df_filtered`,
# which at this point is a boolean-filtered slice of `df`; writing into such a
# slice is what triggers pandas' SettingWithCopyWarning.
df_filtered = df_filtered.assign(
    PlanAcceptedDate=pd.to_datetime(df_filtered['PlanAcceptedDate'])
)

### Create reports

In [7]:
# Slice the filtered permits into three look-back windows. The cutoffs fall
# 3, 5, and 10 years before 2023-06-26; a row is kept when its
# 'PlanAcceptedDate' is strictly later than the cutoff.
df_three_years, df_five_years, df_ten_years = (
    df_filtered[df_filtered['PlanAcceptedDate'] > cutoff]
    for cutoff in ('2020-06-26', '2018-06-26', '2013-06-26')
)

In [8]:
# Remove permits whose status is 'Revoked' from each window and from the
# all-time frame, rebinding the same four names.
df_three_years, df_five_years, df_ten_years, df_filtered = (
    frame[frame['BuildingPermitStatusDescription'] != 'Revoked']
    for frame in (df_three_years, df_five_years, df_ten_years, df_filtered)
)

In [9]:
# Export every individual permit row in the five-year window; the row index is
# written out as the first column.
df_five_years.to_csv(path_or_buf='Individual_permits_five_years.csv', index=True)

## Analysis

In [10]:
# Disabled scratch cell: show all columns, then inspect the rows of `df_filtered`
# that belong to a single company.
# pd.set_option('display.max_columns',None)
# df_filtered[df_filtered['CompanyName'] == 'STILES MCHUGH LLC']

In [11]:
def _total_cost_by_company(permits):
    """Sum 'TotalCost' per 'CompanyName', ordered from the largest total to the smallest."""
    return permits.groupby('CompanyName')['TotalCost'].sum().sort_values(ascending=False)


# One ranking per look-back window, plus one over the whole filtered dataset.
three_years_grouped = _total_cost_by_company(df_three_years)
five_years_grouped = _total_cost_by_company(df_five_years)
ten_years_grouped = _total_cost_by_company(df_ten_years)
all_years_grouped = _total_cost_by_company(df_filtered)

In [12]:
# Display the column labels of the five-year frame.
df_five_years.columns

Index(['X', 'Y', 'ID', 'AdditionSQFT', 'ApplicationNumber',
       'BuildingFinalLastInspDate', 'BuildingFinalLastInspResult',
       'BuildingPermitStatusDescription', 'BuildingPermitStatusReasonDescr',
       'Certificatecode', 'Certificatedate', 'CompanyAddress', 'CompanyCity',
       'CompanyName', 'CompanyZip', 'DaysInCity', 'DaysInCityNumeric',
       'DeliveryAddress', 'FirstSubmissionDate', 'FolioNumber',
       'IsPermitFinal', 'IsPrivateProvider', 'IssuedDate', 'Latitude',
       'Longitude', 'Miami21Zone', 'NewAdditionCost', 'PermitNumber',
       'PlanAcceptedDate', 'PlanCreatedDate', 'ProcessNumber', 'PropertyType',
       'RemodelingCost', 'RemSQFT', 'RequiredCertificate', 'ScopeofWork',
       'Statusdate', 'TotalCost', 'TotalDaysInPlanReview',
       'TotalDaysInPlanReviewNumeric', 'TotalSQFT', 'WorkItems', 'ObjectId'],
      dtype='object')

In [13]:
# Per-company summary over the five-year window: total cost, number of permit
# rows, and total square footage.
# Named aggregation gives the row count its own label ('PermitCount'). The dict
# form labelled that column 'CompanyName', the same as the group index, which put
# two 'CompanyName' headers in the exported CSV and made `reset_index()` fail.
x = df_five_years.groupby('CompanyName').agg(
    TotalCost=('TotalCost', 'sum'),
    PermitCount=('CompanyName', 'count'),
    TotalSQFT=('TotalSQFT', 'sum'),
)

In [14]:
# Rank the per-company summary by total cost, largest first, and write it to CSV.
x.sort_values(by='TotalCost', ascending=False).to_csv(
    path_or_buf='fives_years_totalCost_companyCounts_TotalSQFT.csv'
)

In [15]:
# Count the five-year permit rows filed under this exact company name
# (the match is literal, including the double space before "INC.").
int(df_five_years['CompanyName'].eq('COASTAL CONSTRUCTION OF MIAMI DADE COUNTY  INC. dba COASTAL CONSTRUCTION OF MIAMIDADE').sum())

29

In [16]:
# Write each per-company TotalCost ranking to its own CSV, in the order
# three-year, five-year, ten-year, all-time.
for totals, csv_name in (
    (three_years_grouped, 'MiamiCityPermits_three_years.csv'),
    (five_years_grouped, 'MiamiCityPermits_five_years.csv'),
    (ten_years_grouped, 'MiamiCityPermits_ten_years.csv'),
    (all_years_grouped, 'MiamiCityPermits_all_years.csv'),
):
    totals.to_csv(csv_name)

## Create per-company CSV files

In [17]:
# Build the list of the 30 companies with the largest five-year TotalCost.
# The top-30 table gets its own name so `five_years_grouped` stays the full
# per-company Series. Overwriting it with a 30-row DataFrame made this cell fail
# on a second run (a DataFrame has no `to_frame`) and made a re-run of the CSV
# export cell write the truncated table.
# NOTE(review): the list is named "top_25" but holds 30 names — confirm the intended size.
five_years_top = five_years_grouped.head(30).reset_index()
top_25_list = five_years_top['CompanyName']

In [18]:
# Disabled: would write one CSV per company in `top_25_list`, each holding that
# company's rows from `df_five_years`.
# NOTE(review): if re-enabled, the loop rebinds `x`, and company names are used
# verbatim as file names — confirm none contain path separators.
# for company in top_25_list:
#     x = df_five_years[df_five_years['CompanyName'] == company]
#     x.to_csv(f'{company}.csv')

In [19]:
# Disabled scratch lookup: rows of `df_five_years` for a single company.
# df_five_years[df_five_years['CompanyName'] == 'CMC CONSTRUCTION COMPANY']

In [21]:
# Sum of 'TotalSQFT' over every permit row in the five-year frame.
df_five_years['TotalSQFT'].sum()

247253135.0