# Clean 2021 BPS Data

Dataset downloaded from [City of Seattle Open Data](https://data.seattle.gov/Community/2021-Building-Energy-Benchmarking/bfsh-nrm6).

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Load the raw 2021 benchmarking export; the cleaning cells below all operate on this frame
RAW_2021_CSV = '2021_Building_Energy_Benchmarking.csv'
raw_data_2021 = pd.read_csv(RAW_2021_CSV)

To-do:
- Normalize irregular city/state/neighborhood
- Fill in empty/None types
- Normalize column names
- Add legislation building type
- Correct districts using a separate script; we have found that the districts are usually incorrect in past years

In [4]:
# List the raw column names so they can be compared against the 2020 schema later on
raw_data_2021.columns

Index(['OSEBuildingID', 'DataYear', 'BuildingName', 'BuildingType',
       'TaxParcelIdentificationNumber', 'Address', 'City', 'State', 'ZipCode',
       'Latitude', 'Longitude', 'Neighborhood', 'CouncilDistrictCode',
       'YearBuilt', 'NumberofFloors', 'NumberofBuildings', 'PropertyGFATotal',
       'PropertyGFABuilding(s)', 'PropertyGFAParking', 'ENERGYSTARScore',
       'SiteEUIWN(kBtu/sf)', 'SiteEUI(kBtu/sf)', 'SiteEnergyUse(kBtu)',
       'SiteEnergyUseWN(kBtu)', 'SourceEUIWN(kBtu/sf)', 'SourceEUI(kBtu/sf)',
       'EPAPropertyType', 'LargestPropertyUseType',
       'LargestPropertyUseTypeGFA', 'SecondLargestPropertyUseType',
       'SecondLargestPropertyUseTypeGFA', 'ThirdLargestPropertyUseType',
       'ThirdLargestPropertyUseTypeGFA', 'Electricity(kWh)', 'SteamUse(kBtu)',
       'NaturalGas(therms)', 'ComplianceStatus', 'ComplianceIssue',
       'Electricity(kBtu)', 'NaturalGas(kBtu)', 'TotalGHGEmissions',
       'GHGEmissionsIntensity'],
      dtype='object')

## Clean up individual columns

In [5]:
# Inspect the distinct spellings in the City column before normalizing them
city_values = raw_data_2021['City'].unique()
city_values

array(['SEATTLE', nan, 'Seattle', 'SEATTLEW'], dtype=object)

In [6]:
# Every row gets the same city value, which also fills the missing entries
raw_data_2021 = raw_data_2021.assign(City='Seattle')

In [8]:
# Inspect the distinct values in the State column
state_values = raw_data_2021['State'].unique()
state_values

array(['WA', nan], dtype=object)

In [9]:
# Every row gets the same state value, which also fills the missing entries
raw_data_2021 = raw_data_2021.assign(State='WA')

## Standardize neighborhood names

In [11]:
# Inspect the distinct neighborhood labels
neighborhood_values = raw_data_2021['Neighborhood'].unique()
neighborhood_values

array(['DOWNTOWN', 'SOUTHEAST', 'NORTHEAST', 'EAST',
       'SHARED: CENTRAL &amp; EAST', 'NORTH', 'MAGNOLIA / QUEEN ANNE',
       'LAKE UNION', 'GREATER DUWAMISH', 'BALLARD', 'NORTHWEST',
       'CENTRAL', 'SOUTHWEST', 'DELRIDGE NEIGHBORHOODS',
       'SHARED: NORTH &amp; NORTHWEST',
       'SHARED: GREATER DUWAMISH &amp; DELRIDGE NEIGHBORHOODS',
       'SHARED: BALLARD &amp; LAKE UNION',
       'SHARED: LAKE UNION &amp; NORTHWEST',
       'SHARED: BALLARD &amp; NORTHWEST', 'water', nan], dtype=object)

In [13]:
# The export contains HTML-escaped ampersands ('&amp;'); turn them back into '&'
neighborhood_labels = raw_data_2021['Neighborhood']
raw_data_2021['Neighborhood'] = neighborhood_labels.str.replace('&amp;', '&')

Update buildings listed as in the "water" neighborhood, because that's not an actual neighborhood.

Method: search Google Maps for each address, then assign the matching neighborhood name used elsewhere in the dataset.

In [16]:
# Show the rows whose neighborhood is the placeholder value 'water'
in_water = raw_data_2021['Neighborhood'].eq('water')
raw_data_2021.loc[in_water]

Unnamed: 0,OSEBuildingID,DataYear,BuildingName,BuildingType,TaxParcelIdentificationNumber,Address,City,State,ZipCode,Latitude,...,ThirdLargestPropertyUseTypeGFA,Electricity(kWh),SteamUse(kBtu),NaturalGas(therms),ComplianceStatus,ComplianceIssue,Electricity(kBtu),NaturalGas(kBtu),TotalGHGEmissions,GHGEmissionsIntensity
2341,25464,2021,WATERWORKS OFFICE & MARINA,NonResidential,4088803975,1818 WESTLAKE AVE N,Seattle,WA,98109.0,47.63575,...,,626458.0,0.0,18467.0,Compliant,No Issue,2137475,1846660,106.9,3.5
2370,25563,2021,NAUTICAL LANDING,NonResidential,4088804350,2500 WESTLAKE AVE N,Seattle,WA,98109.0,47.64306,...,,566342.0,0.0,10455.0,Not Compliant,Default Data,1932358,1045460,63.5,2.2
2528,26138,2021,UNION HARBOR CONDOMINIUM,Multifamily MR (5-9),8807200000,2301 FAIRVIEW AVE E,Seattle,WA,98102.0,47.6401,...,0.0,417072.0,0.0,0.0,Compliant,No Issue,1423050,0,5.9,0.1
2988,27951,2021,THE PIER AT LESCHI,Multifamily LR (1-4),6780900000,334 LAKESIDE AVE S,Seattle,WA,98144.0,47.59926,...,,202872.0,0.0,0.0,Compliant,No Issue,692199,0,2.9,0.1


In [28]:
# Building 25464 (1818 Westlake Ave N) is one of the 'water' rows
is_building_25464 = raw_data_2021['OSEBuildingID'].eq(25464)
raw_data_2021.loc[is_building_25464, 'Neighborhood'] = 'MAGNOLIA / QUEEN ANNE'

In [30]:
# Confirm the new neighborhood value for building 25464
is_building_25464 = raw_data_2021['OSEBuildingID'].eq(25464)
raw_data_2021.loc[is_building_25464, 'Neighborhood']

2341    MAGNOLIA / QUEEN ANNE
Name: Neighborhood, dtype: object

In [31]:
# Building 25563 (2500 Westlake Ave N) is one of the 'water' rows
is_building_25563 = raw_data_2021['OSEBuildingID'].eq(25563)
raw_data_2021.loc[is_building_25563, 'Neighborhood'] = 'MAGNOLIA / QUEEN ANNE'

In [32]:
# Building 26138 (2301 Fairview Ave E) is one of the 'water' rows
is_building_26138 = raw_data_2021['OSEBuildingID'].eq(26138)
raw_data_2021.loc[is_building_26138, 'Neighborhood'] = 'EAST'

In [33]:
# Building 27951 (334 Lakeside Ave S) is the fourth 'water' row. Building 26138 is
# already handled by the preceding cell, so this assignment targets 27951 to make
# sure no building keeps the placeholder 'water' neighborhood.
# NOTE(review): confirm 'SHARED: CENTRAL & EAST' is the intended label for 27951.
raw_data_2021.loc[raw_data_2021['OSEBuildingID'] == 27951, 'Neighborhood'] = 'SHARED: CENTRAL & EAST'

## Fill in missing data

In [34]:
# Inspect the distinct compliance statuses, including missing values
compliance_status_values = raw_data_2021['ComplianceStatus'].unique()
compliance_status_values

array(['Compliant', 'Not Compliant', 'Contact the Help Desk', nan],
      dtype=object)

In [36]:
# Missing compliance statuses become empty strings
compliance_status = raw_data_2021['ComplianceStatus']
raw_data_2021['ComplianceStatus'] = compliance_status.replace(np.nan, '')

In [37]:
# Re-check the statuses after filling: NaN should now appear as ''
compliance_status_values = raw_data_2021['ComplianceStatus'].unique()
compliance_status_values

array(['Compliant', 'Not Compliant', 'Contact the Help Desk', ''],
      dtype=object)

In [35]:
# Inspect the distinct compliance issues, including missing values
compliance_issue_values = raw_data_2021['ComplianceIssue'].unique()
compliance_issue_values

array(['No Issue', 'Missing 2021 EUI or Electricity Data',
       'Account Requires Verification',
       'Portfolio Manager Account Not Shared', 'Default Data',
       'Unknown - Contact the Help Desk', nan], dtype=object)

In [38]:
# Missing compliance issues become empty strings
compliance_issue = raw_data_2021['ComplianceIssue']
raw_data_2021['ComplianceIssue'] = compliance_issue.replace(np.nan, '')

In [42]:
# Check that years are valid: print the dtype, then the earliest and latest year

year_built = raw_data_2021['YearBuilt']
for summary_value in (year_built.dtype, year_built.min(), year_built.max()):
    print(summary_value)

int64
1896
2021


In [43]:
# Check council districts: list the distinct codes present in the data

council_district_values = raw_data_2021['CouncilDistrictCode'].unique()
council_district_values

array([ 1.,  7.,  3.,  4.,  2.,  6.,  5., nan])

In [46]:
# Numbers are correct, but misformatted: show the dtype the codes are stored as

council_district_dtype = raw_data_2021['CouncilDistrictCode'].dtype
council_district_dtype

dtype('float64')

In [51]:
# Convert NaN to 0; we will update districts in a different script

district_codes = raw_data_2021['CouncilDistrictCode']
raw_data_2021['CouncilDistrictCode'] = district_codes.replace(np.nan, 0)

In [52]:
# Store the district codes as integers
raw_data_2021['CouncilDistrictCode'] = raw_data_2021['CouncilDistrictCode'].astype('int64')

In [55]:
# Replace NaN with 0 in the count, floor-area and energy-use columns

zero_fill_cols = [
    'NumberofFloors',
    'NumberofBuildings',
    'PropertyGFATotal',
    'PropertyGFABuilding(s)',
    'PropertyGFAParking',
    'SiteEnergyUse(kBtu)',
    'SiteEnergyUseWN(kBtu)',
    'LargestPropertyUseTypeGFA',
    'SecondLargestPropertyUseTypeGFA',
    'ThirdLargestPropertyUseTypeGFA',
    'Electricity(kWh)',
    'SteamUse(kBtu)',
    'NaturalGas(therms)',
]
raw_data_2021[zero_fill_cols] = raw_data_2021[zero_fill_cols].replace(np.nan, 0)

In [62]:
### Check property types: list the distinct EPA property types, including missing values

epa_property_type_values = raw_data_2021['EPAPropertyType'].unique()
epa_property_type_values

array(['Hotel', 'Police Station', 'Other - Entertainment/Public Assembly',
       'Multifamily MR (5-9)', nan, 'Library',
       'Fitness Center/Health Club/Gym', 'Mixed Use Property',
       'Courthouse', 'Prison/Incarceration', 'K-12 School',
       'College/University', 'Office', 'Self-Storage Facility',
       'Other - Mall', 'Parking', 'Medical Office', 'Other',
       'Social/Meeting Hall', 'Performing Arts', 'Data Center',
       'Supermarket/Grocery Store', 'Multifamily Housing',
       'Hospital (General Medical & Surgical)', 'Fire Station', 'Museum',
       'Worship Facility', 'Other - Lodging/Residential',
       'Multifamily LR (1-4)', 'Non-Refrigerated Warehouse',
       'Retail Store', 'Financial Office',
       'Manufacturing/Industrial Plant', 'Other - Utility',
       'Residence Hall/Dormitory', 'Laboratory', 'Convention Center',
       'Outpatient Rehabilitation/Physical Therapy',
       'Distribution Center', 'Other/Specialty Hospital',
       'Other - Services', 'St

In [70]:
# Rows without an EPA property type are labelled 'Other'
epa_property_type = raw_data_2021['EPAPropertyType']
raw_data_2021['EPAPropertyType'] = epa_property_type.replace(np.nan, 'Other')

In [73]:
# Print the distinct EPA property types in alphabetical order
all_property_types = np.sort(raw_data_2021['EPAPropertyType'].unique())

print(all_property_types)

['Adult Education' 'Automobile Dealership' 'Bank Branch'
 'College/University' 'Convenience Store without Gas Station'
 'Convention Center' 'Courthouse' 'Data Center' 'Distribution Center'
 'Enclosed Mall' 'Energy/Power Station' 'Financial Office' 'Fire Station'
 'Fitness Center/Health Club/Gym' 'Food Service'
 'Hospital (General Medical & Surgical)' 'Hotel' 'K-12 School'
 'Laboratory' 'Library' 'Mailing Center/Post Office'
 'Manufacturing/Industrial Plant' 'Medical Office' 'Mixed Use Property'
 'Movie Theater' 'Multifamily HR (10+)' 'Multifamily Housing'
 'Multifamily LR (1-4)' 'Multifamily MR (5-9)' 'Museum'
 'Non-Refrigerated Warehouse' 'Office' 'Other' 'Other - Education'
 'Other - Entertainment/Public Assembly' 'Other - Lodging/Residential'
 'Other - Mall' 'Other - Public Services' 'Other - Recreation'
 'Other - Restaurant/Bar' 'Other - Services' 'Other - Utility'
 'Other/Specialty Hospital' 'Outpatient Rehabilitation/Physical Therapy'
 'Parking' 'Performing Arts'
 'Personal Servi

In [77]:
# Missing use types in any of the three use-type columns are labelled 'Other'
use_type_cols = [
    'LargestPropertyUseType',
    'SecondLargestPropertyUseType',
    'ThirdLargestPropertyUseType',
]
raw_data_2021[use_type_cols] = raw_data_2021[use_type_cols].replace(np.nan, 'Other')

Find irregular property use types:

In [95]:
# Use types that appear in any of the three use-type columns but never as a
# building's largest use type
use_type_cols = [
    'LargestPropertyUseType',
    'SecondLargestPropertyUseType',
    'ThirdLargestPropertyUseType',
]
all_use_types = set(np.unique(raw_data_2021[use_type_cols].values))
largest_use_types = set(raw_data_2021['LargestPropertyUseType'])
irreg_prop_uses = all_use_types - largest_use_types
irreg_prop_uses

{'Bar/Nightclub',
 'Barracks',
 'Fast Food Restaurant',
 'Food Sales',
 'Other - Technology/Science',
 'Swimming Pool',
 'Veterinary Office',
 'Vocational School'}

In [98]:
# Show every building that lists 'Other - Technology/Science' in any use-type column
use_type_cols = [
    'LargestPropertyUseType',
    'SecondLargestPropertyUseType',
    'ThirdLargestPropertyUseType',
]
has_tech_science_use = raw_data_2021[use_type_cols].eq('Other - Technology/Science').any(axis=1)
raw_data_2021.loc[has_tech_science_use]

Unnamed: 0,OSEBuildingID,DataYear,BuildingName,BuildingType,TaxParcelIdentificationNumber,Address,City,State,ZipCode,Latitude,...,ThirdLargestPropertyUseTypeGFA,Electricity(kWh),SteamUse(kBtu),NaturalGas(therms),ComplianceStatus,ComplianceIssue,Electricity(kBtu),NaturalGas(kBtu),TotalGHGEmissions,GHGEmissionsIntensity
308,456,2021,1100 DEXTER,NonResidential,2249500235,1100 DEXTER AVE N,Seattle,WA,98109.0,47.62929,...,828.0,1202508.0,0.0,0.0,Compliant,No Issue,4102957,0,17.0,0.1


According to Google Maps, this is an office building.

In [100]:
# Map each irregular use type (one that never occurs as a largest use type)
# onto an existing category
property_type_mappings = {
    'Bar/Nightclub': 'Other - Restaurant/Bar',
    'Barracks': 'Other - Lodging/Residential',
    'Fast Food Restaurant': 'Restaurant',
    'Food Sales': 'Food Service',
    'Other - Technology/Science': 'Office',
    'Swimming Pool': 'Fitness Center/Health Club/Gym',
    'Veterinary Office': 'Other - Services',
    'Vocational School': 'Adult Education',
}

# Apply the mapping to all three use-type columns at once
use_type_cols = [
    'LargestPropertyUseType',
    'SecondLargestPropertyUseType',
    'ThirdLargestPropertyUseType',
]
raw_data_2021[use_type_cols] = raw_data_2021[use_type_cols].replace(property_type_mappings)

## Legislation building type

These building types come from the proposed BPS legislation; the mapping is the same one used for the 2020 data, as documented [here](https://docs.google.com/spreadsheets/d/186L8lxdeRxiTjsdRWSYdr7DcOjVbcZEtVV4gt2OcgwI/edit?usp=sharing).

In [104]:
# EPA property types, one per line. Line i here pairs with line i of
# legislation_building_types below, so the two blocks must stay the same length
# and in the same order.
epa_building_types = '''Adult Education
Automobile Dealership
Bank Branch
College/University
Convenience Store without Gas Station
Convention Center
Courthouse
Data Center
Distribution Center
Enclosed Mall
Energy/Power Station
Financial Office
Fire Station
Fitness Center/Health Club/Gym
Food Service
Hospital (General Medical & Surgical)
Hotel
K-12 School
Laboratory
Library
Lifestyle Center
Mailing Center/Post Office
Manufacturing/Industrial Plant
Medical Office
Mixed Use Property
Movie Theater
Multifamily Housing
Multifamily HR (10+)
Multifamily LR (1-4)
Multifamily MR (5-9)
Museum
Non-Refrigerated Warehouse
Office
Other
Other - Education
Other - Entertainment/Public Assembly
Other - Lodging/Residential
Other - Mall
Other - Public Services
Other - Recreation
Other - Services
Other - Utility
Other/Specialty Hospital
Outpatient Rehabilitation/Physical Therapy
Parking
Performing Arts
Personal Services (Health/Beauty, Dry Cleaning, etc)
Police Station
Pre-school/Daycare
Refrigerated Warehouse
Repair Services (Vehicle, Shoe, Locksmith, etc)
Residence Hall/Dormitory
Residential Care Facility
Restaurant
Retail Store
Self-Storage Facility
Social/Meeting Hall
Stadium (Open)
Strip Mall
Supermarket/Grocery Store
Transportation Terminal/Station
Unclassified
Urgent Care/Clinic/Other Outpatient
Wholesale Club/Supercenter
Worship Facility
Prison/Incarceration'''

# Legislation building type for the EPA property type on the same line above.
legislation_building_types = '''Other
Retail Store
Services
College/University
Retail Store
Entertainment/Public Assembly
Services
Other
Other
Mixed Use Property
Other
Office
Fire/Police Station
Recreation
Services
Hospital
Hotel
K-12 School
Laboratory
Services
Entertainment/Public Assembly
Services
Other
Other
Mixed Use Property
Entertainment/Public Assembly
Multifamily Housing
Multifamily Housing
Multifamily Housing
Multifamily Housing
Entertainment/Public Assembly
Other
Office
Other
Other
Entertainment/Public Assembly
Other
Entertainment/Public Assembly
Services
Recreation
Services
Other
Hospital
Services
Other
Entertainment/Public Assembly
Services
Fire/Police Station
Other
Refrigerated Warehouse
Services
College/University
Senior Living Community
Restaurant
Retail Store
Self-Storage Facility
Entertainment/Public Assembly
Entertainment/Public Assembly
Mixed Use Property
Supermarket/Grocery Store
Services
Other
Services
Refrigerated Warehouse
Worship Facility
Other'''

epa_type_names = epa_building_types.split('\n')
legislation_type_names = legislation_building_types.split('\n')

# zip() stops at the shorter list without complaint, so a missing or extra line in
# either block would silently drop or shift pairings. Fail loudly instead.
if len(epa_type_names) != len(legislation_type_names):
    raise ValueError(
        'EPA and legislation building type lists differ in length: '
        f'{len(epa_type_names)} vs {len(legislation_type_names)}'
    )

# Lookup table: EPA property type -> legislation building type
leg_building_type_mappings = dict(zip(epa_type_names, legislation_type_names))

In [112]:
# Translate each EPA property type into its legislation building type.
legislation_types = raw_data_2021['EPAPropertyType'].map(leg_building_type_mappings)

# Series.map yields NaN for any EPA type that is missing from the mapping (for
# example 'Other - Restaurant/Bar', which occurs in the data but has no entry in
# the mapping table). Report those types and fall back to the catch-all 'Other'
# category so the new column has no gaps.
# NOTE(review): confirm the intended legislation type for each unmapped EPA type.
unmapped_epa_types = sorted(
    raw_data_2021.loc[legislation_types.isna(), 'EPAPropertyType'].unique()
)
if unmapped_epa_types:
    print('EPA property types without a legislation mapping:', unmapped_epa_types)

raw_data_2021['LegislationPropertyType'] = legislation_types.fillna('Other')

In [114]:
# Preview the newly added legislation property type column
legislation_property_type = raw_data_2021['LegislationPropertyType']
legislation_property_type

0                     Hotel
1                     Hotel
2                     Hotel
3                     Hotel
4                     Hotel
               ...         
3660    Multifamily Housing
3661            K-12 School
3662    Multifamily Housing
3663                  Other
3664                  Other
Name: LegislationPropertyType, Length: 3665, dtype: object

## Fix column names

In [115]:
# Only the header row is needed here, so nrows=0 avoids loading the full 2020 dataset
col_names_2020 = pd.read_csv('2020_Building_Energy_Benchmarking.csv', nrows=0).columns

In [116]:
# Display the 2020 column names, which are the naming and ordering target for the 2021 data
col_names_2020

Index(['OSEBuildingID', 'TaxParcelIdentificationNumber', 'DataYear',
       'BuildingType', 'BuildingName', 'Owner', 'CouncilDistrictCode',
       'Neighborhood', 'Units', 'YearBuilt', 'Address', 'City', 'State',
       'ZipCode', 'Latitude', 'Longitude', 'PrimaryPropertyType',
       'ListOfAllPropertyUseTypes', 'EPAPropertyType',
       'TotalGHGEmissions__metric_tons_', 'ENERGYSTARScore',
       'YearsENERGYSTARCertified', 'SiteEUI_kBtu_sf_', 'SiteEUIWN_kBtu_sf_',
       'SiteEnergyUse_kBtu_', 'SiteEnergyUseWN_kBtu_', 'SourceEUI_kBtu_sf_',
       'SourceEUIWN_kBtu_sf_', 'GHGEmissionsIntensityKgSqFt',
       'NaturalGas_kBtu_', 'NaturalGas_therms_', 'Electricity_kBtu_',
       'Electricity_kWh_', 'SteamUse_kBtu_', 'OtherFuelUse_kBtu_',
       'PropertyGFATotal', 'PropertyGFABuilding_s_', 'PropertyGFAParking',
       'NumberofBuildings', 'NumberofFloors', 'LargestPropertyUseType',
       'LargestPropertyUseTypeGFA', 'SecondLargestPropertyUseType',
       'SecondLargestPropertyUseTypeG

In [118]:
# update column names

# Rename by name rather than assigning a positional list, so a change in the
# source column order can never attach a name to the wrong data. Only the columns
# whose 2021 name differs from the 2020 naming style are listed; names that are not
# present are left alone, which keeps this cell safe to re-run.
column_renames = {
    'PropertyGFABuilding(s)': 'PropertyGFABuilding_s_',
    'SiteEUIWN(kBtu/sf)': 'SiteEUIWN_kBtu_sf_',
    'SiteEUI(kBtu/sf)': 'SiteEUI_kBtu_sf_',
    'SiteEnergyUse(kBtu)': 'SiteEnergyUse_kBtu_',
    'SiteEnergyUseWN(kBtu)': 'SiteEnergyUseWN_kBtu_',
    'SourceEUIWN(kBtu/sf)': 'SourceEUIWN_kBtu_sf_',
    'SourceEUI(kBtu/sf)': 'SourceEUI_kBtu_sf_',
    'Electricity(kWh)': 'Electricity_kWh_',
    'SteamUse(kBtu)': 'SteamUse_kBtu_',
    'NaturalGas(therms)': 'NaturalGas_therms_',
    'Electricity(kBtu)': 'Electricity_kBtu_',
    'NaturalGas(kBtu)': 'NaturalGas_kBtu_',
    'TotalGHGEmissions': 'TotalGHGEmissions__metric_tons_',
    'GHGEmissionsIntensity': 'GHGEmissionsIntensityKgSqFt',
}

raw_data_2021 = raw_data_2021.rename(columns=column_renames)

# Full list of column names after renaming; the column-reordering cell reads this
new_cols = list(raw_data_2021.columns)

raw_data_2021

Unnamed: 0,OSEBuildingID,DataYear,BuildingName,BuildingType,TaxParcelIdentificationNumber,Address,City,State,ZipCode,Latitude,...,Electricity_kWh_,SteamUse_kBtu_,NaturalGas_therms_,ComplianceStatus,ComplianceIssue,Electricity_kBtu_,NaturalGas_kBtu_,TotalGHGEmissions__metric_tons_,GHGEmissionsIntensityKgSqFt,LegislationPropertyType
0,1,2021,MAYFLOWER PARK HOTEL,NonResidential,659000030,405 OLIVE WAY,Seattle,WA,98101.0,47.61220,...,944955.0,1798672.0,14876.0,Compliant,No Issue,3224187,1487620,241.6,2.7,Hotel
1,2,2021,PARAMOUNT HOTEL,NonResidential,659000220,724 PINE ST,Seattle,WA,98101.0,47.61307,...,657478.0,0.0,23738.0,Compliant,No Issue,2243315,2373790,135.4,1.5,Hotel
2,3,2021,WESTIN HOTEL (Parent Building),NonResidential,659000475,1900 5TH AVE,Seattle,WA,98101.0,47.61367,...,8673722.0,10583473.0,37750.0,Compliant,No Issue,29594739,3775000,1201.4,1.6,Hotel
3,5,2021,HOTEL MAX,NonResidential,659000640,620 STEWART ST,Seattle,WA,98101.0,47.61412,...,509497.0,1167770.0,19676.0,Compliant,No Issue,1738403,1967580,208.6,3.4,Hotel
4,8,2021,WARWICK SEATTLE HOTEL,NonResidential,659000970,401 LENORA ST,Seattle,WA,98121.0,47.61375,...,1333597.0,0.0,68087.0,Compliant,No Issue,4550233,6808700,380.4,3.3,Hotel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3660,50777,2021,CLAY APARTMENTS (LIHI),Multifamily MR (5-9),6848700065,602 E HOWELL ST,Seattle,WA,98122.0,47.61784,...,222388.0,0.0,3158.0,Compliant,No Issue,758789,315750,19.9,0.6,Multifamily Housing
3661,50781,2021,WEBSTER SCHOOL - NEW & REMODELED (SPS-DISTRICT),SPS-District K-12,3693901110,3015 NW 68TH ST,Seattle,WA,98117.0,47.67767,...,219761.0,0.0,11841.0,Compliant,No Issue,749824,1184120,66.0,1.3,K-12 School
3662,50858,2021,Jackson St. Apartments,Multifamily LR (1-4),1250200425,2524 S JACKSON ST,Seattle,WA,98144.0,47.59943,...,350076.0,0.0,11375.0,Compliant,No Issue,1194459,1137500,65.4,1.5,Multifamily Housing
3663,50871,2021,REGENCY CENTERS PL,NonResidential,2768303245,1451 NW 46TH ST,Seattle,WA,98107.0,47.66188,...,0.0,0.0,0.0,Not Compliant,Portfolio Manager Account Not Shared,0,0,0.0,0.0,Other


In [121]:
# Keep the 2020 column order, restricted to columns that exist in the 2021 data
available_cols = set(new_cols)
reordered_cols = [col_name for col_name in col_names_2020 if col_name in available_cols]

In [122]:
# Select the columns in their 2020 order and preview the first rows
cleaned_2021_data = raw_data_2021.loc[:, reordered_cols]

cleaned_2021_data.head()

Unnamed: 0,OSEBuildingID,TaxParcelIdentificationNumber,DataYear,BuildingType,BuildingName,CouncilDistrictCode,Neighborhood,YearBuilt,Address,City,...,NumberofFloors,LargestPropertyUseType,LargestPropertyUseTypeGFA,SecondLargestPropertyUseType,SecondLargestPropertyUseTypeGFA,ThirdLargestPropertyUseType,ThirdLargestPropertyUseTypeGFA,ComplianceIssue,ComplianceStatus,LegislationPropertyType
0,1,659000030,2021,NonResidential,MAYFLOWER PARK HOTEL,1,DOWNTOWN,1927,405 OLIVE WAY,Seattle,...,12,Hotel,88434.0,Other,0.0,Other,0.0,No Issue,Compliant,Hotel
1,2,659000220,2021,NonResidential,PARAMOUNT HOTEL,1,DOWNTOWN,1996,724 PINE ST,Seattle,...,11,Hotel,88502.0,Parking,15064.0,Other,0.0,No Issue,Compliant,Hotel
2,3,659000475,2021,NonResidential,WESTIN HOTEL (Parent Building),1,DOWNTOWN,1969,1900 5TH AVE,Seattle,...,41,Hotel,945349.0,Parking,117783.0,Fitness Center/Health Club/Gym,0.0,No Issue,Compliant,Hotel
3,5,659000640,2021,NonResidential,HOTEL MAX,1,DOWNTOWN,1926,620 STEWART ST,Seattle,...,10,Hotel,61320.0,Other,0.0,Other,0.0,No Issue,Compliant,Hotel
4,8,659000970,2021,NonResidential,WARWICK SEATTLE HOTEL,1,DOWNTOWN,1980,401 LENORA ST,Seattle,...,18,Hotel,123445.0,Parking,68009.0,Fitness Center/Health Club/Gym,0.0,No Issue,Compliant,Hotel


In [123]:
# index=False keeps the default row index from being written as an extra unnamed column
cleaned_2021_data.to_csv('cleaned_2021_data.csv', index=False)