# 1. Readnig csv files

Our data come from the Seattle Open Data website (https://data.seattle.gov/). Unfortunately, they have no united structure. That's why we have to pre-process each year separately. Also, our source page is missing 2018 data, so we have a gap there.  
Problems we found out:
- different data types in one column (e.g flota, int)
- missing key data
- some unnecessary columns (e.g comments)
- incomprehensible way of calculating surface (there are columns that contain 1st, 2nd, 3rd property surface, but often their sum is not equal to the overall surface)
- different columns names
- mixed up columns order

Functions below are made to process each year separately and exclude all discrepancies  

In [22]:
import pandas as pd
import ast

def convert_columns(df, columns, new_type):
    for column in columns:
        df[column] = df[column].astype(new_type)
        

def drop_columns(df, columns_to_drop):
    df.drop(columns_to_drop, axis=1, inplace=True)
    
    # delete Nan and zeros 
    to_zero = df.loc[:, 'SiteEUI(kBtu/sf)':'NaturalGas(kBtu)'].columns.tolist()
    df[to_zero] = df[to_zero].fillna(0)
    df = df[df['SiteEnergyUse(kBtu)'] != 0]
    return df

def convert(df, convert_to_float, convert_to_int):
    convert_columns(df=df, columns=convert_to_float, new_type=float)
    convert_columns(df=df, columns=convert_to_int, new_type=int)
    return df

def rename(df, columns_to_rename):
    df.rename(columns=columns_to_rename, inplace=True)
    return df

def sort_columns(df, columns_order):
    df = df[columns_order]
    return df

fixed_columns_order =[
    'OSEBuildingID',
    'DataYear',
    'BuildingType',
    'PrimaryPropertyType',
    'PropertyName',
    'Address',
    'ZipCode',
    'CouncilDistrictCode',
    'Neighborhood',
    'Latitude',
    'Longitude',
    'YearBuilt',
    'NumberofFloors',
    'NumberofBuildings',
    'PropertyGFATotal',
    'PropertyGFAParking',
    'YearsENERGYSTARCertified',
    'ENERGYSTARScore',
    'SiteEUI(kBtu/sf)',
    'SiteEUIWN(kBtu/sf)',
    'SourceEUI(kBtu/sf)',
    'SourceEUIWN(kBtu/sf)',
    'SiteEnergyUse(kBtu)',
    'SiteEnergyUseWN(kBtu)',
    'SteamUse(kBtu)',
    'Electricity(kBtu)',
    'Electricity(kWh)',
    'NaturalGas(therms)',
    'NaturalGas(kBtu)',
    'GHGEmissions(MetricTonsCO2e)',
    'GHGEmissionsIntensity(kgCO2e/ft2)'
]

## 2015

### Extarcting address data
We need to extract latitude, longitude, address and zip from this dict-like column

In [9]:
path_15 = 'data/raw/2015_Building_Energy_Benchmarking.csv'
sea_2015 = pd.read_csv(path_15)

print(sea_2015['Location'][0])

{'latitude': '47.61219025', 'longitude': '-122.33799744', 'human_address': '{"address": "405 OLIVE WAY", "city": "SEATTLE", "state": "WA", "zip": "98101"}'}


In [10]:
# creating dictionary from a string in "Location" column
sea_2015['Location'] = sea_2015['Location'].apply(ast.literal_eval)
sea_2015['Location'][0]

{'latitude': '47.61219025',
 'longitude': '-122.33799744',
 'human_address': '{"address": "405 OLIVE WAY", "city": "SEATTLE", "state": "WA", "zip": "98101"}'}

In [11]:
# creating 'latitude' and 'longitude' columns from the dictionary
location = pd.json_normalize(sea_2015['Location'])
location.head()

Unnamed: 0,latitude,longitude,human_address
0,47.61219025,-122.33799744,"{""address"": ""405 OLIVE WAY"", ""city"": ""SEATTLE"", ""state"": ""WA"", ""zip"": ""98101""}"
1,47.61310583,-122.33335756,"{""address"": ""724 PINE ST"", ""city"": ""SEATTLE"", ""state"": ""WA"", ""zip"": ""98101""}"
2,47.61334897,-122.33769944,"{""address"": ""1900 5TH AVE"", ""city"": ""SEATTLE"", ""state"": ""WA"", ""zip"": ""98101""}"
3,47.61421585,-122.33660889,"{""address"": ""620 STEWART ST"", ""city"": ""SEATTLE"", ""state"": ""WA"", ""zip"": ""98101""}"
4,47.6137544,-122.3409238,"{""address"": ""401 LENORA ST"", ""city"": ""SEATTLE"", ""state"": ""WA"", ""zip"": ""98121""}"


In [12]:
# do the same operations for "Human_address" column
location['human_address'] = location['human_address'].apply(ast.literal_eval)
address = pd.json_normalize(location['human_address'])
address.head()

Unnamed: 0,address,city,state,zip
0,405 OLIVE WAY,SEATTLE,WA,98101
1,724 PINE ST,SEATTLE,WA,98101
2,1900 5TH AVE,SEATTLE,WA,98101
3,620 STEWART ST,SEATTLE,WA,98101
4,401 LENORA ST,SEATTLE,WA,98121


In [14]:
# inserting new columns in a fixed order
sea_2015.insert(loc=5, column='Address', value=address['address'])
sea_2015.insert(loc=6, column='ZipCode', value=address['zip'])
sea_2015.insert(loc=9, column='Latitude', value=location['latitude'])
sea_2015.insert(loc=10, column='Longitude', value=location['longitude'])

columns_to_drop_15 =  [
    'TaxParcelIdentificationNumber',
    'Location',
    'PropertyGFABuilding(s)',
    'ListOfAllPropertyUseTypes',
    'LargestPropertyUseType', 
    'LargestPropertyUseTypeGFA', 
    'SecondLargestPropertyUseType',
    'SecondLargestPropertyUseTypeGFA', 
    'ThirdLargestPropertyUseType',
    'ThirdLargestPropertyUseTypeGFA', 
    'OtherFuelUse(kBtu)',
    'DefaultData',
    'Comment',
    'ComplianceStatus',
    'Outlier',
    '2010 Census Tracts',
    'Seattle Police Department Micro Community Policing Plan Areas',
    'City Council Districts',
    'SPD Beats',
    'Zip Codes'    
]

convert_to_float_15 = [
    'Latitude', 
    'Longitude',
    'ZipCode'
]

convert_to_int_15 = [
    'SiteEnergyUse(kBtu)',
    'SiteEnergyUseWN(kBtu)',
    'SteamUse(kBtu)',
    'Electricity(kBtu)',
    'Electricity(kWh)',
    'NaturalGas(kBtu)',
    'NaturalGas(therms)'
]

sea_2015 = drop_columns(sea_2015, columns_to_drop_15)
sea_2015 = convert(sea_2015, convert_to_float_15, convert_to_int_15)
sea_2015 = sort_columns(sea_2015, columns_order)

sea_2015.to_csv('data/clean/2015-building-energy-benchmarking-clean.csv', index=False)

## 2016

In [None]:
path_16 = 'data/raw/2016_Building_Energy_Benchmarking.csv'

columns_to_drop_16 = [
    'City', 
    'State', 
    'TaxParcelIdentificationNumber',
    'PropertyGFABuilding(s)', 
    'ListOfAllPropertyUseTypes',
    'LargestPropertyUseType', 
    'LargestPropertyUseTypeGFA', 
    'SecondLargestPropertyUseType',
    'SecondLargestPropertyUseTypeGFA', 
    'ThirdLargestPropertyUseType',
    'ThirdLargestPropertyUseTypeGFA', 
    'DefaultData',
    'Comments',
    'Outlier',
    'ComplianceStatus'
]

convert_to_float_16 = [
    'Latitude', 
    'Longitude',
    'ZipCode'
]

convert_to_int_16 = [
    'SiteEnergyUse(kBtu)',
    'SiteEnergyUseWN(kBtu)',
    'Electricity(kBtu)',
    'NaturalGas(kBtu)'
]

columns_to_rename_16 = {
    'TotalGHGEmissions': 'GHGEmissions(MetricTonsCO2e)',
    'GHGEmissionsIntensity': 'GHGEmissionsIntensity(kgCO2e/ft2)'
}

sea_2016 = pd.read_csv(path_16)
sea_2016 = drop_columns(sea_2016, columns_to_drop_16)
sea_2016 = convert(sea_2016, convert_to_float_16, convert_to_int_16)
sea_2016 = rename(sea_2016, columns_to_rename_16)
sea_2016 = sort_columns(sea_2016, columns_order)

sea_2016.to_csv('data/clean/2016-building-energy-benchmarking-clean.csv', index=False)

## 2017

In [None]:
sea_2017 = pd.read_csv('2017_Building_Energy_Benchmarking.csv')

path_17 = 'data/raw/2017_Building_Energy_Benchmarking.csv'

columns_to_drop_17 = [
    City', 
    'State', 
    'TaxParcelIdentificationNumber',
    'PropertyGFABuilding(s)', 
    'ListOfAllPropertyUseTypes',
    'LargestPropertyUseType', 
    'LargestPropertyUseTypeGFA', 
    'SecondLargestPropertyUseType',
    'SecondLargestPropertyUseTypeGFA', 
    'ThirdLargestPropertyUseType',
    'ThirdLargestPropertyUseTypeGFA', 
    'DefaultData',
    'Outlier',
    'ComplianceStatus'
]

convert_to_float_17 = [
    'Latitude', 
    'Longitude',
    'ZipCode'
]

convert_to_int_17 = [
    'SiteEnergyUse(kBtu)',
    'SiteEnergyUseWN(kBtu)',
    'Electricity(kBtu)',
    'NaturalGas(kBtu)'
]

columns_to_rename_17 = {
    'TotalGHGEmissions': 'GHGEmissions(MetricTonsCO2e)',
    'GHGEmissionsIntensity': 'GHGEmissionsIntensity(kgCO2e/ft2)'
}

sea_2017 = pd.read_csv(path_17)
sea_2017 = drop_columns(sea_2017, columns_to_drop_17)
sea_2017 = convert(sea_2017, convert_to_float_16, convert_to_int_17)
sea_2017 = rename(sea_2017, columns_to_rename_17)
sea_2017 = sort_columns(sea_2017, columns_order)

sea_2017.to_csv('data/clean/2017-building-energy-benchmarking-clean.csv', index=False)

# 2019

In [5]:
path_19 = 'data/raw/2019_Building_Energy_Benchmarking.csv'

columns_to_drop_19 = [
    'TaxParcelIdentificationNumber',
    'City', 
    'State', 
    'PropertyGFABuilding(s)', 
    'LargestPropertyUseType', 
    'LargestPropertyUseTypeGFA', 
    'SecondLargestPropertyUseType',
    'SecondLargestPropertyUseTypeGFA', 
    'ThirdLargestPropertyUseType',
    'ThirdLargestPropertyUseTypeGFA', 
    'EPAPropertyType',
    'ComplianceIssue',
    'ComplianceStatus'
]

convert_to_float_19 = [
    'Latitude', 
    'Longitude',
    'ZipCode'
]

convert_to_int_19 = [
    'SiteEnergyUse(kBtu)',
    'SiteEnergyUseWN(kBtu)',
    'Electricity(kBtu)',
    'NaturalGas(kBtu)'
]

columns_to_rename_19 = {
    'BuildingName': 'PropertyName',
    'TotalGHGEmissions': 'GHGEmissions(MetricTonsCO2e)',
    'GHGEmissionsIntensity': 'GHGEmissionsIntensity(kgCO2e/ft2)'
}

columns_to_add_19 = [
    'NumberofBuildings',
    'YearsENERGYSTARCertified',
]

sea_2019 = pd.read_csv(path_19)
sea_2019 = drop_columns(sea_2019, columns_to_drop_19)
sea_2019 = convert(sea_2019, convert_to_float_19, convert_to_int_19)
sea_2019 = rename(sea_2019, columns_to_rename_19)
sea_2019[[columns_to_add_19]] = None # adding missing columns with no data inside
sea_2019 = sort_columns(sea_2019, columns_order)

sea_2019.to_csv('data/clean/2019-building-energy-benchmarking-clean.csv', index=False)

# 2020

In [3]:
# fixed order of columns for all data is saved in columns_order.py

path_20 = 'data/raw/2020_Building_Energy_Benchmarking.csv'

columns_to_drop_20 = [
    'TaxParcelIdentificationNumber',
    'City', 
    'State', 
    'PropertyGFABuilding(s)', 
    'LargestPropertyUseType', 
    'LargestPropertyUseTypeGFA', 
    'SecondLargestPropertyUseType',
    'SecondLargestPropertyUseTypeGFA', 
    'ThirdLargestPropertyUseType',
    'ThirdLargestPropertyUseTypeGFA', 
    'EPAPropertyType',
    'ComplianceIssue',
    'ComplianceStatus'
]

convert_to_float_20 = [
    'Latitude', 
    'Longitude',
    'ZipCode'
]

convert_to_int_20 = [
    'SiteEnergyUse(kBtu)',
    'SiteEnergyUseWN(kBtu)',
    'Electricity(kBtu)',
    'NaturalGas(kBtu)'
]

columns_to_rename_20 = {
    'BuildingName': 'PropertyName',
    'TotalGHGEmissions': 'GHGEmissions(MetricTonsCO2e)',
    'GHGEmissionsIntensity': 'GHGEmissionsIntensity(kgCO2e/ft2)'
}

columns_to_add_20 = [
    'YearsENERGYSTARCertified',
    'PrimaryPropertyType'
]

sea_2020 = pd.read_csv(path_20)
sea_2020 = drop_columns(sea_2020, columns_to_drop_20)
sea_2020 = convert(sea_2020, convert_to_float_20, convert_to_int_20)
sea_2020 = rename(sea_2020, columns_to_rename_20)
sea_2020[[columns_to_add_20]] = None # adding missing columns with no data inside
sea_2020 = sort_columns(sea_2020, columns_order)

sea_2020.to_csv('data/clean/2020-building-energy-benchmarking-clean.csv', index=False)

# 2. Concatenating data

In [23]:
import pandas as pd
from os import listdir
from os.path import isfile, join

data_path = 'data\clean'
# taking full path of all files 
files_full_path = [join(data_path, f) for f in listdir(data_path) if isfile(join(data_path, f))]
files_full_path

['data\\clean\\2015-building-energy-benchmarking-clean.csv',
 'data\\clean\\2016-building-energy-benchmarking-clean.csv',
 'data\\clean\\2017-building-energy-benchmarking-clean (1).csv',
 'data\\clean\\2019-building-energy-benchmarking-clean.csv',
 'data\\clean\\2020-building-energy-benchmarking-clean.csv']

In [20]:
data_list = []

for file in files_full_path:
    df = pd.read_csv(file)
    data_list.append(df)

sea_all = pd.concat(data_list, axis=0, ignore_index=True)

data\clean\2015-building-energy-benchmarking-clean.csv
data\clean\2016-building-energy-benchmarking-clean.csv
data\clean\2017-building-energy-benchmarking-clean (1).csv
data\clean\2019-building-energy-benchmarking-clean.csv
data\clean\2020-building-energy-benchmarking-clean.csv


In [25]:
sea_all.to_csv('data\clean\ALL_YEARS_building-energy-benchmarking-clean.csv')

In [27]:
pd.set_option('display.max_columns', None)
sea_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17009 entries, 0 to 17008
Data columns (total 31 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   OSEBuildingID                      17009 non-null  int64  
 1   DataYear                           17009 non-null  int64  
 2   BuildingType                       17009 non-null  object 
 3   PrimaryPropertyType                13517 non-null  object 
 4   PropertyName                       17009 non-null  object 
 5   Address                            17009 non-null  object 
 6   ZipCode                            16893 non-null  float64
 7   CouncilDistrictCode                16966 non-null  float64
 8   Neighborhood                       16988 non-null  object 
 9   Latitude                           16988 non-null  float64
 10  Longitude                          16988 non-null  float64
 11  YearBuilt                          17009 non-null  int

In [32]:
sea_all['YearsENERGYSTARCertified'].unique()

array([nan, '2016', '2014', '2012', '2015',
       '2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008',
       '2010, 2009, 2007', '2010, 2007', '2016, 2015', '2017, 2015',
       '2015, 2014, 2012, 2011, 2009', '2012, 2009, 2008',
       '2016, 2015, 2014, 2013, 2011, 2010, 2009, 2007, 2000',
       '2016, 2009, 2008', '2016, 2014, 2012',
       '2016, 2015, 2014, 2012, 2008',
       '2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008, 2007, 2006, 2005, 2004',
       '2014, 2013, 2011, 2010, 2009, 2008, 2007',
       '2016, 2015, 2014, 2013, 2011, 2008',
       '2016, 2015, 2014, 2013, 2008', '2016, 2015, 2013, 2012, 2010',
       '2015, 2013, 2012, 2010', '2007', '2010', '2016, 2015, 2009, 2008',
       '2013, 2009, 2008', '2017', '2017, 2011, 2010, 2007',
       '2017, 2016, 2014, 2013, 2012, 2011, 2010, 2009, 2007',
       '2015, 2014, 2012, 2010, 2008', '2012, 2010, 2009, 2008',
       '2009, 2005', '2017, 2015, 2014, 2013', '2016, 2015, 2012',
       '2012, 2010, 2007, 2006'

In [33]:
sea_all['YearsENERGYSTARCertified'] = sea_all['YearsENERGYSTARCertified'].astype(str)

In [34]:
sea_all['YearsENERGYSTARCertified'].unique()

array(['nan', '2016', '2014', '2012', '2015',
       '2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008',
       '2010, 2009, 2007', '2010, 2007', '2016, 2015', '2017, 2015',
       '2015, 2014, 2012, 2011, 2009', '2012, 2009, 2008',
       '2016, 2015, 2014, 2013, 2011, 2010, 2009, 2007, 2000',
       '2016, 2009, 2008', '2016, 2014, 2012',
       '2016, 2015, 2014, 2012, 2008',
       '2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009, 2008, 2007, 2006, 2005, 2004',
       '2014, 2013, 2011, 2010, 2009, 2008, 2007',
       '2016, 2015, 2014, 2013, 2011, 2008',
       '2016, 2015, 2014, 2013, 2008', '2016, 2015, 2013, 2012, 2010',
       '2015, 2013, 2012, 2010', '2007', '2010', '2016, 2015, 2009, 2008',
       '2013, 2009, 2008', '2017', '2017, 2011, 2010, 2007',
       '2017, 2016, 2014, 2013, 2012, 2011, 2010, 2009, 2007',
       '2015, 2014, 2012, 2010, 2008', '2012, 2010, 2009, 2008',
       '2009, 2005', '2017, 2015, 2014, 2013', '2016, 2015, 2012',
       '2012, 2010, 2007, 200

In [36]:
sea_all[sea_all['YearsENERGYSTARCertified'] =='2.0182017201520143e+27']

Unnamed: 0,OSEBuildingID,DataYear,BuildingType,PrimaryPropertyType,PropertyName,Address,ZipCode,CouncilDistrictCode,Neighborhood,Latitude,Longitude,YearBuilt,NumberofFloors,NumberofBuildings,PropertyGFATotal,PropertyGFAParking,YearsENERGYSTARCertified,ENERGYSTARScore,SiteEUI(kBtu/sf),SiteEUIWN(kBtu/sf),SourceEUI(kBtu/sf),SourceEUIWN(kBtu/sf),SiteEnergyUse(kBtu),SiteEnergyUseWN(kBtu),SteamUse(kBtu),Electricity(kBtu),Electricity(kWh),NaturalGas(therms),NaturalGas(kBtu),GHGEmissions(MetricTonsCO2e),GHGEmissionsIntensity(kgCO2e/ft2)
6957,406,2017,NonResidential,Large Office,520 Pike Tower,520 Pike Street,98101.0,7.0,DOWNTOWN,47.61125,-122.33494,1983,29.0,1.0,500000,0,2.0182017201520143e+27,92.0,47.1,45.9,147.9,144.2,20055690,19554536,0.0,20055686,5877985.0,0.0,0,82.97,0.166


In [102]:
sea_17 = pd.read_csv('data/clean/2017-building-energy-benchmarking-clean (1).csv')

In [99]:
sea_17['YearsENERGYSTARCertified'] = sea_17['YearsENERGYSTARCertified'].astype(int)
sea_17['YearsENERGYSTARCertified'].unique()

array([          0,        2016, -2147483648,        2014,    20182012,
          20172015,    20182017,        2018,    20162015,        2007,
              2010,        2015,        2017,    20092005,    20112009,
              2009,    20102008,    20182009,        2011,    20172016,
              2013,    20152012,    20182016,    20182013,    20172013,
          20172014])

In [100]:
sea_17[sea_17['YearsENERGYSTARCertified'] == 20092005]


Unnamed: 0,OSEBuildingID,DataYear,BuildingType,PrimaryPropertyType,PropertyName,Address,ZipCode,CouncilDistrictCode,Neighborhood,Latitude,Longitude,YearBuilt,NumberofFloors,NumberofBuildings,PropertyGFATotal,PropertyGFAParking,YearsENERGYSTARCertified,ENERGYSTARScore,SiteEUI(kBtu/sf),SiteEUIWN(kBtu/sf),SourceEUI(kBtu/sf),SourceEUIWN(kBtu/sf),SiteEnergyUse(kBtu),SiteEnergyUseWN(kBtu),SteamUse(kBtu),Electricity(kBtu),Electricity(kWh),NaturalGas(therms),NaturalGas(kBtu),GHGEmissions(MetricTonsCO2e),GHGEmissionsIntensity(kgCO2e/ft2)
271,422,2017,NonResidential,Large Office,Two Union Square,601 Union St.,98101.0,7.0,DOWNTOWN,47.61043,-122.33206,1989,56,1.0,1735079,389860,20092005,98.0,34.9,34.4,107.3,106.0,46991252,46332688,1471912,45336629,13287406,1827,182705,275.51,0.159
289,423,2017,NonResidential,Large Office,One Union Square,600 University Street,98101.0,7.0,DOWNTOWN,47.60967,-122.33229,1980,36,1.0,794592,36606,20092005,97.0,34.5,33.9,106.3,104.4,26138674,25681880,0,25431089,7453426,7075,707582,142.79,0.18


In [103]:
sea_17[sea_17['OSEBuildingID'] == 423]

Unnamed: 0,OSEBuildingID,DataYear,BuildingType,PrimaryPropertyType,PropertyName,Address,ZipCode,CouncilDistrictCode,Neighborhood,Latitude,Longitude,YearBuilt,NumberofFloors,NumberofBuildings,PropertyGFATotal,PropertyGFAParking,YearsENERGYSTARCertified,ENERGYSTARScore,SiteEUI(kBtu/sf),SiteEUIWN(kBtu/sf),SourceEUI(kBtu/sf),SourceEUIWN(kBtu/sf),SiteEnergyUse(kBtu),SiteEnergyUseWN(kBtu),SteamUse(kBtu),Electricity(kBtu),Electricity(kWh),NaturalGas(therms),NaturalGas(kBtu),GHGEmissions(MetricTonsCO2e),GHGEmissionsIntensity(kgCO2e/ft2)
289,423,2017,NonResidential,Large Office,One Union Square,600 University Street,98101.0,7.0,DOWNTOWN,47.60967,-122.33229,1980,36,1.0,794592,36606,20092005.0,97.0,34.5,33.9,106.3,104.4,26138674,25681880,0,25431089,7453426,7075,707582,142.79,0.18


In [50]:
sea_17[sea_17['YearsENERGYSTARCertified'] == -2147483648]

Unnamed: 0,OSEBuildingID,DataYear,BuildingType,PrimaryPropertyType,PropertyName,Address,ZipCode,CouncilDistrictCode,Neighborhood,Latitude,Longitude,YearBuilt,NumberofFloors,NumberofBuildings,PropertyGFATotal,PropertyGFAParking,YearsENERGYSTARCertified,ENERGYSTARScore,SiteEUI(kBtu/sf),SiteEUIWN(kBtu/sf),SourceEUI(kBtu/sf),SourceEUIWN(kBtu/sf),SiteEnergyUse(kBtu),SiteEnergyUseWN(kBtu),SteamUse(kBtu),Electricity(kBtu),Electricity(kWh),NaturalGas(therms),NaturalGas(kBtu),GHGEmissions(MetricTonsCO2e),GHGEmissionsIntensity(kgCO2e/ft2)
43,57,2017,NonResidential,Large Office,5th and Pine,413 Pine Street,98101.0,7.0,DOWNTOWN,47.61130,-122.33659,1973,5,1.0,168115,0,-2147483648,85.0,56.4,55.5,177.2,174.1,9567585,9402433,0,9567584,2804098,0,0,39.58,0.235
66,100,2017,NonResidential,Large Office,City Place III - SEDO,551 Boren Ave. North,98109.0,7.0,LAKE UNION,47.62424,-122.33646,2010,5,1.0,316306,0,-2147483648,95.0,57.0,57.0,155.9,155.9,16381541,16384160,0,13206806,3870693,31747,3174733,223.25,0.706
71,107,2017,NonResidential,Large Office,City Place IV - SEDO,333 Boren Ave. N.,98109.0,7.0,LAKE UNION,47.62112,-122.33629,2010,12,1.0,571329,0,-2147483648,92.0,62.5,61.8,172.1,169.9,39074696,38648836,0,31822031,9326504,72526,7252659,516.84,0.905
203,329,2017,NonResidential,Large Office,1700 Seventh Ave,1700 Seventh,98101.0,7.0,DOWNTOWN,47.61399,-122.33523,2000,23,1.0,747747,205076,-2147483648,92.0,54.8,54.4,161.0,159.8,30229138,30027230,0,27310446,8004234,29186,2918686,268.00,0.358
205,331,2017,NonResidential,Large Office,The Tower Building,1809 7th Ave,98101.0,7.0,DOWNTOWN,47.61375,-122.33566,1930,17,1.0,204790,30000,-2147483648,91.0,48.1,46.8,129.5,128.1,8360560,8138434,0,6566011,1924388,17945,1794547,122.47,0.598
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3187,49776,2017,NonResidential,Large Office,Market Place Tower,2025 1st Avenue,98121.0,7.0,DOWNTOWN,47.61156,-122.34401,1988,13,1.0,354086,149326,-2147483648,91.0,55.6,54.0,158.9,155.0,13046611,12668063,0,11286552,3307899,17600,1760057,140.17,0.396
3219,49857,2017,Multifamily MR (5-9),Mid-Rise Multifamily,AMLI South Lake Union 1,1260 Republican St.,98109.0,7.0,LAKE UNION,47.62336,-122.33085,2013,7,1.0,229742,0,-2147483648,99.0,31.5,31.2,73.8,73.5,5567889,5521985,0,3442771,1009018,21251,2125118,127.11,0.553
3281,49985,2017,NonResidential,Large Office,Amazon Phase VI,500 9th Avenue North,98109.0,7.0,LAKE UNION,47.62384,-122.33941,2014,6,1.0,596376,169195,-2147483648,99.0,41.4,40.6,117.0,114.6,17705612,17342080,0,15028529,4404610,26770,2677080,204.35,0.343
3305,50016,2017,Multifamily MR (5-9),Mid-Rise Multifamily,AMLI Mark24,2428 NW Market St,98107.0,6.0,BALLARD,47.66895,-122.38881,2014,7,1.0,418285,0,-2147483648,100.0,20.4,20.1,51.7,50.8,8523793,8410720,0,6061019,1776383,24627,2462773,155.87,0.373


In [88]:
value = sea_17['YearsENERGYSTARCertified'][sea_17['OSEBuildingID'] == 57]
value

43    2.018202e+11
Name: YearsENERGYSTARCertified, dtype: float64

In [86]:
int(value.values[0])

201820172016

In [87]:
value.astype(int)

43   -2147483648
Name: YearsENERGYSTARCertified, dtype: int32