In [3]:
import numpy as np
import pandas as pd
import geopandas as gp
from shapely.geometry import Polygon, LineString, Point
import matplotlib.pyplot as plt
import util
from scipy import stats
import seaborn as sns

# 1.0 Preprocessing

In [4]:
df_districts = gp.read_file("../data/Council_Districts.geojson")
df = pd.read_csv('../data/2020_Building_Energy_Benchmarking.csv')
df = gp.GeoDataFrame(df, geometry=gp.points_from_xy(df.Longitude, df.Latitude))
util.clean_districts(df, df_districts)

Building WATERWORKS OFFICE & MARINA 2353/ 4088803975 doesn't have a district POINT (-122.33895 47.63575) 
	 Found district 7 for WATERWORKS OFFICE & MARINA
Building NAUTICAL LANDING 2381/ 4088804350 doesn't have a district POINT (-122.34219 47.64306) 
	 Found district 7 for NAUTICAL LANDING
Building UNION HARBOR CONDOMINIUM 2540/ 8807200000 doesn't have a district POINT (-122.33003 47.6401) 
	 Found district 4 for UNION HARBOR CONDOMINIUM
Building THE PIER AT LESCHI 2997/ 6780900000 doesn't have a district POINT (-122.28563 47.59926) 
	 Found district 3 for THE PIER AT LESCHI
Building THE LAKESHORE 3046/ 1180001715 doesn't have a district POINT EMPTY 
Building EDUCARE 3218/ 2895800030 doesn't have a district POINT EMPTY 


In [5]:
df.columns

Index(['OSEBuildingID', 'DataYear', 'BuildingName', 'BuildingType',
       'TaxParcelIdentificationNumber', 'Address', 'City', 'State', 'ZipCode',
       'Latitude', 'Longitude', 'Neighborhood', 'CouncilDistrictCode',
       'YearBuilt', 'NumberofFloors', 'NumberofBuildings', 'PropertyGFATotal',
       'PropertyGFABuilding(s)', 'PropertyGFAParking', 'ENERGYSTARScore',
       'SiteEUIWN(kBtu/sf)', 'SiteEUI(kBtu/sf)', 'SiteEnergyUse(kBtu)',
       'SiteEnergyUseWN(kBtu)', 'SourceEUIWN(kBtu/sf)', 'SourceEUI(kBtu/sf)',
       'EPAPropertyType', 'LargestPropertyUseType',
       'LargestPropertyUseTypeGFA', 'SecondLargestPropertyUseType',
       'SecondLargestPropertyUseTypeGFA', 'ThirdLargestPropertyUseType',
       'ThirdLargestPropertyUseTypeGFA', 'Electricity(kWh)', 'SteamUse(kBtu)',
       'NaturalGas(therms)', 'ComplianceStatus', 'ComplianceIssue',
       'Electricity(kBtu)', 'NaturalGas(kBtu)', 'TotalGHGEmissions',
       'GHGEmissionsIntensity', 'geometry'],
      dtype='object')

In [5]:
df = df[['OSEBuildingID', 
        'BuildingName', 
        'CouncilDistrictCode', 
        'YearBuilt', 
        'TotalGHGEmissions', 
        'GHGEmissionsIntensity', 
        'EPAPropertyType',
        'geometry']]
df["LegislationBuildingType"] = df['EPAPropertyType'].apply(lambda x: dict_cats.get(x))

# Data Centers
- Westin apparently has an agreement with amazon, so it counts
- "Data Center" is used by AWS, so it counts. 

In [11]:
df[df['LargestPropertyUseType'] == "Data Center"]

Unnamed: 0,OSEBuildingID,DataYear,BuildingName,BuildingType,TaxParcelIdentificationNumber,Address,City,State,ZipCode,Latitude,...,Electricity(kWh),SteamUse(kBtu),NaturalGas(therms),ComplianceStatus,ComplianceIssue,Electricity(kBtu),NaturalGas(kBtu),TotalGHGEmissions,GHGEmissionsIntensity,geometry
234,365,2020,UW TOWER DATA CENTER,NonResidential,1142000290,4310 12TH AVE NE,SEATTLE,WA,98105.0,47.6606,...,9610516,0,0,Compliant,No Issue,32791081.0,0.0,174.4,4.8,POINT (-122.31512 47.66060)
541,753,2020,THE WESTIN BUILDING EXCHANGE (Office),NonResidential,659000950,2001 6TH AVE,SEATTLE,WA,98121.0,47.61438,...,79296072,0,3157,Compliant,No Issue,270558198.0,315739.0,1455.5,3.4,POINT (-122.33871 47.61438)
3165,49780,2020,DATA CENTER,NonResidential,659000905,2020 5TH AVE,SEATTLE,WA,98121.0,47.61449,...,25004180,0,0,Compliant,No Issue,85314262.0,0.0,453.7,9.6,POINT (-122.33956 47.61449)


# Whole Foods that Amazons owns
- There's definitely more whole foods than this- investigate
- Do whole foods show up in different categories (retail?) are they hidden under another name, or just not reported at all?
- How do whole foods compare to the average in supermarket/grocery store?
- What's the footprint of whole foods? 

The problem is whole foods that are in other bulidings. eg. the cap hill whole foods in 
Need to check second, third most popular use type as well! And figure out a list 

Whole foods: 
- First hill: 1001 Broadway, Seattle, WA 98122 --> the danforth 
- west seattle: 4755 Fauntleroy Way Sw (Ste 190), Seattle, WA 98116 --> the whittaker
- roosevelt: 1026 Ne 64th Street (at Roosevelt Way NE), Seattle, WA 98115 --> roosevelt square
- belltown: 2210 Westlake Ave (at Denny Way), Seattle, WA 98121 --> 2200 westlake
- interbay: 2001 15th Ave W, Seattle, WA 98119 --> whole foods interbay

In [18]:
df[df['LargestPropertyUseType']=='Supermarket/Grocery Store'].describe()


Unnamed: 0,OSEBuildingID,DataYear,ZipCode,Latitude,Longitude,CouncilDistrictCode,YearBuilt,NumberofFloors,NumberofBuildings,PropertyGFATotal,...,LargestPropertyUseTypeGFA,SecondLargestPropertyUseTypeGFA,ThirdLargestPropertyUseTypeGFA,Electricity(kWh),SteamUse(kBtu),NaturalGas(therms),Electricity(kBtu),NaturalGas(kBtu),TotalGHGEmissions,GHGEmissionsIntensity
count,42.0,42.0,41.0,42.0,42.0,42.0,42.0,42.0,42.0,42.0,...,42.0,23.0,5.0,42.0,42.0,42.0,41.0,41.0,41.0,41.0
mean,20698.952381,2020.0,98120.829268,47.639741,-122.334256,4.214286,1981.119048,1.547619,1.02381,68231.142857,...,49862.47619,11340.434783,8518.4,1667435.0,0.0,31702.452381,5828053.0,3247568.0,203.468293,4.521951
std,15360.28298,0.0,15.183712,0.059148,0.036118,1.99433,23.971734,1.328898,0.154303,68062.57711,...,35347.151672,19668.25162,12116.979587,896613.4,0.0,24442.778847,2960408.0,2422080.0,136.78148,3.322161
min,132.0,2020.0,98102.0,47.52095,-122.3873,1.0,1916.0,1.0,1.0,21603.0,...,16000.0,0.0,0.0,0.0,0.0,0.0,486355.0,0.0,6.9,0.3
25%,818.25,2020.0,98109.0,47.58983,-122.37157,2.0,1962.25,1.0,1.0,30183.5,...,28175.25,0.0,2663.0,1298874.0,0.0,7153.5,4552591.0,1091063.0,71.3,1.5
50%,22788.5,2020.0,98118.0,47.64379,-122.32943,4.0,1987.5,1.0,1.0,45751.0,...,41322.5,0.0,5000.0,1491280.0,0.0,30160.5,5088786.0,3064567.0,189.1,4.0
75%,26857.75,2020.0,98125.0,47.68256,-122.310875,6.0,1999.5,1.0,1.0,66752.25,...,50639.25,16791.5,5055.0,2094161.0,0.0,53894.75,7285985.0,5551192.0,321.1,7.0
max,50195.0,2020.0,98177.0,47.73315,-122.26825,7.0,2016.0,8.0,2.0,314958.0,...,168735.0,72564.0,29874.0,4078974.0,0.0,85676.0,13917460.0,8567611.0,529.0,13.2


In [8]:
df[df['BuildingName'].str.contains('whole foods', case=False)]

Unnamed: 0,OSEBuildingID,DataYear,BuildingName,BuildingType,TaxParcelIdentificationNumber,Address,City,State,ZipCode,Latitude,...,Electricity(kWh),SteamUse(kBtu),NaturalGas(therms),ComplianceStatus,ComplianceIssue,Electricity(kBtu),NaturalGas(kBtu),TotalGHGEmissions,GHGEmissionsIntensity,geometry
497,700,2020,WHOLE FOODS INTERBAY,NonResidential,7666201460,2001 15TH AVE W,SEATTLE,WA,98119.0,47.63718,...,1944769,0,48180,Compliant,No Issue,6635552.0,4818027.0,291.2,4.5,POINT (-122.37734 47.63718)


# Owned Buildings
- All buildings with "amazon" in the name
- Look up all buildings that rent as well as those without "amazon" in the name
- Look at electricity vs. natural gas breakdown. 
- Look at built date- some are older than expected. 

Findings: 
- "amazon" buildings all have pretty good emissions intensity
- all "amazon" buildings are already compliant. is that a helpful thing? 
- "amazon" buildings are on average more carbon efficient than most office buildings. 0.7 > 0.4 roughly- maybe max boxplot. 


In [40]:
amazon = df[df['BuildingName'].str.contains('amazon', case=False)]
amazon

Unnamed: 0,OSEBuildingID,DataYear,BuildingName,BuildingType,TaxParcelIdentificationNumber,Address,City,State,ZipCode,Latitude,...,Electricity(kWh),SteamUse(kBtu),NaturalGas(therms),ComplianceStatus,ComplianceIssue,Electricity(kBtu),NaturalGas(kBtu),TotalGHGEmissions,GHGEmissionsIntensity,geometry
258,390,2020,LOWES & AMAZON W,NonResidential,3600052,2700 RAINIER AVE S,SEATTLE,WA,98144.0,47.57936,...,2268500,0,17515,Compliant,No Issue,7740122.0,1751457.0,134.2,0.5,POINT (-122.29744 47.57936)
728,19776,2020,AMAZON DISTRIBUTION CENTER,NonResidential,164000020,13537 AURORA AVE N,SEATTLE,WA,98133.0,47.72782,...,656107,0,0,Compliant,No Issue,2238638.0,0.0,11.9,0.4,POINT (-122.34536 47.72782)
3257,49985,2020,AMAZON PHASE VI - NESSIE AND BIGFOOT,NonResidential,1983200075,500 9TH AVE N,SEATTLE,WA,98109.0,47.62384,...,4037111,0,17259,Compliant,No Issue,13774623.0,1725861.0,164.9,0.4,POINT (-122.33941 47.62384)
3300,50037,2020,APOLLO BLDG (AMAZON),NonResidential,1991201265,325 9TH AVE N,SEATTLE,WA,98109.0,47.62154,...,2827600,0,18234,Compliant,No Issue,9647771.0,1823426.0,148.1,0.5,POINT (-122.34015 47.62154)
3305,50045,2020,AMAZON BRAZIL,NonResidential,1983200035,400 9TH AVE N,SEATTLE,WA,98109.0,47.62264,...,3051112,0,13200,Compliant,No Issue,10410394.0,1319979.0,125.5,0.4,POINT (-122.33940 47.62264)
3359,50160,2020,AMAZON DOPPLER BUILDING,NonResidential,659000775,2021 7TH AVE,SEATTLE,WA,,47.61523,...,12682380,0,47104,Compliant,No Issue,43272281.0,4710401.0,480.3,0.4,POINT (-122.33835 47.61523)
3387,50192,2020,MIDTOWN 21 (AMAZON),NonResidential,660001605,1007 STEWART ST,SEATTLE,WA,,47.61632,...,5198792,0,7901,Compliant,No Issue,17738278.0,790106.0,136.3,0.4,POINT (-122.33304 47.61632)
3397,50241,2020,TROY BLOCK (AMAZON),NonResidential,8692000000,300 BOREN AVE N,SEATTLE,WA,98109.0,47.62107,...,9009065,0,28064,Compliant,No Issue,30738930.0,2806441.0,312.5,0.4,POINT (-122.33553 47.62107)
3419,50270,2020,URBAN UNION (AMELIA- AMAZON BLDG),NonResidential,1983200560,501 FAIRVIEW AVE N,SEATTLE,WA,98109.0,47.62334,...,2779352,0,2792,Compliant,No Issue,9483149.0,279192.0,65.3,0.2,POINT (-122.33457 47.62334)
3424,50294,2020,DAY 1 TOWER AND SPHERES (Amazon),NonResidential,660000220,2101 7TH AVE,SEATTLE,WA,98121.0,47.61586,...,13639170,0,61085,Compliant,No Issue,46536848.0,6108492.0,571.9,0.5,POINT (-122.33937 47.61586)


In [44]:
print(df[df['EPAPropertyType']=="Office"]["GHGEmissionsIntensity"].mean())
print(amazon['GHGEmissionsIntensity'].mean())

0.7288747346072186
0.42500000000000004


In [69]:
len(df["EPAPropertyType"].unique())

66

Carbon footprint of amazon buildings (TotalGHGEmissions)
Similar to      
Non-Refrigerated Warehouse                                     6598.3  
Retail Store                                                   4563.0  
^ Sum of total ghge emissions across entire EPA building types. 
Ranks about 13 out of 66 EPA proprety types. 

In [51]:
amazon['TotalGHGEmissions'].sum()

2538.2000000000003

In [46]:
amazon_ids = [
589,
336,
433,
471,
50528,
337,
376,
597,
21447,
750,
49985,
50045,
50037,
21445,
751,
371,
50192,
388,
50240,
50270,
334,
481
]

In [64]:
amazon = pd.concat([df[df['OSEBuildingID'].isin(amazon_ids)], df[df['BuildingName'].str.contains('amazon', case=False)]])
amazon['TotalGHGEmissions'].sum()

5260.300000000001

Sooo amazon might make up 1.3% of emissions as defined by the total emissions in this dataset? 

In [73]:
amazon["TotalGHGEmissions"].sum() /  df["TotalGHGEmissions"].sum() * 100

1.310218021793213