In [1]:
import pandas as pd

In [2]:
# Read the SFO airport utility-consumption CSV into a DataFrame
file = '../Resources/SFO_Airport_Utility_Consumption.csv'
utilities_df = pd.read_csv(filepath_or_buffer=file)

# Preview the first rows of the loaded data
utilities_df.head()

Unnamed: 0,Year,Month Number,Month,Utility,Owner,Units,Usage
0,2013,1,Jan,Passengers,Campus,PAX,3209356.0
1,2013,1,Jan,Gas,Commission,Therms,363205.0
2,2013,1,Jan,Gas,Tenant,Therms,49393.0
3,2013,1,Jan,Electricity,Commission,kWh,12904353.0
4,2013,1,Jan,Electricity,Tenant,kWh,14002156.0


In [3]:
# Inspect the dtype of each column in the DataFrame
utilities_df.dtypes

Year              int64
Month Number      int64
Month            object
Utility          object
Owner            object
Units            object
Usage           float64
dtype: object

In [4]:
# Add a column that tracks the tax rate
# Assume every year and type of utility had 
# a tax rate of 5%, except for 2019 when the 
# tax was raised to 5.5%

# Define a function
def tax_rate(year):
    """Return the tax rate that applies to the given year.

    The rate is 0.055 (5.5%) when ``year`` equals 2019 and 0.05 (5%)
    for any other value.
    """
    return 0.055 if year == 2019 else 0.05

# Build the 'Tax Rate' column by passing every Year value through tax_rate
utilities_df['Tax Rate'] = utilities_df['Year'].apply(func=tax_rate)

# Show the rows with the latest years first
utilities_df.sort_values("Year", ascending=False).head()

Unnamed: 0,Year,Month Number,Month,Utility,Owner,Units,Usage,Tax Rate
559,2019,8,Aug,Water,Tenant,Million Gallons,15.12082,0.055
531,2019,4,Apr,Water,Tenant,Million Gallons,15.92492,0.055
529,2019,4,Apr,Electricity,Tenant,kWh,12130340.0,0.055
528,2019,4,Apr,Electricity,Commission,kWh,13164180.0,0.055
527,2019,4,Apr,Gas,Tenant,Therms,50665.3,0.055


In [5]:
# Print the distinct values of the 'Utility' and 'Owner' columns, one array per line
print(utilities_df['Utility'].unique(), utilities_df['Owner'].unique(), sep="\n")

['Passengers' 'Gas' 'Electricity' 'Water']
['Campus' 'Commission' 'Tenant']


In [6]:
# Recalculate the tax rate, assuming that
# Commission-owned units were taxed an
# additional 1% on Electricity.

# Define a function
def tax_rate(row):
    """Return the tax rate for one utility record.

    ``row`` is indexed by the keys 'Owner', 'Utility' and 'Year'.
    The rate starts at a 5% base, gains 1% when the record is
    Commission-owned Electricity, and gains a further 0.5% when the
    record's Year is 2019.
    """
    is_commission_electricity = (
        row['Owner'] == "Commission" and row['Utility'] == "Electricity"
    )
    electricity_surcharge = 0.01 if is_commission_electricity else 0.0
    year_surcharge = 0.005 if row['Year'] == 2019 else 0.0

    # Add in the same order as base -> electricity -> year so the
    # floating-point result is unchanged.
    return 0.05 + electricity_surcharge + year_surcharge


# Recompute 'Tax Rate' one row at a time by passing each row to tax_rate
utilities_df['Tax Rate'] = utilities_df.apply(tax_rate, axis='columns')

# Preview the first rows with the recalculated rates
utilities_df.head()

Unnamed: 0,Year,Month Number,Month,Utility,Owner,Units,Usage,Tax Rate
0,2013,1,Jan,Passengers,Campus,PAX,3209356.0,0.05
1,2013,1,Jan,Gas,Commission,Therms,363205.0,0.05
2,2013,1,Jan,Gas,Tenant,Therms,49393.0,0.05
3,2013,1,Jan,Electricity,Commission,kWh,12904353.0,0.06
4,2013,1,Jan,Electricity,Tenant,kWh,14002156.0,0.05


In [11]:
# Use apply with a lambda function to set
# the existing Tax Rate column to 0 if
# the utility was "Passengers"

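# Apply a lambda function
# NOTE: first attempt, kept commented out because it is not valid Python:
# a conditional expression needs an `else` branch, `axis=1` is missing, and
# `utilites` / "Utilty" are misspelled. The working version is in the next cell.
# utilites['Tax Rate'] = utilities_df.apply(lambda row: 0 if row["Utilty"]=="Passengers")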

In [9]:
# Keep each row's existing 'Tax Rate' unless the utility is "Passengers",
# in which case the rate becomes 0.
utilities_df['Tax Rate'] = utilities_df.apply(
    lambda row: row['Tax Rate'] if row["Utility"] != "Passengers" else 0,
    axis=1,
)

In [10]:
# Display the first rows of the DataFrame
utilities_df.head()

Unnamed: 0,Year,Month Number,Month,Utility,Owner,Units,Usage,Tax Rate
0,2013,1,Jan,Passengers,Campus,PAX,3209356.0,0.0
1,2013,1,Jan,Gas,Commission,Therms,363205.0,0.05
2,2013,1,Jan,Gas,Tenant,Therms,49393.0,0.05
3,2013,1,Jan,Electricity,Commission,kWh,12904353.0,0.06
4,2013,1,Jan,Electricity,Tenant,kWh,14002156.0,0.05
