## Shirra Emissions Project

In [63]:
import pandas as pd
import requests
import json

### Load API data

In [64]:
# Request API data
api_url = "https://dataportalforcities.org/api/emissions?cityid=5128581&year=2015&scope=total"
# Bound the wait: without a timeout, requests blocks indefinitely on a stalled server
response = requests.get(api_url, timeout=30)
# Stop here on a 4xx/5xx status instead of carrying an error body into later cells
response.raise_for_status()

In [65]:
# Save data to local file
# `data` is only assigned when the file is read back in the next cell, so the
# payload written here must come from the parsed HTTP response itself.
with open("data.json", "w") as json_file:
    json.dump(response.json(), json_file)

In [66]:
# Read the cached payload back from data.json and parse it into `data`
with open("data.json", "r") as json_file:
    cached_text = json_file.read()
    data = json.loads(cached_text)

### Data cleaning

In [75]:
# Flatten the nested payload into one row per emission record.
# Top-level fields are repeated on every row; sector fields come from the
# enclosing sector entry; the remaining fields come from the emission itself.
city_fields = [
    'cityid', 'city_name',
    'adm1id', 'adm1_name',
    'countryid', 'country_name',
    'regionid', 'region_name',
    'year', 'population', 'scope',
]
emission_fields = [
    'subsector', 'subsector_code',
    'activity_amount_mj', 'co2e_total_ton',
    'notation',
]

rows = []
for sector_data in data['data']:
    sector = sector_data['sector']
    # A sector entry without an 'emissions' key contributes no rows
    for emission in sector_data.get('emissions', []):
        row = {field: data[field] for field in city_fields}
        row['sector'] = sector
        row['sector_code'] = sector_data['sector_code']
        for field in emission_fields:
            row[field] = emission[field]
        rows.append(row)

emissions = pd.DataFrame(rows)

In [76]:
# Cast the two measurement columns to numeric dtype.
# errors='coerce' turns any value that cannot be parsed into NaN.
for column in ('co2e_total_ton', 'activity_amount_mj'):
    emissions[column] = pd.to_numeric(emissions[column], errors='coerce')

In [77]:
# Preview the first five rows of the flattened frame
emissions.head(n=5)

Unnamed: 0,cityid,city_name,adm1id,adm1_name,countryid,country_name,regionid,region_name,year,population,scope,sector,sector_code,subsector,subsector_code,activity_amount_mj,co2e_total_ton,notation
0,5128581,New York,5128638,New York,6252001,United States,53,North America,2015,8537673,Total emissions,Stationary energy,stationary_energy,Residential buildings,stationary_energy_residential_buildings,212589300000.0,9526082.0,
1,5128581,New York,5128638,New York,6252001,United States,53,North America,2015,8537673,Total emissions,Stationary energy,stationary_energy,Commercial buildings & facilities,stationary_energy_commercial_buildings,280200100000.0,9296281.0,
2,5128581,New York,5128638,New York,6252001,United States,53,North America,2015,8537673,Total emissions,Stationary energy,stationary_energy,Institutional buildings & facilities,stationary_energy_institutional_buildings,,,NE
3,5128581,New York,5128638,New York,6252001,United States,53,North America,2015,8537673,Total emissions,Stationary energy,stationary_energy,Industrial buildings & facilities,stationary_energy_industry,21783650000.0,833315.7,
4,5128581,New York,5128638,New York,6252001,United States,53,North America,2015,8537673,Total emissions,Stationary energy,stationary_energy,Agriculture,stationary_energy_agriculture_forestry_fisheries,782881800.0,18612.35,


### Data exploration

In [78]:
# Add up co2e_total_ton within each subsector and keep the result as a
# one-column DataFrame (column 'sum') indexed by subsector
subsector_sum = (
    emissions
    .groupby('subsector')['co2e_total_ton']
    .sum()
    .to_frame(name='sum')
)

In [79]:
# Format totals as strings with thousands separators and 2 decimals.
# Values that are already strings are passed through unchanged, so re-running
# this cell is a no-op instead of raising ValueError on the 'f' format code.
subsector_sum['sum'] = subsector_sum['sum'].map(
    lambda value: value if isinstance(value, str) else f"{value:,.2f}"
)

In [80]:
# Display the per-subsector totals
subsector_sum

Unnamed: 0_level_0,sum
subsector,Unnamed: 1_level_1
Agriculture,18612.35
Aviation,6123167.7
Biological treatment,106064.0
CHP Generation,0.0
Commercial buildings & facilities,9296280.8
Electricity-only generation,0.0
Fugitive emissions,0.0
Heat/cold generation,0.0
Incineration and open burning,0.0
Industrial buildings & facilities,833315.71
