## USA CO2 Emissions 

In [1]:
# Import dependencies

import pandas as pd
import numpy as np


In [2]:
# Read csv

df=pd.read_csv('USA CO2 Unit Data.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229853 entries, 0 to 229852
Data columns (total 17 columns):
 #   Column                                     Non-Null Count   Dtype  
---  ------                                     --------------   -----  
 0   Facility Id                                229853 non-null  int64  
 1   FRS Id                                     206379 non-null  float64
 2   Facility Name                              229853 non-null  object 
 3   City                                       229853 non-null  object 
 4   State                                      229853 non-null  object 
 5   Primary NAICS Code                         229853 non-null  int64  
 6   Reporting Year                             229853 non-null  int64  
 7   Industry Type (subparts)                   229853 non-null  object 
 8   Industry Type (sectors)                    229831 non-null  object 
 9   Unit Name                                  229853 non-null  object 
 10  Unit Typ

In [4]:
df.size

3907501

In [5]:
df.shape

(229853, 17)

In [6]:
# Check for null values per column

df.isnull().sum()

Facility Id                                      0
FRS Id                                       23474
Facility Name                                    0
City                                             0
State                                            0
Primary NAICS Code                               0
Reporting Year                                   0
Industry Type (subparts)                         0
Industry Type (sectors)                         22
Unit Name                                        0
Unit Type                                     2326
Unit Reporting Method                         3395
Unit Maximum Rated Heat Input (mmBTU/hr)     45865
Unit CO2 emissions (non-biogenic)                0
Unit Methane (CH4) emissions                     0
Unit Nitrous Oxide (N2O) emissions               0
Unit Biogenic CO2 emissions (metric tons)     3395
dtype: int64

In [7]:
df=df.dropna()
df.size

2714917

In [8]:
df.shape

(159701, 17)

In [9]:
# Dropping FRS Id, Facility Name, Primary NAICS Code, Industry Type(subparts), Unit Type, Unit Reporting Method,
# Unit Maximum Rated Heat Input (mmBTU/hr), Unit Biogenic CO2 emissions (metric tons) from the dataframe

USA_CO2_df=df.drop(columns=['FRS Id','Facility Id', 'Facility Name', 'Primary NAICS Code', 'Industry Type (subparts)', 'Unit Type','Unit Name', 'Unit Reporting Method',
'Unit Maximum Rated Heat Input (mmBTU/hr)', 'Unit Biogenic CO2 emissions (metric tons)'])
USA_CO2_df.head()

Unnamed: 0,City,State,Reporting Year,Industry Type (sectors),Unit CO2 emissions (non-biogenic),Unit Methane (CH4) emissions,Unit Nitrous Oxide (N2O) emissions
7,BROOKLYN,NY,2021,Power Plants,84.4,0.0,0.0
8,BROOKLYN,NY,2021,Power Plants,78.9,0.0,0.0
11,BROOKLYN,NY,2020,Power Plants,90.7,0.0,0.0
12,BROOKLYN,NY,2020,Power Plants,108.4,0.0,0.0
15,BROOKLYN,NY,2019,Power Plants,30.2,0.0,0.0


In [10]:
#Renaming Reporting Year to Year

USA_CO2_df.rename(columns = {'Reporting Year':'Year', 
                             'Unit CO2 emissions (non-biogenic) ': 'Unit CO2 emissions',
                            'Industry Type (sectors)':'Sectors'}, inplace = True)

In [11]:
USA_CO2_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 159701 entries, 7 to 229852
Data columns (total 7 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   City                                 159701 non-null  object 
 1   State                                159701 non-null  object 
 2   Year                                 159701 non-null  int64  
 3   Sectors                              159701 non-null  object 
 4   Unit CO2 emissions                   159701 non-null  float64
 5   Unit Methane (CH4) emissions         159701 non-null  float64
 6   Unit Nitrous Oxide (N2O) emissions   159701 non-null  float64
dtypes: float64(3), int64(1), object(3)
memory usage: 9.7+ MB


In [12]:
USA_CO2_df.head()

Unnamed: 0,City,State,Year,Sectors,Unit CO2 emissions,Unit Methane (CH4) emissions,Unit Nitrous Oxide (N2O) emissions
7,BROOKLYN,NY,2021,Power Plants,84.4,0.0,0.0
8,BROOKLYN,NY,2021,Power Plants,78.9,0.0,0.0
11,BROOKLYN,NY,2020,Power Plants,90.7,0.0,0.0
12,BROOKLYN,NY,2020,Power Plants,108.4,0.0,0.0
15,BROOKLYN,NY,2019,Power Plants,30.2,0.0,0.0


In [13]:
USA_CO2_df.shape

(159701, 7)

In [14]:
USA_CO2_df.isnull().sum()

City                                   0
State                                  0
Year                                   0
Sectors                                0
Unit CO2 emissions                     0
Unit Methane (CH4) emissions           0
Unit Nitrous Oxide (N2O) emissions     0
dtype: int64

In [15]:
USA_CO2_df.duplicated(keep=False)

7         False
8         False
11        False
12        False
15        False
          ...  
229848    False
229849    False
229850    False
229851    False
229852    False
Length: 159701, dtype: bool

In [16]:
sectors_df = USA_CO2_df.Sectors.unique()
sectors_df

array(['Power Plants', 'Petroleum and Natural Gas Systems', 'Waste',
       'Minerals', 'Other', 'Chemicals, Industrial Gas Suppliers',
       'Chemicals, Industrial Gas Suppliers, Minerals', 'Pulp and Paper',
       'Chemicals', 'Metals', 'Other, Waste', 'Other, Suppliers of CO2',
       'Other, Suppliers of CO2, Waste', 'Chemicals, Suppliers of CO2',
       'Natural Gas and Natural Gas Liquids Suppliers, Petroleum and Natural Gas Systems',
       'Power Plants, Suppliers of CO2', 'Chemicals, Waste',
       'Pulp and Paper, Waste', 'Metals, Waste',
       'Petroleum and Natural Gas Systems, Suppliers of CO2',
       'Metals, Power Plants', 'Refineries',
       'Chemicals, Petroleum Product Suppliers, Refineries',
       'Chemicals, Refineries', 'Petroleum Product Suppliers, Refineries',
       'Import and Export of Equipment Containing Fluorintaed GHGs, Other',
       'Minerals, Waste', 'Other, Power Plants', 'Power Plants, Waste',
       'Injection of CO2, Other, Suppliers of CO2, Wa

In [17]:
# groupingby the CO2 emissions by sectors and year

USA_CO2_df.groupby(['Sectors','Year'])['Unit CO2 emissions'].sum()

Sectors    Year
Chemicals  2011    43898128.26
           2012    38524980.68
           2013    44007221.72
           2014    44865417.20
           2015    46133748.31
                      ...     
Waste      2017     4166201.38
           2018     4082443.92
           2019     3972656.40
           2020     3998599.24
           2021     3959075.32
Name: Unit CO2 emissions, Length: 649, dtype: float64

In [18]:
USA_CO2_df.to_csv('USA_CO2_data.csv', index=False)

In [19]:
USA_CO2_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 159701 entries, 7 to 229852
Data columns (total 7 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   City                                 159701 non-null  object 
 1   State                                159701 non-null  object 
 2   Year                                 159701 non-null  int64  
 3   Sectors                              159701 non-null  object 
 4   Unit CO2 emissions                   159701 non-null  float64
 5   Unit Methane (CH4) emissions         159701 non-null  float64
 6   Unit Nitrous Oxide (N2O) emissions   159701 non-null  float64
dtypes: float64(3), int64(1), object(3)
memory usage: 9.7+ MB


In [25]:
USA_CO2_df.assign(Year=pd.to_datetime(USA_CO2_df.Year, format='%Y')).set_index('Year')

Unnamed: 0_level_0,City,State,Sectors,Unit CO2 emissions,Unit Methane (CH4) emissions,Unit Nitrous Oxide (N2O) emissions
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-01,BROOKLYN,NY,Power Plants,84.4,0.00,0.000
2021-01-01,BROOKLYN,NY,Power Plants,78.9,0.00,0.000
2020-01-01,BROOKLYN,NY,Power Plants,90.7,0.00,0.000
2020-01-01,BROOKLYN,NY,Power Plants,108.4,0.00,0.000
2019-01-01,BROOKLYN,NY,Power Plants,30.2,0.00,0.000
...,...,...,...,...,...,...
2015-01-01,Briscoe,TX,Petroleum and Natural Gas Systems,33757.8,16.00,19.072
2014-01-01,Briscoe,TX,Petroleum and Natural Gas Systems,32901.2,15.50,18.476
2013-01-01,Briscoe,TX,Petroleum and Natural Gas Systems,42700.7,20.25,24.138
2012-01-01,Briscoe,TX,Petroleum and Natural Gas Systems,46840.7,22.00,26.224
