In [1]:
import pandas as pd

In [11]:
# Load the dataset
file = ("Em_Unit_Fuel_Type.xlsx")
Em_fuel_type_df = pd.read_excel(file, sheet_name = "UNIT_DATA")

In [12]:
print(Em_fuel_type_df.columns)

Index(['Facility Id', 'FRS Id', 'Facility Name', 'City', 'State',
       'Primary NAICS Code', 'Reporting Year', 'Industry Type (subparts)',
       'Industry Type (sectors)', 'Unit Name', 'Unit Type',
       'Unit Reporting Method',
       'Unit Maximum Rated Heat Input Capacity (mmBTU/hr)',
       'General Fuel Type', 'Unit CO2 emissions (non-biogenic) ',
       'Unit Methane (CH4) emissions ', 'Unit Nitrous Oxide (N2O) emissions ',
       'Unit Biogenic CO2 emissions (metric tons)'],
      dtype='object')


In [13]:
# Select the columns to keep for the analysis
Em_fuel_type_df = Em_fuel_type_df[['Facility Id', 'Facility Name', 'State',  
                           'Primary NAICS Code', 'Industry Type (sectors)', 
                           'Unit Maximum Rated Heat Input Capacity (mmBTU/hr)',
                            'General Fuel Type']]

In [14]:
# Check the column data type
Em_fuel_type_df.dtypes

Facility Id                                            int64
Facility Name                                         object
State                                                 object
Primary NAICS Code                                     int64
Industry Type (sectors)                               object
Unit Maximum Rated Heat Input Capacity (mmBTU/hr)    float64
General Fuel Type                                     object
dtype: object

In [15]:
# Remove the null value
Em_fuel_type_df.isnull()

Unnamed: 0,Facility Id,Facility Name,State,Primary NAICS Code,Industry Type (sectors),Unit Maximum Rated Heat Input Capacity (mmBTU/hr),General Fuel Type
0,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...
210382,False,False,False,False,False,False,False
210383,False,False,False,False,False,False,False
210384,False,False,False,False,False,False,False
210385,False,False,False,False,False,False,False


In [16]:
Em_fuel_type_df.dropna()

Unnamed: 0,Facility Id,Facility Name,State,Primary NAICS Code,Industry Type (sectors),Unit Maximum Rated Heat Input Capacity (mmBTU/hr),General Fuel Type
0,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
1,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
2,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
3,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
4,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211112,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
...,...,...,...,...,...,...,...
210382,1003318,ZYBACH CRYOGENIC PLANT,TX,211111,Petroleum and Natural Gas Systems,32.0,Natural Gas
210383,1003318,ZYBACH CRYOGENIC PLANT,TX,211111,Petroleum and Natural Gas Systems,32.0,Natural Gas
210384,1003318,ZYBACH CRYOGENIC PLANT,TX,211111,Petroleum and Natural Gas Systems,32.0,Natural Gas
210385,1003318,ZYBACH CRYOGENIC PLANT,TX,211111,Petroleum and Natural Gas Systems,32.0,Natural Gas


In [18]:
Em_fuel_type_df = Em_fuel_type_df.rename(columns = {"Facility Id": "Fac_ID",
                                                   "Facility Name": "Fac_Name",
                                                   "Primary NAICS Code": "NAICS",
                                                   "Industry Type (sectors)": "Sector",
                                                   "Unit Maximum Rated Heat Input Capacity (mmBTU/hr)": "Max_capacity",
                                                   "General Fuel Type": "Fuel_type"})
Em_fuel_type_df.head(8)

Unnamed: 0,Fac_ID,Fac_Name,State,NAICS,Sector,Max_capacity,Fuel_type
0,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
1,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
2,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
3,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
4,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211112,"Natural Gas and Natural Gas Liquids Suppliers,...",30.0,Natural Gas
5,1000112,23rd and 3rd,NY,221112,Power Plants,,Natural Gas
6,1000112,23rd and 3rd,NY,221112,Power Plants,,Natural Gas
7,1000112,23rd and 3rd,NY,221112,Power Plants,7.4,Natural Gas


In [21]:
Em_fuel_type_df.drop_duplicates("Fac_ID")

Unnamed: 0,Fac_ID,Fac_Name,State,NAICS,Sector,Max_capacity,Fuel_type
0,1012147,17Z Gas Plant - Chevron USA Inc.,CA,211130,"Natural Gas and Natural Gas Liquids Suppliers,...",30.000000,Natural Gas
5,1000112,23rd and 3rd,NY,221112,Power Plants,,Natural Gas
45,1006394,29-6 #2 Central Delivery Point,NM,213112,Petroleum and Natural Gas Systems,11.720875,Natural Gas
50,1002885,30-5 Central Delivery Point Compressor Station,NM,213112,Petroleum and Natural Gas Systems,11.720875,Natural Gas
55,1002707,31-6 Central Delivery Point,NM,213112,Petroleum and Natural Gas Systems,11.720875,Natural Gas
...,...,...,...,...,...,...,...
210310,1010592,Zider,TX,486210,Petroleum and Natural Gas Systems,8.500000,Natural Gas
210318,1011602,ZINNIA COMPRESSOR STATION,WV,211111,Petroleum and Natural Gas Systems,17.800000,Natural Gas
210320,1000805,Zion Energy Center,IL,221112,Power Plants,,Natural Gas
210363,1001464,Zuni,CO,221330,Power Plants,,Natural Gas


In [23]:
Em_fuel_type_df.to_csv("Em_fuel_type.csv", index = 'False', header = 'True')