<a href="https://colab.research.google.com/github/thesuper147/Carbon-Emissions_CS_DSRP/blob/main/Carbon_Emissions_CS_DSRP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Project Question: For a consumer, what purchasing habits have the largest impact on their carbon footprint?

In [1]:
import pandas as pd
import matplotlib.pyplot as plt  # pyplot (not the top-level matplotlib package) is what the plotting cells call via `plt`

# Load the supply chain emission factors table.
# NOTE(review): the path assumes Google Drive is already mounted at /content/drive;
# no mount step is visible in this notebook -- confirm before running on a fresh kernel.
df = pd.read_csv('/content/drive/MyDrive/Carbon-Emissions_CS_DSRP/SupplyChainEmissions.csv')

# Drop the two code columns and give the remaining columns the short names
# ('Industry', 'Total_Emissions', ...) that the later cells index by.
# Done as one chained expression instead of mutating the frame with inplace=True.
df = (
    df
    .drop(columns=['2017 NAICS Code', 'Reference USEEIO Code'])
    .rename(columns={
        '2017 NAICS Title': 'Industry',
        'Supply Chain Emission Factors without Margins': 'Emissions_No_Margins',
        'Margins of Supply Chain Emission Factors': 'Margins',
        'Supply Chain Emission Factors with Margins': 'Total_Emissions'
    })
)


Unnamed: 0,Commodity,Total Exports Value ($US)
0,,
1,All Commodities,183013072069
2,111 Agricultural Products,5749701972
3,1111 Oilseeds & Grains,3094052185
4,11111 Soybeans,699526766
...,...,...
980,980000 Goods Returned (exports For Canada Only),18996749
981,990 Other Special Classification Provisions,6502520105
982,9900 Other Special Classification Provisions,6502520105
983,99000 Other Special Classification Provisions,6502520105


In [3]:
# Print the column names, non-null counts and dtypes of `df`.
# NOTE(review): the saved output of this cell lists 'Commodity' and
# 'Total Exports Value ($US)', which are not the columns produced by the
# rename in the first cell -- the output looks stale (df was presumably bound
# to the export data when this ran). Re-run with Restart & Run All to refresh.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 985 entries, 0 to 984
Data columns (total 2 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   Commodity                  984 non-null    object
 1   Total Exports Value ($US)  748 non-null    object
dtypes: object(2)
memory usage: 15.5+ KB


A first look at the scale of emission factors across a random sample of industries

In [4]:
import matplotlib.pyplot as plt

# Bar chart of the emission factor for a small random sample of industries.
SAMPLE_SIZE = 9       # number of industries shown
MAX_LABEL_LEN = 27    # industry names longer than this are shortened on the axis

# A fixed random_state makes the chart reproducible across re-runs;
# min(...) keeps the cell working if the frame has fewer rows than SAMPLE_SIZE.
sample_df = df.sample(min(SAMPLE_SIZE, len(df)), random_state=42)
x = sample_df['Industry']
y = sample_df['Total_Emissions']

# Shorten long industry names and mark the cut with an ellipsis.
x2 = [name[:MAX_LABEL_LEN] + '...' if len(name) > MAX_LABEL_LEN else name for name in x]

# Plot against integer positions rather than the label strings: matplotlib
# treats equal strings as the same category, so two industries whose shortened
# names coincide would otherwise be drawn on top of each other.
positions = list(range(len(x2)))

fig, ax = plt.subplots()
ax.bar(positions,
       y,
       label="Pollution Emission Scale vs. Industry")
ax.set_xticks(positions)
ax.set_xticklabels(x2, rotation=45, ha="right")
ax.set_xlabel("Industry")
# NOTE(review): y holds the per-industry 'Total_Emissions' values with no
# aggregation, so the word "Average" in this label may be inaccurate -- confirm.
ax.set_ylabel('Average Emission Factor')
ax.set_title("Pollution Emission Scale vs. Sampled Industries")
plt.show()

KeyError: 'Industry'

In [None]:

import matplotlib.pyplot as plt

# NOTE(review): this cell repeats the sampled-industry bar chart from the
# previous plotting cell; the only difference is that labels are cut to 27
# characters without a '...' suffix. Consider keeping just one of the two.

# A fixed random_state makes the chart reproducible across re-runs;
# min(...) keeps the cell working if the frame has fewer than 9 rows.
sample_df = df.sample(min(9, len(df)), random_state=42)
x = sample_df['Industry']
y = sample_df['Total_Emissions']

# First 27 characters of each industry name, used only as tick labels.
short_labels = x.str[:27]

# Bars are placed at integer positions so that two industries whose
# truncated names are identical still get separate bars (matplotlib merges
# equal category strings onto one position).
positions = list(range(len(short_labels)))

fig, ax = plt.subplots()
ax.bar(positions,
       y,
       label="Pollution Emission Scale vs. Industry")
ax.set_xticks(positions)
ax.set_xticklabels(short_labels, rotation=45, ha="right")
ax.set_xlabel("Industry")
ax.set_ylabel('Average Emission Factor')
ax.set_title("Pollution Emission Scale vs. Sampled Industries")
plt.show()

Mapping industries to consumer spending categories to fit the research question

In [None]:
import re

import numpy as np

# Work on a copy so the category column is not added to the original frame.
df_consumer = df.copy()

# Every industry starts as 'Other'; the loop below overwrites this for rows
# whose industry title contains one of the keywords.
df_consumer['Consumer Category'] = 'Other'

# filtering of industries (2017 naics title) to create specific categories that are usable for project
keywords_map = {
    'Housing & Utilities': ['Power generation', 'Electric', 'Electricity', 'Residential', 'Construction', 'Natural gas', 'Water', 'Sewer', 'Furniture', 'Appliance', 'Plumbing', 'Gas'],
    'Food, Farming, & Beverages': ['Farm', 'Food', 'Beverage', 'Alcohol', 'Snack', 'Fish', 'Fishing', 'Breakfast', 'Cereal', 'Sugar', 'Winery', 'Brewery', 'Restaurant', 'Grocery', 'Tortilla', 'Corn', 'Farming', 'Dining', 'Flour', 'Pasta', 'Bakery', 'Bakeries', 'Convenience'],
    'Transportation': ['Automobile', 'Vehicle', 'Gasoline', 'Air transport', 'Rail', 'Transit', 'Taxi', 'Airplane', 'Car', 'Train', 'Bus', 'Transport'],
    'Fashion': ['Apparel', 'Textile', 'Clothing', 'Fabric', 'Leather', 'Footwear', 'Fashion', 'Designer', 'Jewelry'],
    'Healthcare': ['Hospital', 'Ambulatory', 'Health', 'Pharmaceutical', 'Medical', 'Surgical', 'Surgery', 'Psychology', 'Doctor', 'Pharmacy', 'Veterinary', 'Vet', 'Safety', 'Ambulance', 'Clinic', 'Medicine'],
    'Entertainment / Recreation': ['Hotel', 'Amusement', 'Recreation', 'Museum', 'Arts', 'Spectator sports', 'Book', 'Motion picture', 'Movie', 'TV', 'Gambling', 'Casino', 'Game', 'Sports', 'Gaming', 'Theatre', 'Park'],
    'Financial & Organization Services': ['Securities', 'Insurance', 'Credit', 'Monetary', 'Legal', 'Accounting', 'Bank','Financing', 'Claims', 'Loan', 'Brokers', 'Trust', 'Court', 'Financial', 'Consulting'],
    # 'Corporate' was previously misspelled 'Corperate', so it could never match.
    'Manufacturing and Development': ['Manufacturing', 'Engineering', 'Corporate', 'Production', 'Extraction', 'Fuel', 'Development', 'Research', 'Refining', 'Internet']
}

# For each category, build one case-insensitive whole-word regex out of its
# keywords, flag the industries that match, and assign the category to them.
# Categories are applied in dictionary order and each assignment overwrites
# the previous one, so an industry matching several categories ends up in the
# LAST matching one.
# NOTE(review): because of the \b word boundaries a keyword only matches as a
# whole word (e.g. 'Hospital' does not match 'Hospitals'); check the 'Other'
# bucket for plural titles that were expected to match.
for category, keywords in keywords_map.items():
    # Non-capturing group (?:...) avoids the pandas "match groups" warning;
    # re.escape keeps any regex metacharacter in a keyword literal.
    sortdef = r'\b(?:' + '|'.join(re.escape(word) for word in keywords) + r')\b'
    # na=False: a missing industry title counts as "no match" instead of
    # producing a NaN in the mask, which .loc cannot index with.
    bmask = df_consumer['Industry'].str.contains(sortdef, case=False, regex=True, na=False)
    df_consumer.loc[bmask, 'Consumer Category'] = category


# --> samples data for proofing and checking
print("number of industries assigned to each category:")
print(df_consumer['Consumer Category'].value_counts())

print("\nsample of mapped industries:")
print(df_consumer[['Industry', 'Consumer Category']].sample(10))

Creating a prototype bar plot of the average emission factor for each consumer category

In [None]:
import seaborn as sns

# Average emission factor for each consumer category, largest first.
emission_factor_avg = (
    df_consumer
    .groupby('Consumer Category')['Total_Emissions']
    .mean()
    .sort_values(ascending=False)
)

# "\n" puts a blank line before the heading. The previous "\g" is not a valid
# escape sequence: it printed a literal backslash and triggers an
# invalid-escape warning on recent Python versions.
print("\nghg emissions by consumer spending category:")
print(emission_factor_avg)


# Horizontal bar chart: one bar per category, in the sorted order above.
sns.set_style('ticks')
fig, ax = plt.subplots(figsize=(12, 8))
# NOTE(review): newer seaborn releases warn when `palette` is passed without
# `hue`; left unchanged here because the fix depends on the installed version.
sns.barplot(x=emission_factor_avg.values, y=emission_factor_avg.index, palette='crest', ax=ax)
ax.set_title('Average Emissions by Consumer Spending Category')
ax.set_xlabel('Average Emission Factor (kg CO2e / 2022 USD spent)')
ax.set_ylabel('Consumer Category')
ax.grid(axis='x', linestyle='--')
plt.show()

In [6]:

import pandas as pd
import matplotlib.pyplot as plt  # keep `plt` bound to pyplot; `import matplotlib as plt` would break plt.bar / plt.show in the plotting cells

# Load the export values table and bind it to its own name (it was previously
# only displayed, so nothing could use it afterwards).
# NOTE(review): the path assumes Google Drive is mounted at /content/drive -- confirm.
exports_raw = pd.read_csv('/content/drive/MyDrive/Carbon-Emissions_CS_DSRP/export_value.csv')

# Last expression of the cell: shows the frame as the cell output.
exports_raw

Unnamed: 0,Commodity,Total Exports Value ($US)
0,,
1,All Commodities,183013072069
2,111 Agricultural Products,5749701972
3,1111 Oilseeds & Grains,3094052185
4,11111 Soybeans,699526766
...,...,...
980,980000 Goods Returned (exports For Canada Only),18996749
981,990 Other Special Classification Provisions,6502520105
982,9900 Other Special Classification Provisions,6502520105
983,99000 Other Special Classification Provisions,6502520105
