# US Sustainability Analysis

Subset of the Sustainability_Analysis.ipynb that focuses on only the US

12.4 By 2020, achieve the environmentally sound management of chemicals and all wastes throughout their life cycle, in accordance with agreed international frameworks, and significantly reduce their release to air, water and soil in order to minimize their adverse impacts on human health and the environment

## Import Dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import seaborn as sns
import os

# Read in xlsx files related to UN's Goal of Sustainability

## Electronic Waste Datasets

In [2]:
# 1st xlsx: electronic waste generated per capita (kg) 
# Goal 12, Target 12.4, Indicator 12.4.2
e_waste_pc = pd.read_excel('data/EN_EWT_GENPCAP.xlsx')
e_waste_pc.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_GENPCAP,"Electronic waste generated, per capita (Kg)",4,Afghanistan,E,G,KG,...,0.254903,0.297841,0.342332,0.385085,0.427947,0.468858,0.509681,0.549503,0.592546,0.630326
1,12,12.4,12.4.2,EN_EWT_GENPCAP,"Electronic waste generated, per capita (Kg)",2,Africa,E,G,KG,...,1.736518,1.882356,1.991023,2.095194,2.187254,2.262542,2.333871,2.384393,2.438595,2.486051


In [12]:
# # Create a list of the years

# year_df = e_waste_pc[['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
#        '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
#        '2018', '2019']]
# year_df.head(2)

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,0.064123,0.068659,0.074788,0.083986,0.094898,0.109521,0.127797,0.149665,0.179011,0.215028,0.254903,0.297841,0.342332,0.385085,0.427947,0.468858,0.509681,0.549503,0.592546,0.630326
1,0.850446,0.904585,0.961481,1.021856,1.091787,1.1717,1.263077,1.36957,1.488206,1.611851,1.736518,1.882356,1.991023,2.095194,2.187254,2.262542,2.333871,2.384393,2.438595,2.486051


### Test Data with select countries

In [3]:
# Create a list of the columns

columns = ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', 
       '2008', '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', 
       '2017', '2018', '2019']

#  Create a new df for "US" with the columns. 
us_e_waste_pc = e_waste_pc.loc[e_waste_pc["GeoAreaName"] == "United States of America",  columns]
us_e_waste_pc.head()

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
199,11.728266,12.280305,12.839701,13.447392,14.106024,14.778751,15.475267,16.123013,16.716385,17.286182,17.816548,18.36893,18.95407,19.494872,19.925803,20.26028,20.475782,20.659757,20.824843,20.991704


In [4]:
us_e_waste_df = pd.DataFrame({
    
    "Year":
       ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'],
    "Electronic waste generated, per capita (Kg)": 
        ["11.728266", "12.280305", "12.839701", "13.447392", "14.106024", "14.778751", 
        "15.475267", "16.123013", "16.716385", "17.286182", "17.816548", "18.36893", 
        "18.95407", "19.494872", "19.925803", "20.26028", "20.475782", "20.659757", 
        "20.824843", "20.991704"]})

us_e_waste_df

Unnamed: 0,Year,"Electronic waste generated, per capita (Kg)"
0,2000,11.728266
1,2001,12.280305
2,2002,12.839701
3,2003,13.447392
4,2004,14.106024
5,2005,14.778751
6,2006,15.475267
7,2007,16.123013
8,2008,16.716385
9,2009,17.286182


In [10]:
# # Write to csv file
# us_e_waste_df.to_csv("US_data/us_electronic_waste_pc.csv", encoding="utf-8", index=False)

In [5]:
# # 2nd xlsx: electronic waste generated in metric unit Tonnes
# # Goal 12, Target 12.4, Indicator 12.4.2
# e_waste_total = pd.read_excel('data/EN_EWT_GENV.xlsx')
# e_waste_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_GENV,Electronic waste generated (Tonnes),4,Afghanistan,E,G,TONNES,...,7341.974,8848.573,10508.56,12219.51,14018.68,15817.39,17663.5,19523.85,21343.52,23013.19
1,12,12.4,12.4.2,EN_EWT_GENV,Electronic waste generated (Tonnes),2,Africa,E,G,TONNES,...,1623176.0,1783923.0,1943366.0,2097974.0,2244913.0,2382539.0,2515126.0,2649666.0,2780043.0,2905206.0


## Electronic Waste Recycling Datasets

In [11]:
# 3rd xlsx: electronic waste recycling, per capita (kg) 
# Goal 12, Target 12.4, Indicator 12.4.2
e_recycling_pc = pd.read_excel('data/EN_EWT_RCYPCAP.xlsx')
e_recycling_pc.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_RCYPCAP,"Electronic waste recycling, per capita (Kg)",8,Albania,E,G,KG,,,,3.237495,4.242644,4.889079,5.103762,5.303011,5.477067,5.639709
1,12,12.4,12.4.2,EN_EWT_RCYPCAP,"Electronic waste recycling, per capita (Kg)",28,Antigua and Barbuda,E,G,KG,,,,,11.883926,11.755779,12.078921,12.38594,12.666956,12.948181


In [14]:
e_recycling_pc.columns

Index(['Goal', 'Target', 'Indicator', 'SeriesCode', 'SeriesDescription',
       'GeoAreaCode', 'GeoAreaName', 'Nature', 'Reporting Type', 'Units',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019'],
      dtype='object')

In [16]:
# Create a list of the columns

columns = ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019']

#  Create a new df for "US" with the columns. 
us_e_recycling_pc = e_recycling_pc.loc[e_recycling_pc["GeoAreaName"] == "United States of America",  columns]
us_e_recycling_pc.head()

Unnamed: 0,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
80,,,,,,,,,,,2.100246,2.727996,3.184268,4.0139,4.393212,3.833229,3.312529,3.137643,3.162841,3.190476


In [18]:
us_e_recycling_pc = pd.DataFrame({
    
    "Year":
       ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'],
    "Electronic waste recycling, per capita (Kg)": 
        ["NaN", "NaN", "NaN", "NaN", "NaN", "NaN", 
        "NaN", "NaN", "NaN", "NaN", "2.100246", "2.727996", 
        "3.184268", "4.0139", "4.393212", "3.833229", "3.312529", "3.137643", 
        "20.824843", "20.991704"]})

us_e_recycling_pc

Unnamed: 0,Year,"Electronic waste recycling, per capita (Kg)"
0,2000,11.728266
1,2001,12.280305
2,2002,12.839701
3,2003,13.447392
4,2004,14.106024
5,2005,14.778751
6,2006,15.475267
7,2007,16.123013
8,2008,16.716385
9,2009,17.286182


In [9]:
# Write to csv file
e_recycling_pc.to_csv("US_data/us_electronic_waste_recycling_pc.csv", encoding="utf-8", index=False)

In [12]:
# # 4th xlsx: electronic waste recycling, in metric tonnes
# # Goal 12, Target 12.4, Indicator 12.4.2
# e_recycling_total = pd.read_excel('data/EN_EWT_RCYV.xlsx')
# e_recycling_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_RCYV,Electronic waste recycling (Tonnes),8,Albania,E,G,TONNES,,,,9401.0,12288.0,14132.0,14731.64,15294.78,15788.93,16247.51
1,12,12.4,12.4.2,EN_EWT_RCYV,Electronic waste recycling (Tonnes),28,Antigua and Barbuda,E,G,TONNES,,,,,1100.0,1100.0,1141.7,1181.93,1219.6,1257.46


## Hazardous Waste Datasets

In [16]:
# 5th xlsx: hazardous waste generated, per unit of GDP
# Goal 12, Target 12.4, Indicator 12.4.2
haz_waste_gen_GDP = pd.read_excel('data/EN_HAZ_GENGDP.xlsx')
haz_waste_gen_GDP.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.4,12.4.2,EN_HAZ_GENGDP,"Hazardous waste generated, per unit of GDP (ki...",12,Algeria,C,G,KG_PER_CON_USD,...,0.002486,,,,,,,,,
1,12,12.4,12.4.2,EN_HAZ_GENGDP,"Hazardous waste generated, per unit of GDP (ki...",20,Andorra,C,G,KG_PER_CON_USD,...,,,,,,0.000358,0.000746,0.000636,0.000671,0.000524


In [None]:
haz_waste_gen_GDP = pd.DataFrame({
    
    "Year":
       ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'],
    "Hazardous waste generated, per unit of GDP (kg)": 
        ["11.728266", "12.280305", "12.839701", "13.447392", "14.106024", "14.778751", 
        "15.475267", "16.123013", "16.716385", "17.286182", "17.816548", "18.36893", 
        "18.95407", "19.494872", "19.925803", "20.26028", "20.475782", "20.659757", 
        "20.824843", "20.991704"]})

haz_waste_gen_GDP


# Write to csv file
haz_waste_gen_GDP.to_csv("US_data/hazardous_waste.csv", encoding="utf-8", index=False)

In [19]:
# 6th xlsx: hazardous waste generated, in metric tonnes
# Goal 12, Target 12.4, Indicator 12.4.2
haz_waste_gen_total = pd.read_excel('data/EN_HAZ_GENV.xlsx')
haz_waste_gen_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.4,12.4.2,EN_HAZ_GENV,Hazardous waste generated (Tonnes),12,Algeria,C,G,TONNES,...,,,,,,,,,,
1,12,12.4,12.4.2,EN_HAZ_GENV,Hazardous waste generated (Tonnes),20,Andorra,C,G,TONNES,...,,,,,,976.0,2081.0,1788.0,1923.40002,1527.5


### Merge the two hazardous waste generated dataframes

In [22]:
# 7th xlsx: hazardous waste treated, by type of treatment
# Goal 12, Target 12.4, Indicator 12.4.2
haz_waste_treated = pd.read_excel('data/EN_HAZ_TREATV.xlsx')
haz_waste_treated.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of waste treatment,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.4,12.4.2,EN_HAZ_TREATV,"Hazardous waste treated, by type of treatment ...",28,Antigua and Barbuda,C,G,LANDFIL,...,100.860001,52.959999,23.059999,157.220001,7.94,7.16,13.6,16.6,,
1,12,12.4,12.4.2,EN_HAZ_TREATV,"Hazardous waste treated, by type of treatment ...",51,Armenia,C,G,INCINRT,...,1359.0,456.0,300.0,12.0,167.399994,17.0,2379.0,289.700012,604.900024,582.799988


In [10]:
haz_waste_treated = pd.DataFrame({
    
    "Year":
       ['2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'],
    "Hazardous waste generated, per unit of GDP (kg)": 
        ["11.728266", "12.280305", "12.839701", "13.447392", "14.106024", "14.778751", 
        "15.475267", "16.123013", "16.716385", "17.286182", "17.816548", "18.36893", 
        "18.95407", "19.494872", "19.925803", "20.26028", "20.475782", "20.659757", 
        "20.824843", "20.991704"]})

haz_waste_treated


Unnamed: 0,Year,"Hazardous waste generated, per unit of GDP (kg)"
0,2000,11.728266
1,2001,12.280305
2,2002,12.839701
3,2003,13.447392
4,2004,14.106024
5,2005,14.778751
6,2006,15.475267
7,2007,16.123013
8,2008,16.716385
9,2009,17.286182


In [None]:
merged_us_df = pd.merge(haz_waste_treated, haz_waste_gen_GDP, e_recycling_pc, us_e_waste_df , on="Year")
merged_us_df

## Domestic Material Consumption Datasets

In [27]:
# 8th xlsx: domestic material consumption per capita, by type of product
# Goal 12, Target 12.2, Indicator 12.2.2
dom_mat_consumption_pc = pd.read_excel('data/EN_MAT_DOMCMPC.xlsx')
dom_mat_consumption_pc.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of product,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.2,12.2.2,EN_MAT_DOMCMPC,"Domestic material consumption per capita, by t...",4,Afghanistan,E,G,BIM,...,1.72341,1.97509,1.93652,1.82431,1.90564,1.83294,1.82012,1.80205,1.78902,1.77893
1,12,12.2,12.2.2,EN_MAT_DOMCMPC,"Domestic material consumption per capita, by t...",4,Afghanistan,E,G,COL,...,0.01271,0.01785,0.02517,0.0244,0.02443,0.04246,0.04633,0.04798,0.04962,0.05124


In [28]:
dom_mat_consumption_pc.columns

Index(['Goal', 'Target', 'Indicator', 'SeriesCode', 'SeriesDescription',
       'GeoAreaCode', 'GeoAreaName', 'Nature', 'Reporting Type',
       'Type of product', 'Units', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017'],
      dtype='object')

In [29]:
# Only keep necessary columns
dom_mat_consumption_pc = dom_mat_consumption_pc[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Type of product', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
dom_mat_consumption_pc.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.2,12.2.2,"Domestic material consumption per capita, by t...",Afghanistan,BIM,TONNES,2.26784,1.96185,2.07472,2.01228,...,1.79459,1.72341,1.93652,1.82431,1.90564,1.83294,1.82012,1.80205,1.78902,1.77893
1,12.2,12.2.2,"Domestic material consumption per capita, by t...",Afghanistan,COL,TONNES,5e-05,0.00124,0.00096,0.00152,...,0.00913,0.01271,0.02517,0.0244,0.02443,0.04246,0.04633,0.04798,0.04962,0.05124


In [30]:
# 9th xlsx: domestic material consumption per unit of GDP, by type of product
# Goal 12, Target 12.2, Indicator 12.2.2
dom_mat_consumption_GDP = pd.read_excel('data/EN_MAT_DOMCMPG.xlsx')
dom_mat_consumption_GDP.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of product,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.2,12.2.2,EN_MAT_DOMCMPG,"Domestic material consumption per unit of GDP,...",4,Afghanistan,E,G,BIM,...,3.54062,3.55151,3.46922,3.10169,3.0179,2.81812,2.80302,2.91022,2.86592,2.85037
1,12,12.2,12.2.2,EN_MAT_DOMCMPG,"Domestic material consumption per unit of GDP,...",4,Afghanistan,E,G,COL,...,0.02612,0.03211,0.04509,0.04149,0.03869,0.06527,0.07135,0.07749,0.07949,0.0821


In [31]:
# Only keep necessary columns
dom_mat_consumption_GDP = dom_mat_consumption_GDP[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Type of product', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
dom_mat_consumption_GDP.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.2,12.2.2,"Domestic material consumption per unit of GDP,...",Afghanistan,BIM,KG_PER_CON_USD,8.42728,8.06455,5.41873,5.09579,...,3.35689,3.54062,3.46922,3.10169,3.0179,2.81812,2.80302,2.91022,2.86592,2.85037
1,12.2,12.2.2,"Domestic material consumption per unit of GDP,...",Afghanistan,COL,KG_PER_CON_USD,0.00018,0.0051,0.0025,0.00384,...,0.01708,0.02612,0.04509,0.04149,0.03869,0.06527,0.07135,0.07749,0.07949,0.0821


In [32]:
# 10th xlsx: domestic material consumption, by type of raw product
# Goal 12, Target 12.2, Indicator 12.2.2
dom_mat_consumption_total = pd.read_excel('data/EN_MAT_DOMCMPT.xlsx')
dom_mat_consumption_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of product,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.2,12.2.2,EN_MAT_DOMCMPT,"Domestic material consumption, by type of raw ...",4,Afghanistan,E,G,BIM,...,47038880.0,55310950.0,55777840.0,54197770.0,58497430.0,58162420.0,59623400.0,60794940.0,62000240.0,63205540.0
1,12,12.2,12.2.2,EN_MAT_DOMCMPT,"Domestic material consumption, by type of raw ...",4,Afghanistan,E,G,COL,...,347005.9,500008.4,725012.2,725012.2,750012.6,1347192.0,1517746.0,1618705.0,1719664.0,1820623.0


In [33]:
# Only keep necessary columns
dom_mat_consumption_total = dom_mat_consumption_total[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Type of product', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
dom_mat_consumption_total.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,BIM,TONNES,45569430.0,41132970.0,45602090.0,46412990.0,...,47766150.0,47038880.0,55777840.0,54197770.0,58497430.0,58162420.0,59623400.0,60794940.0,62000240.0,63205540.0
1,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,COL,TONNES,1000.016,26000.44,21000.36,35000.59,...,243004.1,347005.9,725012.2,725012.2,750012.6,1347192.0,1517746.0,1618705.0,1719664.0,1820623.0


### Merge the domestic material consumption dataframes

In [34]:
merged_dom_mat_consumption = pd.merge(dom_mat_consumption_pc, dom_mat_consumption_GDP, on="GeoAreaName")
merged_dom_mat_consumption.head(2)

Unnamed: 0,Target_x,Indicator_x,SeriesDescription_x,GeoAreaName,Type of product_x,Units_x,2000_x,2001_x,2002_x,2003_x,...,2007_y,2008_y,2010_y,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y
0,12.2,12.2.2,"Domestic material consumption per capita, by t...",Afghanistan,BIM,TONNES,2.26784,1.96185,2.07472,2.01228,...,3.35689,3.54062,3.46922,3.10169,3.0179,2.81812,2.80302,2.91022,2.86592,2.85037
1,12.2,12.2.2,"Domestic material consumption per capita, by t...",Afghanistan,BIM,TONNES,2.26784,1.96185,2.07472,2.01228,...,0.01708,0.02612,0.04509,0.04149,0.03869,0.06527,0.07135,0.07749,0.07949,0.0821


In [35]:
merged_dom_mat_consumption_overall = pd.merge(dom_mat_consumption_total, merged_dom_mat_consumption, on="GeoAreaName")
merged_dom_mat_consumption_overall.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007_y,2008_y,2010_y,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y
0,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,BIM,TONNES,45569430.0,41132970.0,45602090.0,46412990.0,...,3.35689,3.54062,3.46922,3.10169,3.0179,2.81812,2.80302,2.91022,2.86592,2.85037
1,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,BIM,TONNES,45569430.0,41132970.0,45602090.0,46412990.0,...,0.01708,0.02612,0.04509,0.04149,0.03869,0.06527,0.07135,0.07749,0.07949,0.0821


In [36]:
# # Visualize pairplots using seaborn
# sns.pairplot(merged_dom_mat_consumption_overall)

### Now we have narrowed down the datasets for the different topics of sustainability: 
1. Target 12.4 Electronic waste
2. Target 12.4 Electronic waste recycling
3. Target 12.4 Hazardous waste
4. Target 12.2 Domestic material consumption

Merge these dataframes to perform analysis

In [37]:
# Merge the datasets related to Target 12.4 
merged_electronics = pd.merge(merged_e_waste, merged_e_recycling, on="GeoAreaName")
# merged_electronics.head(2)
merged_e_haz = pd.merge(merged_electronics, merged_haz_waste_overall, on="GeoAreaName")

In [38]:
# Write to csv file
merged_e_haz.to_csv("electronics_sustainability.csv", encoding="utf-8", index=False)

In [39]:
print(merged_e_haz.info())
merged_e_haz.dropna(inplace=True)
print(merged_e_haz.info())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 127 entries, 0 to 126
Columns: 141 entries, Target_x_x to 2017
dtypes: float64(118), object(23)
memory usage: 140.9+ KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 121 to 123
Columns: 141 entries, Target_x_x to 2017
dtypes: float64(118), object(23)
memory usage: 3.3+ KB
None


In [40]:
merged_e_haz

Unnamed: 0,Target_x_x,Indicator_x_x,SeriesDescription_x_x,GeoAreaName,Units_x_x,2000_x_x,2001_x_x,2002_x_x,2003_x_x,2004_x_x,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
121,12.4,12.4.2,"Electronic waste generated, per capita (Kg)",Ukraine,KG,1.810717,1.95,2.10486,2.281523,2.472581,...,75388.0,56288.0,16473.0,15612.0,14007.0,15097.0,8273.0,5808.0,6232.0,8748.0
122,12.4,12.4.2,"Electronic waste generated, per capita (Kg)",Ukraine,KG,1.810717,1.95,2.10486,2.281523,2.472581,...,990591.0,1066308.0,126849.0,138455.0,146760.0,102983.0,81584.0,78650.0,102517.0,96810.0
123,12.4,12.4.2,"Electronic waste generated, per capita (Kg)",Ukraine,KG,1.810717,1.95,2.10486,2.281523,2.472581,...,995424.0,894834.0,642392.0,597529.0,541356.0,439049.0,327134.0,314543.0,337658.0,305545.0


In [41]:
# # Target 12.4 data
# merged_e_haz.corr()

In [42]:
# # Target 12.2 data
# merged_dom_mat_consumption_overall.corr()

In [43]:
# # Final dataframe merge
# sustainability_df = pd.merge(merged_electronics, merged_haz_dom, on="GeoAreaName")
# sustainability_df.head(1)