# Sustainability Analysis

### This project analyzes the data related to the United Nation's Global Sustainable Development Goal 12: "Ensure sustainable consumption and production patterns," in order to visualize trends and predict if the targets will be met by the goal target's deadlines of 2020 and 2030.

Imported xlsx files and information on goal 12 are from the UN's SDG database website: https://unstats.un.org/sdgs/indicators/database with a total of 375,264 data observations related to the sustainability goal.

# Import Dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
import seaborn as sns
import os

# Read in xlsx files related to UN's Goal of Sustainability

## Electronic Waste Datasets

In [3]:
# 1st xlsx: electronic waste generated, per capita (kg) 
# Goal 12, Target 12.4, Indicator 12.4.2
e_waste_pc = pd.read_excel('data/EN_EWT_GENPCAP.xlsx')
e_waste_pc.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_GENPCAP,"Electronic waste generated, per capita (Kg)",4,Afghanistan,E,G,KG,...,0.254903,0.297841,0.342332,0.385085,0.427947,0.468858,0.509681,0.549503,0.592546,0.630326
1,12,12.4,12.4.2,EN_EWT_GENPCAP,"Electronic waste generated, per capita (Kg)",2,Africa,E,G,KG,...,1.736518,1.882356,1.991023,2.095194,2.187254,2.262542,2.333871,2.384393,2.438595,2.486051


In [4]:
e_waste_pc.columns

Index(['Goal', 'Target', 'Indicator', 'SeriesCode', 'SeriesDescription',
       'GeoAreaCode', 'GeoAreaName', 'Nature', 'Reporting Type', 'Units',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019'],
      dtype='object')

In [4]:
# Only keep necessary columns
e_waste_pc = e_waste_pc[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Units',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019']]
e_waste_pc.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Units,2000,2001,2002,2003,2004,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12.4,12.4.2,"Electronic waste generated, per capita (Kg)",Afghanistan,KG,0.064123,0.068659,0.074788,0.083986,0.094898,...,0.254903,0.297841,0.342332,0.385085,0.427947,0.468858,0.509681,0.549503,0.592546,0.630326
1,12.4,12.4.2,"Electronic waste generated, per capita (Kg)",Africa,KG,0.850446,0.904585,0.961481,1.021856,1.091787,...,1.736518,1.882356,1.991023,2.095194,2.187254,2.262542,2.333871,2.384393,2.438595,2.486051


In [5]:
# 2nd xlsx: electronic waste generated in metric unit Tonnes
# Goal 12, Target 12.4, Indicator 12.4.2
e_waste_total = pd.read_excel('data/EN_EWT_GENV.xlsx')
e_waste_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_GENV,Electronic waste generated (Tonnes),4,Afghanistan,E,G,TONNES,...,7341.974,8848.573,10508.56,12219.51,14018.68,15817.39,17663.5,19523.85,21343.52,23013.19
1,12,12.4,12.4.2,EN_EWT_GENV,Electronic waste generated (Tonnes),2,Africa,E,G,TONNES,...,1623176.0,1783923.0,1943366.0,2097974.0,2244913.0,2382539.0,2515126.0,2649666.0,2780043.0,2905206.0


In [7]:
# Only keep necessary columns
e_waste_total = e_waste_total[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Units',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019']]
e_waste_total.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Units,2000,2001,2002,2003,2004,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12.4,12.4.2,Electronic waste generated (Tonnes),Afghanistan,TONNES,1310.912736,1462.783356,1660.511207,1941.41641,2279.356371,...,7341.974,8848.573,10508.56,12219.51,14018.68,15817.39,17663.5,19523.85,21343.52,23013.19
1,12.4,12.4.2,Electronic waste generated (Tonnes),Africa,TONNES,620085.609183,676266.228815,735815.811708,801704.357828,878039.257946,...,1623176.0,1783923.0,1943366.0,2097974.0,2244913.0,2382539.0,2515126.0,2649666.0,2780043.0,2905206.0


### Merge the electronic waste dataframes

In [8]:
# Merge the two dataframes 
merged_e_waste = pd.merge(e_waste_pc, e_waste_total, on="GeoAreaName")
merged_e_waste.head(2)

Unnamed: 0,Target_x,Indicator_x,SeriesDescription_x,GeoAreaName,Units_x,2000_x,2001_x,2002_x,2003_x,2004_x,...,2010_y,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y,2018_y,2019_y
0,12.4,12.4.2,"Electronic waste generated, per capita (Kg)",Afghanistan,KG,0.064123,0.068659,0.074788,0.083986,0.094898,...,7341.974,8848.573,10508.56,12219.51,14018.68,15817.39,17663.5,19523.85,21343.52,23013.19
1,12.4,12.4.2,"Electronic waste generated, per capita (Kg)",Africa,KG,0.850446,0.904585,0.961481,1.021856,1.091787,...,1623176.0,1783923.0,1943366.0,2097974.0,2244913.0,2382539.0,2515126.0,2649666.0,2780043.0,2905206.0


## Electronic Waste Recycling Datasets

In [9]:
# 3rd xlsx: electronic waste recycling, per capita (kg) 
# Goal 12, Target 12.4, Indicator 12.4.2
e_recycling_pc = pd.read_excel('data/EN_EWT_RCYPCAP.xlsx')
e_recycling_pc.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_RCYPCAP,"Electronic waste recycling, per capita (Kg)",8,Albania,E,G,KG,,,,3.237495,4.242644,4.889079,5.103762,5.303011,5.477067,5.639709
1,12,12.4,12.4.2,EN_EWT_RCYPCAP,"Electronic waste recycling, per capita (Kg)",28,Antigua and Barbuda,E,G,KG,,,,,11.883926,11.755779,12.078921,12.38594,12.666956,12.948181


In [12]:
e_recycling_pc.columns

Index(['Goal', 'Target', 'Indicator', 'SeriesCode', 'SeriesDescription',
       'GeoAreaCode', 'GeoAreaName', 'Nature', 'Reporting Type', 'Units',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019'],
      dtype='object')

In [10]:
# Only keep necessary columns
e_recycling_pc = e_recycling_pc[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Units',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019']]
e_recycling_pc.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Units,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12.4,12.4.2,"Electronic waste recycling, per capita (Kg)",Albania,KG,,,,3.237495,4.242644,4.889079,5.103762,5.303011,5.477067,5.639709
1,12.4,12.4.2,"Electronic waste recycling, per capita (Kg)",Antigua and Barbuda,KG,,,,,11.883926,11.755779,12.078921,12.38594,12.666956,12.948181


In [11]:
# 4th xlsx: electronic waste recycling, in metric tonnes
# Goal 12, Target 12.4, Indicator 12.4.2
e_recycling_total = pd.read_excel('data/EN_EWT_RCYV.xlsx')
e_recycling_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12,12.4,12.4.2,EN_EWT_RCYV,Electronic waste recycling (Tonnes),8,Albania,E,G,TONNES,,,,9401.0,12288.0,14132.0,14731.64,15294.78,15788.93,16247.51
1,12,12.4,12.4.2,EN_EWT_RCYV,Electronic waste recycling (Tonnes),28,Antigua and Barbuda,E,G,TONNES,,,,,1100.0,1100.0,1141.7,1181.93,1219.6,1257.46


In [12]:
# Only keep necessary columns
e_recycling_total = e_recycling_total[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Units',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018',
       '2019']]
e_recycling_total.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Units,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,12.4,12.4.2,Electronic waste recycling (Tonnes),Albania,TONNES,,,,9401.0,12288.0,14132.0,14731.64,15294.78,15788.93,16247.51
1,12.4,12.4.2,Electronic waste recycling (Tonnes),Antigua and Barbuda,TONNES,,,,,1100.0,1100.0,1141.7,1181.93,1219.6,1257.46


### Merge the electronic waste recycling dataframes

In [22]:
# Merge the two dataframes 
merged_e_recycling = pd.merge(e_recycling_total, e_recycling_pc, on="GeoAreaName")
merged_e_recycling.head(2)

Unnamed: 0,Target_x,Indicator_x,SeriesDescription_x,GeoAreaName,Units_x,2010_x,2011_x,2012_x,2013_x,2014_x,...,2010_y,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y,2018_y,2019_y
0,12.4,12.4.2,Electronic waste recycling (Tonnes),Albania,TONNES,,,,9401.0,12288.0,...,,,,3.237495,4.242644,4.889079,5.103762,5.303011,5.477067,5.639709
1,12.4,12.4.2,Electronic waste recycling (Tonnes),Antigua and Barbuda,TONNES,,,,,1100.0,...,,,,,11.883926,11.755779,12.078921,12.38594,12.666956,12.948181


## Hazardous Waste Datasets

In [2]:
# 5th xlsx: hazardous waste generated, per unit of GDP
# Goal 12, Target 12.4, Indicator 12.4.2
haz_waste_gen_GDP = pd.read_excel('data/EN_HAZ_GENGDP.xlsx')
haz_waste_gen_GDP.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.4,12.4.2,EN_HAZ_GENGDP,"Hazardous waste generated, per unit of GDP (ki...",12,Algeria,C,G,KG_PER_CON_USD,...,0.002486,,,,,,,,,
1,12,12.4,12.4.2,EN_HAZ_GENGDP,"Hazardous waste generated, per unit of GDP (ki...",20,Andorra,C,G,KG_PER_CON_USD,...,,,,,,0.000358,0.000746,0.000636,0.000671,0.000524


In [3]:
haz_waste_gen_GDP.columns

Index(['Goal', 'Target', 'Indicator', 'SeriesCode', 'SeriesDescription',
       'GeoAreaCode', 'GeoAreaName', 'Nature', 'Reporting Type', 'Units',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017'],
      dtype='object')

In [4]:
# Only keep necessary columns
haz_waste_gen_GDP = haz_waste_gen_GDP[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
haz_waste_gen_GDP.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Units,2000,2001,2002,2003,2004,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.4,12.4.2,"Hazardous waste generated, per unit of GDP (ki...",Algeria,KG_PER_CON_USD,,,,0.002886,,...,0.002486,,,,,,,,,
1,12.4,12.4.2,"Hazardous waste generated, per unit of GDP (ki...",Andorra,KG_PER_CON_USD,,,,,,...,,,,,,0.000358,0.000746,0.000636,0.000671,0.000524


In [5]:
# 6th xlsx: hazardous waste generated, in metric tonnes
# Goal 12, Target 12.4, Indicator 12.4.2
haz_waste_gen_total = pd.read_excel('data/EN_HAZ_GENV.xlsx')
haz_waste_gen_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Units,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.4,12.4.2,EN_HAZ_GENV,Hazardous waste generated (Tonnes),12,Algeria,C,G,TONNES,...,,,,,,,,,,
1,12,12.4,12.4.2,EN_HAZ_GENV,Hazardous waste generated (Tonnes),20,Andorra,C,G,TONNES,...,,,,,,976.0,2081.0,1788.0,1923.40002,1527.5


In [6]:
# Only keep necessary columns
haz_waste_gen_total = haz_waste_gen_total[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
haz_waste_gen_total.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Units,2000,2001,2002,2003,2004,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.4,12.4.2,Hazardous waste generated (Tonnes),Algeria,TONNES,,,,325000.0,,...,325100.0,,,,,,,,,
1,12.4,12.4.2,Hazardous waste generated (Tonnes),Andorra,TONNES,,,,,,...,,,,,,976.0,2081.0,1788.0,1923.40002,1527.5


### Merge the two hazardous waste generated dataframes

In [7]:
# Merge the two dataframes 
merged_haz_waste_gen = pd.merge(haz_waste_gen_total, haz_waste_gen_GDP, on="GeoAreaName")
merged_haz_waste_gen.head(2)

Unnamed: 0,Target_x,Indicator_x,SeriesDescription_x,GeoAreaName,Units_x,2000_x,2001_x,2002_x,2003_x,2004_x,...,2007_y,2008_y,2010_y,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y
0,12.4,12.4.2,Hazardous waste generated (Tonnes),Algeria,TONNES,,,,325000.0,,...,0.002486,,,,,,,,,
1,12.4,12.4.2,Hazardous waste generated (Tonnes),Andorra,TONNES,,,,,,...,,,,,,0.000358,0.000746,0.000636,0.000671,0.000524


In [8]:
# 7th xlsx: hazardous waste treated, by type of treatment
# Goal 12, Target 12.4, Indicator 12.4.2
haz_waste_treated = pd.read_excel('data/EN_HAZ_TREATV.xlsx')
haz_waste_treated.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of waste treatment,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.4,12.4.2,EN_HAZ_TREATV,"Hazardous waste treated, by type of treatment ...",28,Antigua and Barbuda,C,G,LANDFIL,...,100.860001,52.959999,23.059999,157.220001,7.94,7.16,13.6,16.6,,
1,12,12.4,12.4.2,EN_HAZ_TREATV,"Hazardous waste treated, by type of treatment ...",51,Armenia,C,G,INCINRT,...,1359.0,456.0,300.0,12.0,167.399994,17.0,2379.0,289.700012,604.900024,582.799988


In [9]:
haz_waste_treated.columns

Index(['Goal', 'Target', 'Indicator', 'SeriesCode', 'SeriesDescription',
       'GeoAreaCode', 'GeoAreaName', 'Nature', 'Reporting Type',
       'Type of waste treatment', 'Units', '2000', '2001', '2002', '2003',
       '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012',
       '2013', '2014', '2015', '2016', '2017'],
      dtype='object')

In [10]:
# Only keep necessary columns
haz_waste_treated = haz_waste_treated[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Type of waste treatment', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
haz_waste_treated.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of waste treatment,Units,2000,2001,2002,2003,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.4,12.4.2,"Hazardous waste treated, by type of treatment ...",Antigua and Barbuda,LANDFIL,TONNES,,,,,...,51.759998,100.860001,23.059999,157.220001,7.94,7.16,13.6,16.6,,
1,12.4,12.4.2,"Hazardous waste treated, by type of treatment ...",Armenia,INCINRT,TONNES,,,,,...,,1359.0,300.0,12.0,167.399994,17.0,2379.0,289.700012,604.900024,582.799988


### Merge the hazardous waste generated and hazardous waste treated datasets

In [11]:
merged_haz_waste_overall = pd.merge(merged_haz_waste_gen, haz_waste_treated, on="GeoAreaName")
merged_haz_waste_overall.head(2)

Unnamed: 0,Target_x,Indicator_x,SeriesDescription_x,GeoAreaName,Units_x,2000_x,2001_x,2002_x,2003_x,2004_x,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.4,12.4.2,Hazardous waste generated (Tonnes),Armenia,TONNES,381578.0,375500.0,377900.0,344095.0,351400.0,...,,1359.0,300.0,12.0,167.399994,17.0,2379.0,289.700012,604.900024,582.799988
1,12.4,12.4.2,Hazardous waste generated (Tonnes),Armenia,TONNES,381578.0,375500.0,377900.0,344095.0,351400.0,...,362275.0,433537.53125,432811.78125,457729.8125,460844.6875,574973.8125,571750.375,551020.0625,601917.375,541012.375


## Domestic Material Consumption Datasets

In [12]:
# 8th xlsx: domestic material consumption per capita, by type of product
# Goal 12, Target 12.2, Indicator 12.2.2
dom_mat_consumption_pc = pd.read_excel('data/EN_MAT_DOMCMPC.xlsx')
dom_mat_consumption_pc.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of product,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.2,12.2.2,EN_MAT_DOMCMPC,"Domestic material consumption per capita, by t...",4,Afghanistan,E,G,BIM,...,1.72341,1.97509,1.93652,1.82431,1.90564,1.83294,1.82012,1.80205,1.78902,1.77893
1,12,12.2,12.2.2,EN_MAT_DOMCMPC,"Domestic material consumption per capita, by t...",4,Afghanistan,E,G,COL,...,0.01271,0.01785,0.02517,0.0244,0.02443,0.04246,0.04633,0.04798,0.04962,0.05124


In [13]:
dom_mat_consumption_pc.columns

Index(['Goal', 'Target', 'Indicator', 'SeriesCode', 'SeriesDescription',
       'GeoAreaCode', 'GeoAreaName', 'Nature', 'Reporting Type',
       'Type of product', 'Units', '2000', '2001', '2002', '2003', '2004',
       '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
       '2014', '2015', '2016', '2017'],
      dtype='object')

In [14]:
# Only keep necessary columns
dom_mat_consumption_pc = dom_mat_consumption_pc[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Type of product', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
dom_mat_consumption_pc.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.2,12.2.2,"Domestic material consumption per capita, by t...",Afghanistan,BIM,TONNES,2.26784,1.96185,2.07472,2.01228,...,1.79459,1.72341,1.93652,1.82431,1.90564,1.83294,1.82012,1.80205,1.78902,1.77893
1,12.2,12.2.2,"Domestic material consumption per capita, by t...",Afghanistan,COL,TONNES,5e-05,0.00124,0.00096,0.00152,...,0.00913,0.01271,0.02517,0.0244,0.02443,0.04246,0.04633,0.04798,0.04962,0.05124


In [15]:
# 9th xlsx: domestic material consumption per unit of GDP, by type of product
# Goal 12, Target 12.2, Indicator 12.2.2
dom_mat_consumption_GDP = pd.read_excel('data/EN_MAT_DOMCMPG.xlsx')
dom_mat_consumption_GDP.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of product,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.2,12.2.2,EN_MAT_DOMCMPG,"Domestic material consumption per unit of GDP,...",4,Afghanistan,E,G,BIM,...,3.54062,3.55151,3.46922,3.10169,3.0179,2.81812,2.80302,2.91022,2.86592,2.85037
1,12,12.2,12.2.2,EN_MAT_DOMCMPG,"Domestic material consumption per unit of GDP,...",4,Afghanistan,E,G,COL,...,0.02612,0.03211,0.04509,0.04149,0.03869,0.06527,0.07135,0.07749,0.07949,0.0821


In [16]:
# Only keep necessary columns
dom_mat_consumption_GDP = dom_mat_consumption_GDP[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Type of product', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
dom_mat_consumption_GDP.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.2,12.2.2,"Domestic material consumption per unit of GDP,...",Afghanistan,BIM,KG_PER_CON_USD,8.42728,8.06455,5.41873,5.09579,...,3.35689,3.54062,3.46922,3.10169,3.0179,2.81812,2.80302,2.91022,2.86592,2.85037
1,12.2,12.2.2,"Domestic material consumption per unit of GDP,...",Afghanistan,COL,KG_PER_CON_USD,0.00018,0.0051,0.0025,0.00384,...,0.01708,0.02612,0.04509,0.04149,0.03869,0.06527,0.07135,0.07749,0.07949,0.0821


In [17]:
# 10th xlsx: domestic material consumption, by type of raw product
# Goal 12, Target 12.2, Indicator 12.2.2
dom_mat_consumption_total = pd.read_excel('data/EN_MAT_DOMCMPT.xlsx')
dom_mat_consumption_total.head(2)

Unnamed: 0,Goal,Target,Indicator,SeriesCode,SeriesDescription,GeoAreaCode,GeoAreaName,Nature,Reporting Type,Type of product,...,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017
0,12,12.2,12.2.2,EN_MAT_DOMCMPT,"Domestic material consumption, by type of raw ...",4,Afghanistan,E,G,BIM,...,47038880.0,55310950.0,55777840.0,54197770.0,58497430.0,58162420.0,59623400.0,60794940.0,62000240.0,63205540.0
1,12,12.2,12.2.2,EN_MAT_DOMCMPT,"Domestic material consumption, by type of raw ...",4,Afghanistan,E,G,COL,...,347005.9,500008.4,725012.2,725012.2,750012.6,1347192.0,1517746.0,1618705.0,1719664.0,1820623.0


In [18]:
# Only keep necessary columns
dom_mat_consumption_total = dom_mat_consumption_total[['Target', 'Indicator', 'SeriesDescription', 'GeoAreaName', 'Type of product', 'Units',
        '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
dom_mat_consumption_total.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017
0,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,BIM,TONNES,45569430.0,41132970.0,45602090.0,46412990.0,...,47766150.0,47038880.0,55777840.0,54197770.0,58497430.0,58162420.0,59623400.0,60794940.0,62000240.0,63205540.0
1,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,COL,TONNES,1000.016,26000.44,21000.36,35000.59,...,243004.1,347005.9,725012.2,725012.2,750012.6,1347192.0,1517746.0,1618705.0,1719664.0,1820623.0


### Merge the domestic material consumption dataframes

In [20]:
merged_dom_mat_consumption = pd.merge(dom_mat_consumption_GDP, dom_mat_consumption_pc, on="GeoAreaName")
merged_dom_mat_consumption.head(2)

Unnamed: 0,Target_x,Indicator_x,SeriesDescription_x,GeoAreaName,Type of product_x,Units_x,2000_x,2001_x,2002_x,2003_x,...,2007_y,2008_y,2010_y,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y
0,12.2,12.2.2,"Domestic material consumption per unit of GDP,...",Afghanistan,BIM,KG_PER_CON_USD,8.42728,8.06455,5.41873,5.09579,...,1.79459,1.72341,1.93652,1.82431,1.90564,1.83294,1.82012,1.80205,1.78902,1.77893
1,12.2,12.2.2,"Domestic material consumption per unit of GDP,...",Afghanistan,BIM,KG_PER_CON_USD,8.42728,8.06455,5.41873,5.09579,...,0.00913,0.01271,0.02517,0.0244,0.02443,0.04246,0.04633,0.04798,0.04962,0.05124


In [21]:
merged_dom_mat_consumption_overall = pd.merge(dom_mat_consumption_total, merged_dom_mat_consumption, on="GeoAreaName")
merged_dom_mat_consumption_overall.head(2)

Unnamed: 0,Target,Indicator,SeriesDescription,GeoAreaName,Type of product,Units,2000,2001,2002,2003,...,2007_y,2008_y,2010_y,2011_y,2012_y,2013_y,2014_y,2015_y,2016_y,2017_y
0,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,BIM,TONNES,45569430.0,41132970.0,45602090.0,46412990.0,...,1.79459,1.72341,1.93652,1.82431,1.90564,1.83294,1.82012,1.80205,1.78902,1.77893
1,12.2,12.2.2,"Domestic material consumption, by type of raw ...",Afghanistan,BIM,TONNES,45569430.0,41132970.0,45602090.0,46412990.0,...,0.00913,0.01271,0.02517,0.0244,0.02443,0.04246,0.04633,0.04798,0.04962,0.05124
