In [37]:
import pandas as pd
import json
from pprint import pprint

In [38]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import linregress

In [39]:
# Load the emissions table from the JSON file sitting next to the notebook.
# Despite the "2017" in the file name, the rows displayed below span 2017-2021.
df = pd.read_json(r'2017data.json')
df

Unnamed: 0,Country,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,ISO
0,Afghanistan,2017,0.276852,0.096731,0.170436,0.008429,0.001256,0.000000,0.000000,AFG
1,Afghanistan,2018,0.294876,0.104866,0.180470,0.007990,0.001550,0.000000,0.000000,AFG
2,Afghanistan,2019,0.293401,0.104708,0.181177,0.006502,0.001015,0.000000,0.000000,AFG
3,Afghanistan,2020,0.299746,0.106488,0.185254,0.006447,0.001557,0.000000,0.000000,AFG
4,Afghanistan,2021,0.296119,0.104828,0.185029,0.005956,0.000306,0.000000,0.000000,AFG
...,...,...,...,...,...,...,...,...,...,...
1155,Global,2017,4.749682,1.908857,1.610910,0.940144,0.198416,0.051579,0.039776,WLD
1156,Global,2018,4.792753,1.919213,1.596350,0.979965,0.204225,0.053634,0.039366,WLD
1157,Global,2019,4.775633,1.896468,1.589920,0.984878,0.208309,0.056569,0.039490,WLD
1158,Global,2020,4.497423,1.807760,1.427353,0.963695,0.208844,0.051981,0.037789,WLD


In [40]:
# Show the dtype pandas inferred for each column of the loaded table.
df.dtypes

Country     object
Year         int64
Total      float64
Coal       float64
Oil        float64
Gas        float64
Cement     float64
Flaring    float64
Other      float64
ISO         object
dtype: object

In [41]:
def get_year_df(year, data=None):
    """Return the rows of an emissions table that belong to one year.

    Parameters
    ----------
    year : int
        Value compared for equality against the ``Year`` column.
    data : pandas.DataFrame, optional
        Table to filter; it must contain a ``Year`` column. When omitted,
        the notebook-level ``df`` is used, so existing one-argument calls
        behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the matching rows, re-indexed from 0.
        It is empty when no row has the requested year.
    """
    # Fall back to the global frame only when the caller did not supply one;
    # `is None` is used because a DataFrame has no unambiguous truth value.
    source = df if data is None else data
    matches_year = source['Year'] == year
    # Drop the original row labels so each per-year frame starts at 0.
    return source.loc[matches_year].reset_index(drop=True)

In [42]:
# Keep only the 2021 rows and preview the first few of them.
df_2021 = get_year_df(year=2021)
df_2021.head()

Unnamed: 0,Country,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,ISO
0,Afghanistan,2021,0.296119,0.104828,0.185029,0.005956,0.000306,0.0,0.0,AFG
1,Albania,2021,1.618066,0.061096,1.137164,0.052631,0.367175,0.0,0.0,ALB
2,Algeria,2021,3.989977,0.024798,1.268066,2.100412,0.249628,0.347072,0.0,DZA
3,Andorra,2021,5.730292,0.0,5.730292,0.0,0.0,0.0,0.0,AND
4,Angola,2021,0.619141,0.0,0.402904,0.085536,0.032568,0.098133,0.0,AGO


In [52]:
# Rank the 2021 rows from highest to lowest 'Total' and show the top ten.
df_2021_sorted = df_2021.sort_values('Total', ascending=False)
df_2021_sorted.head(10)

Unnamed: 0,Country,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,ISO
165,Qatar,2021,35.587357,0.0,3.226295,31.072901,0.58465,0.703512,0.0,QAT
15,Bahrain,2021,26.66372,0.0,2.426313,23.642933,0.420088,0.174386,0.0,BHR
107,Kuwait,2021,24.972098,0.0,12.643539,11.721402,0.287619,0.319538,0.0,KWT
209,Trinidad and Tobago,2021,23.677599,0.0,2.016713,21.305957,0.172285,0.182644,0.0,TTO
29,Brunei Darussalam,2021,23.532003,0.865973,8.708841,13.046933,0.0,0.910255,0.0,BRN
217,United Arab Emirates,2021,21.792151,0.714923,6.389958,13.90118,0.611617,0.174473,0.0,ARE
144,New Caledonia,2021,19.097542,9.177665,9.817779,0.0,0.102099,0.0,0.0,NCL
181,Saudi Arabia,2021,18.702989,0.0,10.28421,7.512159,0.797994,0.108627,0.0,SAU
152,Oman,2021,17.916536,0.0,2.697747,13.787812,0.401763,1.029214,0.0,OMN
11,Australia,2021,15.091473,5.823767,5.368438,2.979896,0.108798,0.666359,0.144215,AUS


In [43]:
# Keep only the 2020 rows and preview the first few of them.
df_2020 = get_year_df(year=2020)
df_2020.head()

Unnamed: 0,Country,Year,Total,Coal,Oil,Gas,Cement,Flaring,Other,ISO
0,Afghanistan,2020,0.299746,0.106488,0.185254,0.006447,0.001557,0.0,0.0,AFG
1,Albania,2020,1.649392,0.115611,1.122339,0.045757,0.36562,6.6e-05,0.0,ALB
2,Algeria,2020,3.970031,0.025282,1.25519,2.032334,0.2538,0.403426,0.0,DZA
3,Andorra,2020,5.777148,0.0,5.777148,0.0,0.0,0.0,0.0,AND
4,Angola,2020,0.606541,0.0,0.39198,0.075982,0.033616,0.104963,0.0,AGO


In [44]:
def get_emission_df(Type, data=None):
    """Return a three-column view of one emission column per country and year.

    Parameters
    ----------
    Type : str
        Name of the column to keep next to ``Country`` and ``Year``
        (the notebook calls this with ``'Coal'`` and ``'Total'``).
    data : pandas.DataFrame, optional
        Table to select from; it must contain ``Country``, ``Year`` and the
        ``Type`` column. When omitted, the notebook-level ``df`` is used, so
        existing one-argument calls behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``['Country', 'Year', Type]`` in that
        order, re-indexed from 0.

    Raises
    ------
    KeyError
        Raised by pandas if any of the three columns is missing.
    """
    # Fall back to the global frame only when the caller did not supply one;
    # `is None` is used because a DataFrame has no unambiguous truth value.
    source = df if data is None else data
    wanted_columns = ['Country', 'Year', Type]
    return source[wanted_columns].reset_index(drop=True)

In [45]:
# Country / Year / Coal slice of the full table; preview the first rows.
df_coal = get_emission_df(Type='Coal')
df_coal.head()

Unnamed: 0,Country,Year,Coal
0,Afghanistan,2017,0.096731
1,Afghanistan,2018,0.104866
2,Afghanistan,2019,0.104708
3,Afghanistan,2020,0.106488
4,Afghanistan,2021,0.104828


In [46]:
# Country / Year / Total slice of the full table; preview the first rows.
df_total = get_emission_df(Type='Total')
df_total.head()

Unnamed: 0,Country,Year,Total
0,Afghanistan,2017,0.276852
1,Afghanistan,2018,0.294876
2,Afghanistan,2019,0.293401
3,Afghanistan,2020,0.299746
4,Afghanistan,2021,0.296119


In [61]:
# Group the table by (Year, Country); .first() then displays the first
# non-null entry of every remaining column for each group.
group_keys = ['Year', 'Country']
grouped_df = df.groupby(group_keys)
grouped_df.first()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total,Coal,Oil,Gas,Cement,Flaring,Other,ISO
Year,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017,Afghanistan,0.276852,0.096731,0.170436,0.008429,0.001256,0.000000,0.0,AFG
2017,Albania,1.932429,0.095372,1.416589,0.058495,0.351291,0.010682,0.0,ALB
2017,Algeria,4.045341,0.019328,1.344500,2.001920,0.277278,0.402316,0.0,DZA
2017,Andorra,6.302098,0.000000,6.302098,0.000000,0.000000,0.000000,0.0,AND
2017,Angola,0.804541,0.000000,0.483036,0.052027,0.032966,0.236512,0.0,AGO
...,...,...,...,...,...,...,...,...,...
2021,Viet Nam,3.344827,2.016234,0.621885,0.140346,0.555266,0.011096,0.0,VNM
2021,Wallis and Futuna Islands,2.392571,0.000000,2.392571,0.000000,0.000000,0.000000,0.0,WLF
2021,Yemen,0.378289,0.007456,0.290844,0.006264,0.020120,0.053605,0.0,YEM
2021,Zambia,0.394194,0.126514,0.218330,0.000000,0.049349,0.000000,0.0,ZMB


In [62]:
# Pull out the rows labelled 'Global' from the Total slice, re-indexed from 0.
df_global_total = (
    df_total
    .loc[df_total['Country'].eq('Global')]
    .reset_index(drop=True)
)
df_global_total

Unnamed: 0,Country,Year,Total
0,Global,2017,4.749682
1,Global,2018,4.792753
2,Global,2019,4.775633
3,Global,2020,4.497423
4,Global,2021,4.693699
