In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 1. Function to Read DataFrame

In [None]:
# Path of the CSV file that is passed to function_read_df in the next cell.
file_name = 'API_19_DS2_en_csv_v2_3931355.csv'

In [None]:
def function_read_df(file_name):
  """Load the indicator CSV and return it in two layouts.

  The first three lines of the file are skipped before the header row is
  read. Only the rows of five indicators are kept, the 'Indicator Name' and
  'Country Name' columns are dropped, and missing values are replaced by 0.

  Parameters
  ----------
  file_name : str or path-like
      CSV file handed to ``pd.read_csv``.

  Returns
  -------
  tuple of (DataFrame, DataFrame)
      First frame: indexed by ('Country Code', 'Indicator Code'), one column
      per remaining CSV column.
      Second frame: the first frame stacked and then unstacked on
      'Country Code', so that every country code becomes a column.
  """
  # Indicator names whose rows are kept
  wanted_indicators = [
      'Urban population',
      'Population, total',
      'Forest area (sq. km)',
      'Agricultural land (sq. km)',
      'CO2 emissions (kt)',
  ]

  raw_table = pd.read_csv(file_name, skiprows=[0, 1, 2])
  keep_row = raw_table['Indicator Name'].isin(wanted_indicators)

  by_year = (
      raw_table[keep_row]
      .drop(columns=['Indicator Name', 'Country Name'])
      .fillna(0)  # missing values become zeros
      .set_index(['Country Code', 'Indicator Code'])
  )
  # Move the column labels into the index, then spread the country codes out as columns
  by_country = by_year.stack().unstack('Country Code')

  return by_year, by_country

# Read the file once; df_year is the first returned frame, df_country the second.
df_year,df_country = function_read_df(file_name)

In [None]:
df_year # DataFrame with years as columns

In [None]:
df_country # DataFrame with countries as columns

In [None]:
# Stack the column labels of df_year into the index, then turn the
# 'Indicator Code' index level into columns.
df_indicator_code = df_year.stack().unstack('Indicator Code') # DataFrame with indicator codes as columns
df_indicator_code

### Filter the DataFrame based on 4 Countries 

<br> We choose only four countries for the data analysis: France, the USA, Great Britain and China. <br> A dictionary is created that maps indicator codes to indicator names.

In [None]:
country_codes = ['FRA','USA','GBR','CHN'] # country codes of the four chosen countries
# Dictionary mapping each indicator code (key) to its indicator name (value).
# NOTE: later cells assign dict_codes.values() positionally as column names,
# so the insertion order of this dictionary matters there.
dict_codes  = {
    'AG.LND.AGRI.K2' : 'Agricultural land (sq. km)',
    'AG.LND.FRST.K2' : 'Forest area (sq. km)',
    'EN.ATM.CO2E.KT' : 'CO2 emissions (kt)',
    'SP.POP.TOTL' : 'Population, total',
    'SP.URB.TOTL' : 'Urban population'
}

In [None]:
# Keep only the rows whose first index entry is one of country_codes; the
# remaining indexers are full slices. df_year is overwritten with the result.
df_year = df_year.loc[country_codes,(slice(None)),:] # select only the four given countries
df_year 

In [None]:
# Same country filter as applied to df_year, here on the frame that has the
# indicator codes as columns. df_indicator_code is overwritten with the result.
df_indicator_code = df_indicator_code.loc[country_codes,(slice(None)),:] # select only the four given countries
df_indicator_code

In [None]:
# Replace the indicator codes in the index by their indicator names, using
# the code -> name mapping stored in dict_codes.
df_year = df_year.rename(index=dict_codes)
df_year

# 2. Insight-4-Population Analysis

In [None]:
# Per-country sum of the urban population over every row of df_indicator_code
k = df_indicator_code.groupby(level='Country Code')[['SP.URB.TOTL']].sum()
print(k)
# Per-country sum of the total population over every row of df_indicator_code
k_1 = df_indicator_code.groupby(level='Country Code')[['SP.POP.TOTL']].sum()
print(k_1)

### PIE chart for Total Population

In [None]:
# Pie chart of each country's share of the summed total population (k_1).
# The wedge labels are taken from k_1 itself, so that labels and values
# always come from the same frame.
fig, ax = plt.subplots()
ax.pie(k_1["SP.POP.TOTL"], labels=k_1.index, autopct='%1.2f%%')
ax.set_title('total population of all Countries Over last 60 years')
plt.show()

### Pie Chart for Urban Population

In [None]:
# Pie chart of each country's share of the summed urban population (k)
fig, ax = plt.subplots()
ax.pie(k["SP.URB.TOTL"], labels=k.index, autopct='%1.2f%%')
ax.set_title('total urban population of all Countries Over last 60 years')
plt.show()

# 3. Insight-1-Stacked Bar Plot of Urban Population and Total Population

In [None]:
# The figure has to be created BEFORE anything is drawn: calling plt.figure()
# after the bars opens a new, empty figure, and savefig() would then write a
# blank image.
plt.figure(figsize=(30, 20))
plt.bar(k.index, k["SP.URB.TOTL"], color='r') # summed urban population per country
# NOTE(review): the total-population bar is stacked on top of the urban bar,
# so each bar's height is urban + total. If the urban population is already
# counted in the total, stacking (total - urban) may be what was intended - confirm.
plt.bar(k.index, k_1["SP.POP.TOTL"], color='b', bottom=k["SP.URB.TOTL"]) # summed total population per country
plt.xlabel('Countries')
plt.ylabel("Total and Urban Population in 10 Billion")
plt.title(" Total Population and Urban Population of Countries in 10 Billion")
plt.legend(["Urban Population", "Total Population"])
plt.savefig('hdsgfhdf.png') # save while the drawn figure is still the current one
plt.show()

# 4. Insight-2-Total Population and CO2 Emissions in Last 40 years

## TIME-SERIES plot of Total Population in Last 40 years

In [None]:
plt.figure(figsize=(22,5))
# One line per country for the 'Population, total' rows of df_year.
# [20:-1] keeps the columns from position 20 up to, but excluding, the last one.
plot_columns = df_year.columns[20:-1]
for country_code, line_label in [('CHN', "CHINA"), ('GBR', "GBR"), ('USA', "USA"), ('FRA', "FRA")]:
  population_values = list(df_year.loc[(country_code, 'Population, total')])[20:-1]
  plt.plot(plot_columns, population_values, label=line_label)
plt.xlabel("Year from 1980 to 2019")
plt.ylabel("Total Population in 10 Billion's")
plt.title("Total Population of countries in the last 40 years in 10 Billions")
# Legend entries follow the plotting order above
plt.legend(['China','United Kingdom','United States','France'])
plt.show()

## TIME-SERIES plot of CO2 Emissions in Last 40 years


In [None]:
plt.figure(figsize=(22,5))
# One line per country for the 'CO2 emissions (kt)' rows of df_year.
# [20:-1] keeps the columns from position 20 up to, but excluding, the last one.
plot_columns = df_year.columns[20:-1]
for country_code, line_label in [('CHN', "CHINA"), ('GBR', "GBR"), ('USA', "USA"), ('FRA', "FRA")]:
  emission_values = list(df_year.loc[(country_code, 'CO2 emissions (kt)')])[20:-1]
  plt.plot(plot_columns, emission_values, label=line_label)
plt.xlabel("Year from 1980 to 2019")
plt.ylabel("amount of CO2 Emissions ")
plt.title("CO2 Emissions made by countries in the last 40 years")
# Legend entries follow the plotting order above
plt.legend(['China','United Kingdom','United States','France'])
plt.show()

# 5. Insight-3 - Correlations between Indicators for Various Countries

### Correlation between indicators for CHINA

In [None]:
# Rename the columns from indicator codes to indicator names BY LABEL.
# A positional assignment of dict_codes.values() only gives the right names
# while the column order happens to match the dictionary order; rename() is
# also a no-op for columns that were already renamed, so the cell can be re-run.
df_indicator_code = df_indicator_code.rename(columns=dict_codes)
# Keep only the rows of China (columns already carry the indicator names)
df_china = df_indicator_code.loc[['CHN'],(slice(None)),:]
# Correlation matrix between the indicators, for China only
corr = df_china.corr()
corr.style.background_gradient(cmap='coolwarm')

### Correlation between indicators for USA

In [None]:
# Rename the columns from indicator codes to indicator names BY LABEL.
# A positional assignment of dict_codes.values() only gives the right names
# while the column order happens to match the dictionary order; rename() is
# also a no-op for columns that were already renamed, so the cell can be re-run.
df_indicator_code = df_indicator_code.rename(columns=dict_codes)
# Keep only the rows of the USA (columns already carry the indicator names)
df_usa = df_indicator_code.loc[['USA'],(slice(None)),:]
# Correlation matrix between the indicators, for the USA only
corr = df_usa.corr()
corr.style.background_gradient(cmap='coolwarm')

# 6. Other Insights

### Histograms
<br> Histograms of agricultural land for each of the four countries

In [None]:
plt.figure()
# One panel of a 2x2 grid per country, in this order:
# (country code in df_year, label shown in that panel's legend)
hist_panels = [
    ('CHN', "China"),
    ('FRA', "France"),
    ('USA', "United States"),
    ('GBR', "United Kingdom"),
]
for panel_number, (country_code, legend_text) in enumerate(hist_panels, start=1):
  plt.subplot(2, 2, panel_number)
  # 'Agricultural land (sq. km)' row of this country, last column excluded
  land_values = list(df_year.loc[(country_code, 'Agricultural land (sq. km)')])[:-1]
  plt.hist(land_values, label=legend_text, density=True)
  plt.legend()   # needs to be done for every subplot

plt.savefig("four_histo.png")
plt.show()

### Box-Plot For CO2 Emissions for all 4 Countries

In [None]:
# Country codes, in the order in which the boxes are drawn
cntry = ["USA", "CHN", "GBR", "FRA"]
# For every country: its 'CO2 emissions (kt)' row of df_year, last column excluded
co2emissions = [
    list(df_year.loc[(country_code, 'CO2 emissions (kt)')])[:-1]
    for country_code in cntry
]

plt.figure()
plt.boxplot(co2emissions, labels=cntry)
plt.title("Box plot for CO2 Emissions for 4 countries")
plt.savefig("box.png")
plt.show()

### Violin-Plot For CO2 Emissions for all 4 Countries

In [None]:
# Violin plot of the per-country CO2 emission lists built in the box-plot cell.
# (The former bare `plt.figure` expression did nothing - it was never called -
# so the figure is created by plt.subplots() alone.)
fig, ax = plt.subplots(1, 1)
ax.violinplot(co2emissions, showmedians=True)

# Violins are drawn at positions 1..len(cntry); label them with the country codes
ax.set_xticks(range(1, len(cntry) + 1))
ax.set_xticklabels(cntry)

ax.set_ylabel("Ranges")
ax.set_title("Violin plot for CO2 Emissions for 4 countries")
fig.savefig("violines.png")
plt.show()