# HACKATHON INFMDI721 : QUELLES ENERGIES POUR DEMAIN ?

#### - Erwann Floch
#### - Antonin Durieux
#### - Kaëlig Castor

In [1]:
%matplotlib inline
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import mapclassify
import numpy as np
import seaborn as sns
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8*' in
# Matplotlib 3.6 and the old names were removed afterwards, so use whichever
# name this Matplotlib installation actually provides.
matplotlib.style.use(
    'seaborn' if 'seaborn' in matplotlib.style.available else 'seaborn-v0_8'
)

ImportError: libkea.so.1.4.7: cannot open shared object file: No such file or directory

# PARTIE 1 : Démographie, Production de CO2

In [None]:
# Load the CO2 table (semicolon-separated); the first CSV column becomes
# the row index.
CO2 = pd.read_csv('Data/CO2.csv', index_col=0, sep=';')
CO2.describe()

In [None]:
# Global CO2 production from 1965 to 2018
date = CO2.columns
# Take the 'Total World' row and discard its last three entries.
CO2_emission = CO2.loc['Total World'].iloc[:-3]
annees = CO2_emission.index
# NOTE: this rebinds CO2 from the DataFrame to a plain array of values, so
# the cell cannot be re-run without reloading the CSV first.
CO2 = CO2_emission.values

**Global population since 1965**

In [None]:
# Load the population table (semicolon-separated CSV).
GlobalPopulation = pd.read_csv('Data/GlobalPopulation.csv', sep=';')

In [None]:
# The 'Country Code' column is not used afterwards; display what is left.
GlobalPopulation = GlobalPopulation.drop('Country Code', axis=1)
GlobalPopulation

In [None]:
# Keep only the 'World' row and turn it into a one-column table whose index
# ('Time') holds the remaining column labels of GlobalPopulation.
population = GlobalPopulation[GlobalPopulation['Country Name'] == 'World']
# Remove the name column by label (rather than by position) before
# transposing, so that only data columns become rows.
population = population.drop(columns='Country Name').T
population.index.name = 'Time'
# Name the single column directly. The previous rename relied on the 'World'
# row carrying the hard-coded label 251, which silently stops matching as soon
# as the input file changes. This assignment fails loudly if there is not
# exactly one 'World' row.
population.columns = ['Global population']

**Global population prospect**

In [None]:
# Load the population projection table (comma-separated CSV).
prospect = pd.read_csv('Data/PopulationProspect.csv', sep=',')

In [None]:
# Keep the rows after 2018 for the 'World' location and the 'Medium' variant,
# restricted to the two columns of interest, indexed by year.
prospect = (
    prospect.loc[
        (prospect['Time'] > 2018)
        & (prospect['Location'] == 'World')
        & (prospect['Variant'] == 'Medium'),
        ['Time', 'PopTotal'],
    ]
    .set_index('Time')
    .rename(columns={'PopTotal': 'Global population'})
)
# Scale the figures by 1000.
# NOTE(review): presumably converts thousands to head counts - confirm
# against the data source.
prospect['Global population'] = prospect['Global population'] * 1000

In [None]:
# Stack the two population tables (observed rows first, projected rows after).
prospected_population = pd.concat([population, prospect])

In [None]:
# Global population over time.
# The seaborn style is already activated globally in the import cell, so it is
# not re-applied here.
plt.figure(figsize=(25, 15))

# Use the years stored in the index for the x axis instead of assuming that
# the rows start in 1960 and are contiguous. The index mixes text and integer
# year labels, hence the numeric conversion.
years = pd.to_numeric(prospected_population.index)
plt.plot(years, prospected_population.iloc[:, 0])

# plot properties
plt.title('Global population', weight='bold', fontsize=40)
plt.xlabel('year', weight='bold', fontsize=25)
# The plotted values are head counts (the per-inhabitant computation further
# down divides by them directly), not millions.
plt.ylabel('Population (number of inhabitants)', weight='bold', fontsize=25)
plt.show()

In [None]:
# CO2 emissions
# A fresh figure is requested (no explicit number): asking for figure 1 again
# would return an already existing figure and ignore figsize. The seaborn
# style is already activated globally in the import cell, so it is not
# re-applied here.
plt.figure(figsize=(25, 15))
plt.plot(annees, CO2)

# plot properties
plt.title('Global CO2 emissions', weight='bold', fontsize=40)
plt.xlabel('year', weight='bold', fontsize=25)
plt.ylabel('CO2 emissions [Million tons]', weight='bold', fontsize=25)
# One tick every five positions along the x axis.
plt.xticks(np.arange(0, len(annees), step=5))
plt.show()

In [None]:
# Turn the pandas Series into a one-column DataFrame.
CO2_emission_dataframe = CO2_emission.to_frame()
# Name the index so that it can be used as the 'Time' join key.
CO2_emission_dataframe.index.name = 'Time'

In [None]:
# Join emissions and population on their shared 'Time' key.
CO2_per_inhabitant = CO2_emission_dataframe.merge(prospected_population, on='Time')

# Emissions are scaled by 1e6 and divided by the population.
# NOTE(review): the column is labelled 'kg' while the plot built from it is
# labelled "ton per inhabitant" - confirm the intended unit.
CO2_per_inhabitant['kg CO2 per inhabitant'] = (
    CO2_per_inhabitant['Total World'] * 1e6
    / CO2_per_inhabitant['Global population']
)


In [None]:
# Series used by the per-inhabitant plot.
annees = CO2_per_inhabitant.index
CO2 = CO2_per_inhabitant['kg CO2 per inhabitant'].values
# Constant column of 2.0, one row per year: limit recommended by the GIEC for 2050
limite = np.full((len(annees), 1), 2.0)

In [None]:
# CO2 emissions per inhabitant (blue) against the constant limit (red)
plt.figure(figsize=(25, 15))
plt.plot(annees, limite, color='r')
plt.plot(annees, CO2_per_inhabitant['kg CO2 per inhabitant'].values, color='b')

# plot properties
plt.title('CO2 emission per inhabitant', weight='bold', fontsize=40)
plt.xlabel('year', weight='bold', fontsize=25)
plt.ylabel('CO2 emissions [ton per inhabitant]', weight='bold', fontsize=25)
plt.ylim((0, 5))
# One tick every ten positions along the x axis.
plt.xticks(np.arange(0, len(annees), step=10))
plt.show()

# PARTIE 2 : Lien entre énergies renouvelables et production de CO2

## 1. Carte de la production de CO2 mondiale (2018)

### 1.1 Import de la production de CO2 par pays

In [None]:
# Build the DataFrame from the 'Carbon Dioxide Emissions' sheet of the workbook
url_bp_data = "https://www.bp.com/content/dam/bp/business-sites/en/global/corporate/xlsx/energy-economics/statistical-review/bp-stats-review-2019-all-data.xlsx"
CO2_data = pd.read_excel(
    url_bp_data,
    sheet_name='Carbon Dioxide Emissions',
    header=2,   # the third row of the sheet holds the column names
    nrows=107,  # read at most 107 data rows
)

In [None]:
# Display the raw table as read from the workbook.
CO2_data

In [None]:
# Clean-up: drop the last five columns, call the name column 'country' and
# discard the rows without a name.
CO2_data = CO2_data.iloc[:, :-5]
CO2_data = CO2_data.rename(columns={"Million tonnes of carbon dioxide": "country"})
CO2_data = CO2_data[CO2_data['country'].notna()]

# Keep individual countries only (not the continents): rows whose name
# contains 'Total' or 'Other' (case-insensitive) are removed.
CO2_data = CO2_data[~CO2_data["country"].str.contains('Total|Other', case=False)]
CO2_data

### 1.2 Création de la carte

In [None]:
# Load the 'naturalearth_lowres' dataset through GeoPandas and expose its
# 'name' column as 'country', the column name used by the BP tables.
# NOTE(review): gpd.datasets is deprecated in recent GeoPandas releases -
# confirm the installed version still provides it.
world_shape = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres')).rename(
    columns={"name": "country"}
)
world_shape.head()

On doit traiter les correspondances sur les noms des pays : 

In [None]:
# Sorted list of the country names present in the BP table.
CO2_data_countries = sorted(CO2_data['country'].tolist())
print(CO2_data_countries)

In [None]:
# Sorted list of the country names present in the shape table.
world_shape_countries = sorted(world_shape['country'].tolist())
print(world_shape_countries)

In [None]:
# We recover the data for the countries we can...
# African countries in particular are not documented individually enough.
# Keys are the names used in the BP table, values the names used in world_shape.
country_rename_dict = {
    'Czech Republic': 'Czechia',
    'North Macedonia': 'Macedonia',
    'Russian Federation': 'Russia',
    'Trinidad & Tobago': 'Trinidad and Tobago',
    'US': 'United States of America',
}

# Rename the listed countries; every other value is left untouched.
CO2_data['country'] = CO2_data['country'].replace(country_rename_dict)
CO2_data

In [None]:
# Attach the emission figures to every country shape and index by country.
# how='left' keeps the shapes that have no BP data (their figures are NaN).
# The join key is spelled out so that the merge cannot silently start joining
# on additional columns if both tables ever share another column name.
CO2_data = world_shape.merge(CO2_data, how='left', on='country').set_index('country')

In [None]:
# Display the merged table (shapes plus emission figures).
CO2_data

In [None]:
from mpl_toolkits.axes_grid1 import make_axes_locatable

# World map coloured by the 2018 column, on a logarithmic colour scale.
fig, ax = plt.subplots(figsize=(18, 10))
ax.set_title('CO2 production by country for 2018 (Million tonnes)', fontsize=20)
# Reserve a narrow axes to the right of the map for the colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
CO2_data.plot(
    column=2018, ax=ax, cax=cax, legend=True, cmap='OrRd',
    norm=matplotlib.colors.LogNorm(vmin=CO2_data[2018].min(), vmax=CO2_data[2018].max()),
)
plt.show()

## 2. Carte de la consommation d'énergie mondiale (2018)

In [None]:
# Read the 'Primary Energy Consumption' sheet (column names on the third row).
Energy_data = pd.read_excel(
    url_bp_data, sheet_name='Primary Energy Consumption', header=2, nrows=107
)

# Clean-up: drop the last five columns, call the name column 'country' and
# discard the rows without a name.
Energy_data = Energy_data.iloc[:, :-5]
Energy_data = Energy_data.rename(columns={"Million tonnes oil equivalent": "country"})
Energy_data = Energy_data[Energy_data['country'].notna()]

# Remove the rows whose name contains 'Total' or 'Other' (case-insensitive).
Energy_data = Energy_data[~Energy_data["country"].str.contains('Total|Other', case=False)]

In [None]:
# Align the BP country names with the names used in world_shape; names not
# listed in country_rename_dict are left untouched.
Energy_data['country'] = Energy_data['country'].replace(country_rename_dict)
# Attach the consumption figures to every country shape and index by country.
# how='left' keeps the shapes that have no BP data (their figures are NaN).
# The join key is spelled out so that the merge cannot silently start joining
# on additional columns if both tables ever share another column name.
Energy_data = world_shape.merge(Energy_data, how='left', on='country').set_index('country')

In [None]:
# World map coloured by the 2018 column, on a logarithmic colour scale.
fig, ax = plt.subplots(figsize=(18, 10))
ax.set_title('Primary Energy Consumption for 2018 (Million tonnes oil equivalent)', fontsize=20)
# Reserve a narrow axes to the right of the map for the colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
Energy_data.plot(
    column=2018, ax=ax, cax=cax, legend=True, cmap='PuBu',
    norm=matplotlib.colors.LogNorm(vmin=Energy_data[2018].min(), vmax=Energy_data[2018].max()),
)
plt.show()

On voit que les pays qui consomment le plus d'énergie sont ceux qui semblent produire le plus de CO2 ; y a-t-il des exceptions ?

## 3. Lien entre consommation d'énergie et production de CO2

In [None]:
# Log-log scatter of 2018 energy consumption against 2018 CO2 production,
# one point per country.
fig, ax = plt.subplots(figsize=(13, 13))

# Both tables were built by left-merging onto world_shape, so their rows are
# expected to line up one to one.
energie_cons_2018 = Energy_data[2018]
CO2_prod_2018 = CO2_data[2018]
x = energie_cons_2018.values
y = CO2_prod_2018.values
labels = CO2_data.index
plt.scatter(x, y)
plt.xscale('log')
plt.yscale('log')
# Axis labels and title: "Consommation" (French) corrected to "Consumption".
plt.xlabel('Energy Consumption (Million tonnes oil equivalent)', fontsize=20)
plt.ylabel('CO2 production (Million tonnes)', fontsize=20)
plt.title('Relation between CO2 production and Energy Consumption', fontsize=20)

# Name only the three countries discussed in the text below the figure.
highlighted = {'Iceland', 'Norway', 'Sweden'}
for label, x_value, y_value in zip(labels, x, y):
    if label in highlighted:
        plt.annotate(label, (x_value, y_value), rotation=0,
                     textcoords="offset points", xytext=(30, 0), ha='center')
plt.show()

On voit qu'une tendance linéaire se dégage entre consommation d'énergie et production de CO2, mais certains pays se démarquent, principalement l'Islande, la Norvège et la Suède. Comment ces pays font-ils pour produire moins de CO2 que la moyenne ? L'utilisation d'énergies renouvelables est-elle un facteur de cette démarcation ?

## 4. Rapport entre ratio d'énergie renouvelable consommée et production de CO2

Part d'énergie renouvelable par rapport à l'énergie totale consommée :

In [None]:
# Build and clean the renewables table, then derive the renewable share
Renewables_data = pd.read_excel(url_bp_data, sheet_name='Renewables - Mtoe', header=2, nrows=107)
# Drop the last five columns, call the name column 'country' and discard the
# rows without a name or whose name contains 'Total' / 'Other'.
Renewables_data = Renewables_data[Renewables_data.columns[:-5]]
Renewables_data = Renewables_data.rename(columns={"Million tonnes oil equivalent": "country"})
Renewables_data = Renewables_data[pd.notnull(Renewables_data['country'])]
Renewables_data = Renewables_data[~Renewables_data["country"].str.contains('Total', case=False)]
Renewables_data = Renewables_data[~Renewables_data["country"].str.contains('Other', case=False)]
# Align the BP country names with the names used in world_shape.
Renewables_data['country'] = Renewables_data['country'].map(country_rename_dict).fillna(Renewables_data['country'])
Renewables_data = Renewables_data.set_index('country')

# Bring in the total energy consumption for 2018.
# Energy_data[2018] is a Series, and Series.rename() has no `columns`
# argument: the former rename(columns=...) call fails on current pandas and
# had no useful effect on older ones. The Series is named directly instead;
# that name becomes the column name in the merge below.
Total_energy_2018 = Energy_data[2018].rename('Total_energy_2018')
Renewables_data = Renewables_data.merge(Total_energy_2018, left_index=True, right_index=True)
Renewables_data['renewable energy ratio'] = Renewables_data[2018] / Renewables_data['Total_energy_2018']
# Drop every country that has a missing value in any column.
Renewables_data_clean = Renewables_data.dropna()

# Plot: one bar per country, highest ratio first
Renewables_data_clean = Renewables_data_clean.sort_values('renewable energy ratio', ascending=False)
Renewables_data_clean.plot.bar(y='renewable energy ratio', rot=90, figsize=(18, 10), color='green')
plt.title('Ratio of renewable energy consumption on total energy consumption by country').set_fontsize(20)
plt.show()

In [None]:
# Note that nuclear energy does not seem to be counted as a renewable energy in the dataset used

In [None]:
# Attach the renewable figures to every country shape, matching on the
# 'country' index; how='left' keeps the shapes that have no figures.
Renewables_data_map =  world_shape.set_index('country').join(Renewables_data_clean, how='left')

In [None]:
# World map coloured by the 'renewable energy ratio' column.
fig, ax = plt.subplots(figsize=(18, 10))
ax.set_title('Ratios of renewable energy consumption over total energy consumption', fontsize=20)
# Reserve a narrow axes to the right of the map for the colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
Renewables_data_map.plot(
    column='renewable energy ratio', ax=ax, cax=cax, legend=True, cmap='Greens'
)
plt.show()

In [None]:
# Countries whose renewable ratio is above 0.1 ...
Renewable_best_ratios = Renewables_data_clean.loc[
    lambda table: table['renewable energy ratio'] > 0.1
]
# ... together with their 2018 CO2 production, matched on the country index.
Renewable_best_ratios = Renewable_best_ratios.merge(
    CO2_prod_2018.rename('CO2_prod_2018'),
    left_index=True,
    right_index=True,
)

In [None]:
# Same log-log scatter as before, with the countries having the best renewable
# ratios highlighted and named.
fig, ax = plt.subplots(figsize=(13, 13))

energie_cons_2018 = Energy_data[2018]
CO2_prod_2018 = CO2_data[2018]
x = energie_cons_2018.values
y = CO2_prod_2018.values
x_best = Renewable_best_ratios['Total_energy_2018']
y_best = Renewable_best_ratios['CO2_prod_2018']

labels = Renewable_best_ratios.index
plt.scatter(x, y)
# The title announces green markers, so the colour is set explicitly instead
# of depending on the active colour cycle.
plt.scatter(x_best, y_best, s=700, alpha=0.3, color='green')
# x_best / y_best are indexed by country name: iterate over the values in
# order rather than writing x_best[i], which is an integer lookup on a
# label-indexed Series (deprecated positional fallback in pandas).
for label, x_value, y_value in zip(labels, x_best, y_best):
    plt.annotate(label, (x_value, y_value), rotation=0,
                 textcoords="offset points", xytext=(30, 0), ha='center')

plt.xscale('log')
plt.yscale('log')
# Labels and title: "Consommation" corrected to "Consumption", plural fixed.
plt.xlabel('Energy Consumption (Million tonnes oil equivalent)', fontsize=20)
plt.ylabel('CO2 production (Million tonnes)', fontsize=20)
plt.title('Relation between CO2 production and Global Energy Consumption \n Countries having the best renewable energy ratios are in green', fontsize=20)
plt.show()



Il n'y a pas de lien très clair : les pays consommant une part importante d'énergies renouvelables peuvent produire autant de CO2 que des pays dans la moyenne.

# PARTIE 3 : Tendances sur les énergies non émettrices de CO2

In [None]:
# Load the energy statistics and discard the 'quantity_footnotes' column.
df = pd.read_csv('Data/all_energy_statistics.csv').drop(columns='quantity_footnotes')
df.head()

In [None]:
# Row count of the 20 most frequent values of 'category'.
df['category'].value_counts().head(20)

In [None]:
def _select_country(label, country):
    """Return the rows of ``df`` for ``country`` sorted by year.

    The shape of the selection is printed, prefixed with ``label``.
    """
    rows = df[df['country_or_area'] == country].sort_values('year')
    # The former print('US :%f', US.shape) passed the pattern and the shape as
    # two separate arguments, so '%f' was printed literally; format explicitly.
    print('{}: {}'.format(label, rows.shape))
    return rows


US = _select_country('US', 'United States')
BR = _select_country('BR', 'Brazil')
CAN = _select_country('CAN', 'Canada')
CHI = _select_country('CHI', 'China')
IND = _select_country('IND', 'India')
JAP = _select_country('JAP', 'Japan')
UK = _select_country('UK', 'United Kingdom')

# European Union
SP = _select_country('SP', 'Spain')
ITA = _select_country('ITA', 'Italy')
GER = _select_country('GER', 'Germany')
FRA = _select_country('FRA', 'France')
NETH = _select_country('NETH', 'Netherlands')

In [None]:
# Wrap the US selection in a new DataFrame object and preview its first 15 rows.
dfUS = pd.DataFrame(US)
dfUS.head(15)

In [None]:
# Wind rows for India, selected through the 'category' column.
# NOTE(review): the other *_WIND tables in this notebook filter on
# 'commodity_transaction' instead - confirm both filters select the same rows.
IND_WIND = IND[IND['category'] == "wind_electricity"]
IND_WIND

In [None]:
# Summed quantity for every (country, category) pair, as a flat table.
DF1 = (
    df.groupby(['country_or_area', 'category'])['quantity']
    .sum()
    .reset_index()
)
DF1.head()

In [None]:
# Summed quantity per category, all countries together.
DF2 = DF1.groupby('category')['quantity'].sum().reset_index()
DF2.head()

In [None]:
# Summed quantity for every (country, category) pair; same computation as DF1.
DF3 = (
    df.groupby(['country_or_area', 'category'])['quantity']
    .sum()
    .reset_index()
)
DF3.head()

In [None]:
# Third-order polynomial regression of the Indian wind quantity over the years.
fig, ax = plt.subplots(figsize=(15, 10))
# x and y are passed by keyword: recent seaborn releases no longer accept them
# as positional arguments. The target axes is passed explicitly as well.
sns.regplot(x='year', y='quantity', data=IND_WIND, order=3, ax=ax)
plt.title("Wind energy quantity by year in India")
plt.show()

In [None]:
# Per-country wind tables: the same 'commodity_transaction' filter is applied
# to each country selection.
wind_transaction = "Electricity - total wind production"
US_WIND = US.loc[US['commodity_transaction'] == wind_transaction]
BR_WIND = BR.loc[BR['commodity_transaction'] == wind_transaction]
CAN_WIND = CAN.loc[CAN['commodity_transaction'] == wind_transaction]
CHI_WIND = CHI.loc[CHI['commodity_transaction'] == wind_transaction]
JAP_WIND = JAP.loc[JAP['commodity_transaction'] == wind_transaction]
UK_WIND = UK.loc[UK['commodity_transaction'] == wind_transaction]
SP_WIND = SP.loc[SP['commodity_transaction'] == wind_transaction]
ITA_WIND = ITA.loc[ITA['commodity_transaction'] == wind_transaction]
GER_WIND = GER.loc[GER['commodity_transaction'] == wind_transaction]
FRA_WIND = FRA.loc[FRA['commodity_transaction'] == wind_transaction]
NETH_WIND = NETH.loc[NETH['commodity_transaction'] == wind_transaction]

In [None]:
# Stack the wind tables of the seven countries compared below
# (the BR, JAP, SP, ITA and NETH tables are not included).
WIND = pd.concat(
    [US_WIND, CAN_WIND, CHI_WIND, IND_WIND, UK_WIND, GER_WIND, FRA_WIND],
    axis=0,
)
WIND.head()

In [None]:
# Per-country solar tables: the same 'commodity_transaction' filter is applied
# to each country selection.
solar_transaction = "Electricity - total solar production"
US_SOLAR = US.loc[US['commodity_transaction'] == solar_transaction]
CAN_SOLAR = CAN.loc[CAN['commodity_transaction'] == solar_transaction]
CHI_SOLAR = CHI.loc[CHI['commodity_transaction'] == solar_transaction]
IND_SOLAR = IND.loc[IND['commodity_transaction'] == solar_transaction]
UK_SOLAR = UK.loc[UK['commodity_transaction'] == solar_transaction]
GER_SOLAR = GER.loc[GER['commodity_transaction'] == solar_transaction]
FRA_SOLAR = FRA.loc[FRA['commodity_transaction'] == solar_transaction]

In [None]:
# Stack the solar tables of the seven countries.
SOLAR = pd.concat(
    [US_SOLAR, CAN_SOLAR, CHI_SOLAR, IND_SOLAR, UK_SOLAR, GER_SOLAR, FRA_SOLAR],
    axis=0,
)
SOLAR.head()

In [None]:
# Per-country nuclear tables: the same 'commodity_transaction' filter is
# applied to each country selection.
nuclear_transaction = "Electricity - total nuclear production"
US_NUCLEAR = US.loc[US['commodity_transaction'] == nuclear_transaction]
CAN_NUCLEAR = CAN.loc[CAN['commodity_transaction'] == nuclear_transaction]
CHI_NUCLEAR = CHI.loc[CHI['commodity_transaction'] == nuclear_transaction]
IND_NUCLEAR = IND.loc[IND['commodity_transaction'] == nuclear_transaction]
UK_NUCLEAR = UK.loc[UK['commodity_transaction'] == nuclear_transaction]
GER_NUCLEAR = GER.loc[GER['commodity_transaction'] == nuclear_transaction]
FRA_NUCLEAR = FRA.loc[FRA['commodity_transaction'] == nuclear_transaction]

In [None]:
# Stack the nuclear tables of the seven countries.
NUCLEAR = pd.concat(
    [US_NUCLEAR, CAN_NUCLEAR, CHI_NUCLEAR, IND_NUCLEAR, UK_NUCLEAR, GER_NUCLEAR, FRA_NUCLEAR],
    axis=0,
)
NUCLEAR.head()

In [None]:
# All wind, nuclear and solar rows in a single table ordered by year.
ENERGY = pd.concat([WIND, NUCLEAR, SOLAR]).sort_values(by='year')
ENERGY.head()

In [None]:
# Rebuilds ENERGY with the same statement as the previous cell:
# wind, nuclear and solar rows stacked and ordered by year.
ENERGY=pd.concat([WIND, NUCLEAR, SOLAR]).sort_values('year')
ENERGY.head()

In [None]:
# Wind, nuclear and solar rows for India only, ordered by year.
IND_ENERGY = pd.concat([IND_WIND, IND_NUCLEAR, IND_SOLAR]).sort_values(by='year')
IND_ENERGY.head()

In [None]:
# Preview the first rows of DF3.
DF3.head()

In [None]:
# Preview the first rows of DF2.
DF2.head()

In [None]:
# Preview the first rows of IND_ENERGY.
IND_ENERGY.head()

In [None]:
# Rows of ENERGY for the year 2014 only.
ENERGY_2014 = ENERGY.loc[ENERGY['year'] == 2014]

# CURRENT TOTAL (SOLAR, WIND, NUCLEAR) ENERGY PRODUCTION PER COUNTRY

In [None]:
# Apply the seaborn grid style before the axes are created so that it also
# applies to this figure.
sns.set(style="whitegrid")
fig, ax = plt.subplots(figsize=(15, 10))

# ENERGY_2014 stacks the wind, nuclear and solar rows of each country, and
# barplot() aggregates the rows sharing the same x/hue with the mean by
# default. The chart is titled TOTAL, so the rows are summed per country
# first and each bar then shows a single, already aggregated value.
totals_2014 = ENERGY_2014.groupby(
    ['year', 'country_or_area'], as_index=False
)['quantity'].sum()
sns.barplot(x="year", y="quantity", hue="country_or_area", data=totals_2014, ax=ax)
plt.title("CURRENT TOTAL (SOLAR, WIND, NUCLEAR) ENERGY PRODUCTION PER COUNTRY")
# The former fixed upper limit (350000) was used with the averaged bars; the
# summed values can exceed it, so only the lower bound is pinned.
ax.set_ylim(bottom=0)
plt.show()

# SOLAR PRODUCTION

In [None]:
# Yearly solar quantity, one line per country.
fig, ax = plt.subplots(figsize=(15, 10))
sns.set(style="whitegrid")
sns.lineplot(
    data=SOLAR,
    x="year",
    y="quantity",
    hue="country_or_area",
    linewidth=6,
)
plt.title("SOLAR PRODUCTION")
plt.show()

# WIND PRODUCTION

In [None]:
# Yearly wind quantity, one line per country.
fig, ax = plt.subplots(figsize=(15, 10))
sns.set(style="whitegrid")
sns.lineplot(
    data=WIND,
    x="year",
    y="quantity",
    hue="country_or_area",
    linewidth=6,
)
plt.title("WIND PRODUCTION")
plt.show()

# NUCLEAR PRODUCTION

In [None]:
# Yearly nuclear quantity, one line per country.
fig, ax = plt.subplots(figsize=(15, 10))
sns.set(style="whitegrid")
# ax is rebound to the axes returned by seaborn.
ax = sns.lineplot(
    data=NUCLEAR,
    x="year",
    y="quantity",
    hue="country_or_area",
    linewidth=6,
)
plt.title("NUCLEAR PRODUCTION")
plt.show()

In [None]:
# Summed quantity per country over every row of ENERGY, smallest first.
# The former leading ENERGY.head() was dropped: it was not the last expression
# of the cell, so its result was computed and discarded.
ENERGY.groupby('country_or_area')['quantity'].sum().sort_values().head(20)

![Heliomax.png](attachment:Heliomax.png)

![Agripower.png](attachment:Agripower.png)

![Agripower_IPO.png](attachment:Agripower_IPO.png)