In [None]:

import numpy as np 
import pandas as pd 
import plotly.express as px
import geopandas as gpd
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode
import json
import plotly.graph_objects as go # or plotly.express as px
# Initialise Plotly's notebook mode (imported from plotly.offline above) before any figure is shown.
init_notebook_mode()




![](https://media2.giphy.com/media/xT77XHIPBBGZL9m62I/giphy.gif?cid=790b7611f3f63a0f153fb2f9f874bfc9aaf67ed70583d060&rid=giphy.gif&ct=g)

# Data Cleaning

First things first: let's import the data. The Global Power Plant Database is the primary dataset we will use today.

In [None]:
# Location of the primary dataset inside the Kaggle input directory.
POWER_PLANT_CSV = '/kaggle/input/global-power-plant-database/global_power_plant_database.csv'
df = pd.read_csv(POWER_PLANT_CSV)

# Preview the first rows as the cell output.
df.head(5)

Next, we will add each country's continent to the power plant database, using the world dataset that ships with geopandas.

In [None]:
# Load the low-resolution Natural Earth country table (used below for its
# `iso_a3` and `continent` columns).
# `gpd.datasets` is deprecated in GeoPandas 0.14 and removed in 1.0, where
# accessing it raises AttributeError; in that case read the same layer from
# the upstream Natural Earth archive instead.
# NOTE(review): the fallback needs internet access and assumes the upstream
# file exposes ISO_A3 / CONTINENT columns -- confirm in the target environment.
NATURAL_EARTH_URL = 'https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip'
try:
    world_source = gpd.datasets.get_path('naturalearth_lowres')
except AttributeError:
    world_source = NATURAL_EARTH_URL
world = gpd.read_file(world_source)
# The bundled dataset already uses lower-case column names; the upstream
# archive uses upper-case ones. Normalise so later cells can use `iso_a3`
# and `continent` either way.
world = world.rename(columns=str.lower)
world.head()

In [None]:
# Keep only the join key and the attribute we want to bring over.
world = world.loc[:, ['iso_a3', 'continent']]

# Left join so every power plant is kept even when its country code has no
# match in the world table (those rows end up with a missing continent).
df = pd.merge(
    df,
    world,
    how='left',
    left_on='country',
    right_on='iso_a3',
)
df.info()

There are still some rows without a continent. Let's find out which countries they belong to and fill in the corresponding values. <br />
As for null values in the estimated_generation_gwh attribute, we will fill them with the most recent reported annual generation available.

In [None]:
# Distinct country names among the rows whose continent is still missing.
df.loc[df['continent'].isna(), 'country_long'].unique()

In [None]:
# Continents assigned by hand for the countries listed by the previous cell.
continent_map = {'Bahrain':'Asia','Cape Verde':'Africa','France':'Europe','French Guiana':'South America',
                 'Kosovo':'Europe','Mauritius':'Africa','Norway':'Europe','Singapore':'Asia'}
# Use the hand-assigned value for the mapped countries and keep the merged
# continent for everyone else.
df['continent'] = df['country_long'].map(continent_map).fillna(df['continent'])

# Track where each plant's generation figure comes from. An empty string means
# "the estimate that was already present"; it is relabelled further down.
df['year_of_generation_data'] = ''

# Walk from the most recent reporting year (2017) back to 2013 and fill a
# missing estimate with the first reported value found.
# Explicit .loc assignment is used instead of `df[col].fillna(..., inplace=True)`:
# that chained in-place call warns on pandas 2.x and does not modify `df`
# under copy-on-write.
for y in range(2017, 2012, -1):
    col = 'generation_gwh_' + str(y)
    fillable = df['estimated_generation_gwh'].isna() & df[col].notna()
    df.loc[fillable, 'estimated_generation_gwh'] = df.loc[fillable, col]
    df.loc[fillable, 'year_of_generation_data'] = str(y)

# Rows never touched by the loop either carry the original estimate or have
# no generation figure at all (the latter are dropped just below).
df['year_of_generation_data'] = df['year_of_generation_data'].replace('', 'Estimated Data')

df = (
    df.dropna(subset=['estimated_generation_gwh'])
      [['country','country_long','continent','name','commissioning_year','capacity_mw','latitude','longitude',
        'primary_fuel','estimated_generation_gwh','year_of_generation_data']]
      .rename(columns={'country':'country_code','country_long':'country',
                       'estimated_generation_gwh':'annual_generation_gwh'})
      # Zero or negative generation cannot be used as a marker size or a share.
      .loc[lambda frame: frame['annual_generation_gwh'] > 0]
      # Largest producers first; the fresh 0..n-1 index doubles as a rank later on.
      .sort_values(by='annual_generation_gwh', ascending=False, ignore_index=True)
)
df.describe()


Print out the top 5 power plants with the highest annual generation.

In [None]:
# The frame is sorted by annual generation (descending), so the first five
# rows are the five largest producers.
df.head(n=5)

The A.E.S. Corp. power plant reports an extremely high annual generation, almost 4 times that of the second-largest plant, and the figure doesn't match its capacity. So we will drop the first row.

In [None]:
# Remove the implausible top-ranked plant (index label 0 after the sort above).
# errors='ignore' keeps the cell re-runnable: once label 0 is gone, a second
# run is a no-op instead of raising KeyError.
df = df.drop(index=[0], errors='ignore')

# Coarse fuel groups used for colouring and aggregation.
# Changes from the previous mapping: the duplicated 'Gas' key is removed and
# 'Wave and Tidal' is mapped explicitly -- without an entry it became NaN and
# was then labelled 'non-renewable' below.
# NOTE(review): 'Wave and Tidal' is a fuel category listed in the Global Power
# Plant Database documentation; confirm it occurs in this extract.
fuel_map = {'Coal':'Fossil','Gas':'Fossil','Oil':'Fossil','Petcoke':'Fossil',
            'Geothermal':'Other_renewable','Biomass':'Other_renewable','Wave and Tidal':'Other_renewable',
            'Storage':'Other','Cogeneration':'Other','Waste':'Other','Other':'Other',
            'Hydro':'Hydro','Nuclear':'Nuclear','Wind':'Wind','Solar':'Solar'}
df['fuel_type'] = df.primary_fuel.map(fuel_map)

# Fuel groups counted as renewable; everything else (including an unmapped,
# i.e. NaN, fuel_type) is flagged as non-renewable.
renewable = ['Hydro','Other_renewable','Wind','Solar']
df['renewable'] = np.where(df['fuel_type'].isin(renewable), 'renewable', 'non-renewable')
df.head()

# Where are the Power Plants Located?

**The map below shows (almost) every power plant in the world, with fuel types and annual generation data. Find out the ones near you!**

In [None]:
# One colour per coarse fuel group; reused by the later charts.
colordict = dict(
    Hydro='#6495ED',
    Fossil='#DEB887',
    Nuclear='#CD5C5C',
    Other_renewable='#90EE90',
    Wind='#20B2AA',
    Other='#BDB76B',
    Solar='#FFD700',
)

# Fields shown in the hover box; coordinates are hidden.
hoverD = dict(
    primary_fuel=True,
    capacity_mw=True,
    annual_generation_gwh=True,
    year_of_generation_data=True,
    country=True,
    longitude=False,
    latitude=False,
)

# One marker per plant, coloured by fuel group and sized by annual generation.
fig = px.scatter_mapbox(
    df,
    lat="latitude",
    lon="longitude",
    color="fuel_type",
    size="annual_generation_gwh",
    hover_name="name",
    hover_data=hoverD,
    color_discrete_map=colordict,
    size_max=15,
    zoom=2,
    title='Power Generation around the Globe',
)
fig.update_layout(
    {'showlegend': True, 'legend': {'title': 'Fuel Type'}},
    mapbox_style="carto-positron",
)
fig.show()

There are barely any wind farms or solar farms on this map. If we take a look at the distribution of power generation in the chart below, we will find out it is exponential. Since most of the wind farms and solar farms are small-scale with way lower annual generation than that of traditional power plants, the sizes of the markers are squeezed to a tiny point.

In [None]:
# The frame is sorted by generation, so the index label serves as the rank.
df['rank'] = df.index.to_numpy()

# Annual generation against rank, coloured by fuel group.
fig = px.scatter(
    df,
    x="rank",
    y="annual_generation_gwh",
    color='fuel_type',
    color_discrete_map=colordict,
)
fig.show()

If we apply a logarithm to power generation, the distribution becomes roughly linear, so the smaller power plants can be seen more clearly on the map.

In [None]:
# log(1 + x) of annual generation, computed on the whole column at once.
df['log_gen'] = np.log1p(df['annual_generation_gwh'])

# Same rank plot as before, now on the log-transformed values.
fig = px.scatter(
    df,
    x="rank",
    y="log_gen",
    color='fuel_type',
    color_discrete_map=colordict,
)
fig.show()

In [None]:
# Hover fields for the log-scaled map; the helper column `log_gen` and the
# coordinates are hidden.
hoverD = dict(
    primary_fuel=True,
    capacity_mw=True,
    annual_generation_gwh=True,
    year_of_generation_data=True,
    country=True,
    longitude=False,
    latitude=False,
    log_gen=False,
)

# Same map as before, but the marker size follows the log-transformed generation.
fig = px.scatter_mapbox(
    df,
    lat="latitude",
    lon="longitude",
    color="fuel_type",
    size="log_gen",
    hover_name="name",
    hover_data=hoverD,
    color_discrete_map=colordict,
    size_max=12,
    zoom=2,
    title='Power Generation around the Globe (log-scale marker size)',
)
fig.update_layout(
    {'showlegend': True, 'legend': {'title': 'Fuel Type'}},
    mapbox_style="carto-positron",
)
fig.show()


The future of humankind looks way more promising on this map! <br />
But I am wondering whether the solar farms in England have ever worked ;)

# Where is Our Electricity Coming from?

**The sunburst chart below shows what fuels we use to generate electricity in each continent.**

In [None]:
# Continents present in the data. Missing values are skipped so they cannot
# become an empty trace or a NaN button label.
continents = df.continent.dropna().unique()

# Settings shared by the world chart and every per-continent chart.
sunburst_kwargs = dict(path=['renewable', 'fuel_type', 'primary_fuel'], values='annual_generation_gwh',
                       color='renewable',
                       color_discrete_map={'non-renewable': '#BDB76B', 'renewable': '#20B2AA'})

fig = go.Figure()

# Trace 0: the whole world.
alllabels = px.sunburst(df, **sunburst_kwargs)
fig.add_trace(alllabels.data[0])

# Traces 1..n: one per continent, in the order of `continents`.
for c in continents:
    chart = px.sunburst(df.loc[df.continent == c, :], **sunburst_kwargs)
    fig.add_trace(chart.data[0])

# Show only the world chart at first. `active=0` below merely highlights the
# first button and does not apply its arguments, so without this every trace
# would be drawn on top of the others until a button is clicked.
for position, trace in enumerate(fig.data):
    trace.visible = (position == 0)

# One button per trace, generated from the data so the menu always matches the
# number of continents instead of assuming exactly six.
button_labels = ['World'] + [str(c) for c in continents]
buttons = [
    dict(args=[{'visible': [other == position for other in range(len(button_labels))]}],
         label=label, method="update")
    for position, label in enumerate(button_labels)
]
updatemenus = [dict(type="buttons", direction="down", active=0, buttons=buttons)]

# The title has to be set on the combined figure: only traces are copied from
# the plotly express figures, so a title given to px.sunburst would be lost.
fig.update_layout(updatemenus=updatemenus, title='Fuel Type Distribution of Power Generation')
fig.show()

* Around 25% of power comes from renewable fuels worldwide.
* South America is blessed with abundant hydro resources.
* Nuclear power has a considerable share in Europe.

**Let's make a treemap to see what the situation is in each country.**

In [None]:
# Fuel-group colours plus an entry for '(?)', which the original palette for
# this chart also defines.
# NOTE(review): '(?)' is presumably the label plotly gives to parent nodes with
# mixed children -- confirm against the rendered treemap.
colordict2 = {
    '(?)': '#708090',
    'Hydro': '#6495ED',
    'Fossil': '#DEB887',
    'Nuclear': '#CD5C5C',
    'Other_renewable': '#90EE90',
    'Wind': '#20B2AA',
    'Other': '#BDB76B',
    'Solar': '#FFD700',
}

# Hierarchy: world -> continent -> country -> fuel group, sized by generation.
treemap_levels = [px.Constant('world'), 'continent', 'country', 'fuel_type']
fig = px.treemap(
    df,
    path=treemap_levels,
    values='annual_generation_gwh',
    color='fuel_type',
    color_discrete_map=colordict2,
)
fig.show()

* China is the largest power producer in the world.
* Most of the countries still rely heavily on fossil fuels.
* France is the only country that uses nuclear as a primary power resource. 

**Now that we have explored power generation in each region and country, let's find out how the fuel mix has changed over time.**

Here is a plot of the 10-year moving average of newly commissioned capacity (MW) per fuel type. It shows which fuels the power plants built in each era rely on.

In [None]:
# Plants with a known commissioning year, oldest first.
df2 = df.dropna(subset=['commissioning_year']).sort_values('commissioning_year')

# Work with whole calendar years.
# NOTE(review): commissioning_year appears to be stored as a float and may be
# fractional -- confirm; flooring keeps each plant in the year it started.
commissioning_year = np.floor(df2['commissioning_year']).astype(int)

# Capacity commissioned per year and fuel group. The string 'sum' replaces the
# deprecated `aggfunc=np.sum` callable.
pv_time = pd.pivot_table(df2.assign(commissioning_year=commissioning_year), values='capacity_mw',
                         index='commissioning_year', columns='fuel_type', aggfunc='sum').fillna(0)

# Insert the years in which nothing was commissioned (as zeros). rolling(10)
# counts rows, so without this a window could span far more than ten calendar
# years wherever the data has gaps.
if not pv_time.empty:
    all_years = pd.RangeIndex(pv_time.index.min(), pv_time.index.max() + 1, name='commissioning_year')
    pv_time = pv_time.reindex(all_years, fill_value=0)

# 10-year moving average for every fuel group. The column list is captured
# before any '_MA10' column is added.
for f in list(pv_time.columns):
    pv_time[f+'_MA10'] = pv_time[f].rolling(10).mean()

# Expose the year as a datetime column for plotting.
pv_time.index = pd.to_datetime(pv_time.index.astype(str), format='%Y').rename('commissioning_year')
pv_time = pv_time.reset_index()


In [None]:
# Moving-average columns to draw, one line per fuel group.
ma_columns = [
    fuel + '_MA10'
    for fuel in ('Fossil', 'Hydro', 'Nuclear', 'Other', 'Other_renewable', 'Solar', 'Wind')
]
axis_labels = {'commissioning_year': "Commissioning Year", 'value': "10 MA of Capacity (mw)"}

fig = px.line(pv_time, x='commissioning_year', y=ma_columns, labels=axis_labels)
fig.show()

* Fossil fuel stopped growing in the early 2010s, after two decades of explosive growth.
* Nuclear experienced a sharp fall in the 1990s, which could be a result of the Chernobyl incident.
* Wind and solar took off respectively in the 2000s and 2010s.

# How green is your country?

Finally, let's have a glimpse of how every country is doing at sustainable power generation.<br />
Please note that the data is not up to date, but still, we can have a big picture from the map.

In [None]:
# Annual generation per country, split into renewable and non-renewable.
# - aggfunc='sum' replaces the deprecated np.sum callable.
# - reindex guarantees both columns exist even if one category is absent from
#   the data, so the arithmetic below cannot raise KeyError.
# - fillna(0) covers countries that have only one of the two categories.
pv_tbl = (
    pd.pivot_table(df, values='annual_generation_gwh', index=['country', 'country_code'],
                   columns='renewable', aggfunc='sum')
      .reindex(columns=['non-renewable', 'renewable'])
      .fillna(0)
)
pv_tbl['total'] = pv_tbl['non-renewable'] + pv_tbl['renewable']
# Share of renewable generation, in percent.
pv_tbl['renewable_pct'] = pv_tbl['renewable'] / pv_tbl['total'] * 100
# Turn the index levels (country, country_code) back into regular columns.
pv_tbl = pv_tbl.reset_index()


In [None]:
# Country outlines used to draw the choropleth polygons.
geojson_path = '../input/country-outlines/countries.geo.json'
with open(geojson_path) as geojson_file:
    countries = json.load(geojson_file)

# Colour each country by its renewable share. `country_code` is matched
# against the `adm0_a3` property of every GeoJSON feature.
fig = px.choropleth_mapbox(
    pv_tbl,
    geojson=countries,
    locations="country_code",
    featureidkey='properties.adm0_a3',
    color="renewable_pct",
    hover_name="country",
    color_continuous_scale=px.colors.sequential.YlGn,
    title='Percentage of Renewable Energy in Power Generation',
    zoom=2,
)
fig.update_layout(mapbox_style="carto-positron")

fig.show()

**<br />We have walked through various ways to explore global power generation today, and I hope you find this kernel interesting.
<br />If you like this kernel, please give it an upvote or leave a comment below, thank you!**