## Getting Started with Interactive Data Visualizations

In [19]:
import pandas as pd
import numpy as np
import bokeh # for interactive visualisation

In [20]:
# Load the CO2 emissions table: one row per country, one column per year (1800-2014).
co2 = pd.read_csv('co2.csv')
co2.head()

Unnamed: 0,country,1800,1801,1802,1803,1804,1805,1806,1807,1808,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014
0,Afghanistan,,,,,,,,,,...,0.0529,0.0637,0.0854,0.154,0.242,0.294,0.412,0.35,0.316,0.299
1,Albania,,,,,,,,,,...,1.38,1.28,1.3,1.46,1.48,1.56,1.79,1.68,1.73,1.96
2,Algeria,,,,,,,,,,...,3.22,2.99,3.19,3.16,3.42,3.3,3.29,3.46,3.51,3.72
3,Andorra,,,,,,,,,,...,7.3,6.75,6.52,6.43,6.12,6.12,5.87,5.92,5.9,5.83
4,Angola,,,,,,,,,,...,0.98,1.1,1.2,1.18,1.23,1.24,1.25,1.33,1.25,1.29


In [21]:
# Load the Gapminder table: one row per (Country, Year) with fertility, life,
# population, child_mortality, gdp and region columns.
gm = pd.read_csv('gapminder.csv')
gm.head()

Unnamed: 0,Country,Year,fertility,life,population,child_mortality,gdp,region
0,Afghanistan,1964,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1,Afghanistan,1965,7.671,34.152,10697983.0,334.1,1182.0,South Asia
2,Afghanistan,1966,7.671,34.662,10927724.0,328.7,1168.0,South Asia
3,Afghanistan,1967,7.671,35.17,11163656.0,323.3,1173.0,South Asia
4,Afghanistan,1968,7.671,35.674,11411022.0,318.1,1187.0,South Asia


In [22]:
# Country -> region lookup (one row per distinct pair), used to attach a region to each CO2 row.
df_gm = gm[['Country', 'region']].drop_duplicates()

In [23]:
# Inner join: keep only countries found in both tables, matching
# co2['country'] against df_gm['Country'].
df_w_regions = co2.merge(df_gm, how='inner', left_on='country', right_on='Country')

In [24]:
# After the join 'Country' duplicates 'country', so remove the redundant column.
df_w_regions = df_w_regions.drop(columns='Country')

In [25]:
# Reshape wide -> long: 'country' and 'region' stay as identifier columns, and
# every year column becomes a (variable, value) row.
new_co2 = df_w_regions.melt(id_vars=['country', 'region'])

In [26]:
# Long format: one row per (country, year); melt named the new columns 'variable' and 'value'.
new_co2

Unnamed: 0,country,region,variable,value
0,Afghanistan,South Asia,1800,
1,Albania,Europe & Central Asia,1800,
2,Algeria,Middle East & North Africa,1800,
3,Angola,Sub-Saharan Africa,1800,
4,Antigua and Barbuda,America,1800,
...,...,...,...,...
37190,Vanuatu,East Asia & Pacific,2014,0.595
37191,Venezuela,America,2014,6.030
37192,Vietnam,East Asia & Pacific,2014,1.800
37193,Zambia,Sub-Saharan Africa,2014,0.288


In [27]:
# Give the melted columns meaningful names: 'variable' -> 'year', 'value' -> 'co2'.
columns = ['country', 'region', 'year', 'co2']
new_co2.columns = columns
new_co2

Unnamed: 0,country,region,year,co2
0,Afghanistan,South Asia,1800,
1,Albania,Europe & Central Asia,1800,
2,Algeria,Middle East & North Africa,1800,
3,Angola,Sub-Saharan Africa,1800,
4,Antigua and Barbuda,America,1800,
...,...,...,...,...
37190,Vanuatu,East Asia & Pacific,2014,0.595
37191,Venezuela,America,2014,6.030
37192,Vietnam,East Asia & Pacific,2014,1.800
37193,Zambia,Sub-Saharan Africa,2014,0.288


In [28]:
# Keep 1964 onwards, order rows by country then year, and store 'year' as an
# integer so it can be joined against the Gapminder 'Year' column.
df_co2 = (
    new_co2.loc[new_co2['year'].astype('int64') > 1963]
    .sort_values(by=['country', 'year'])
    .assign(year=lambda frame: frame['year'].astype('int64'))
)
df_co2.head()

Unnamed: 0,country,region,year,co2
28372,Afghanistan,South Asia,1964,0.0863
28545,Afghanistan,South Asia,1965,0.101
28718,Afghanistan,South Asia,1966,0.108
28891,Afghanistan,South Asia,1967,0.124
29064,Afghanistan,South Asia,1968,0.116


In [29]:
# GDP per country and year, with column names lower-cased to match df_co2's join keys.
df_gdp = gm[['Country', 'Year', 'gdp']].rename(
    columns={'Country': 'country', 'Year': 'year'}
)
df_gdp.head()

Unnamed: 0,country,year,gdp
0,Afghanistan,1964,1182.0
1,Afghanistan,1965,1182.0
2,Afghanistan,1966,1168.0
3,Afghanistan,1967,1173.0
4,Afghanistan,1968,1187.0


In [30]:
# Left join: keep every CO2 row and attach the GDP for the same country and year
# (rows with no GDP match get NaN).
data = df_co2.merge(df_gdp, how='left', on=['country', 'year'])

In [31]:
# Discard every row that has a missing value in any column.
data = data.dropna(axis='index', how='any')
data.head()

Unnamed: 0,country,region,year,co2,gdp
0,Afghanistan,South Asia,1964,0.0863,1182.0
1,Afghanistan,South Asia,1965,0.101,1182.0
2,Afghanistan,South Asia,1966,0.108,1168.0
3,Afghanistan,South Asia,1967,0.124,1173.0
4,Afghanistan,South Asia,1968,0.116,1187.0


In [34]:
# Summary statistics for the numeric columns (year, co2, gdp).
data.describe()

Unnamed: 0,year,co2,gdp
count,8202.0,8202.0,8202.0
mean,1989.055109,4.596666,12651.499634
std,14.433002,7.720794,18042.270821
min,1964.0,0.0,142.0
25%,1977.0,0.378,2230.75
50%,1989.0,1.67,6242.5
75%,2002.0,6.3175,15434.25
max,2013.0,101.0,182668.0


In [32]:
# Correlation matrix between CO2 emissions and GDP; the off-diagonal entry is
# the correlation coefficient.
np_co2 = data['co2'].to_numpy(copy=True)
np_gdp = data['gdp'].to_numpy(copy=True)
np.corrcoef(np_co2, np_gdp)

array([[1.        , 0.78219731],
       [0.78219731, 1.        ]])

In [None]:
from bokeh.io import curdoc, output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper, Slider
from bokeh.palettes import Spectral6
from bokeh.layouts import column, row

**Note on `widgetbox`:** importing `widgetbox` from `bokeh.layouts` fails on recent versions of Bokeh, because the function was deprecated and then removed. Use the `column` or `row` functions to organize widgets and plots instead, and replace any `widgetbox(...)` call with `column(...)`.

Replace:

```python
from bokeh.layouts import widgetbox, row
```

With:

```python
from bokeh.layouts import column, row
```

In [None]:
# Configure Bokeh to render plots inline in the notebook.
output_notebook()

In [None]:
# One colour per distinct region, in order of first appearance in `data`.
regions_list = list(data['region'].unique())
color_mapper = CategoricalColorMapper(palette=Spectral6, factors=regions_list)

Make the ColumnDataSource: source

In [None]:

# Initial data for the plot: the 1964 rows, exposed to Bokeh under the
# column names the glyphs and tooltips refer to ('x', 'y', 'country', 'region').
source = ColumnDataSource(data={
    key: data.loc[data['year'] == 1964, col]
    for key, col in (
        ('x', 'gdp'),
        ('y', 'co2'),
        ('country', 'country'),
        ('region', 'region'),
    )
})

Save the minimum and maximum values of the gdp column: xmin, xmax

In [None]:
# GDP extent over all years, so the x-axis stays fixed while the year changes.
xmin = float(data.gdp.min())
xmax = float(data.gdp.max())

Save the minimum and maximum values of the co2 column: ymin, ymax

In [None]:
# CO2 extent over all years, so the y-axis stays fixed while the year changes.
ymin = float(data.co2.min())
ymax = float(data.co2.max())

Create the figure: plot

In [None]:

# A log axis cannot start at 0, and `data.co2` contains zeros (its minimum is 0.0),
# so use the smallest strictly positive emission value as the lower bound
# instead of `ymin`.
ymin_log = data.co2[data.co2 > 0].min()

# `height`/`width` replace `plot_height`/`plot_width`, which were removed in Bokeh 3.
plot = figure(title='CO2 Emissions vs GDP in 1964',
              height=600, width=1000,
              x_range=(xmin, xmax),
              y_range=(ymin_log, ymax), y_axis_type='log')

Add circle glyphs to the plot

In [None]:
# Draw one circular marker per country, coloured by region.
# - `legend_field` replaces the `legend=` keyword, which was removed in Bokeh 3;
#   it builds the legend from the 'region' column in the browser, so the legend
#   follows the data when the source is replaced.
# - `scatter` (default marker: circle) replaces `circle(size=...)`, which recent
#   Bokeh releases deprecate.
plot.scatter(x='x', y='y', source=source, size=7, fill_alpha=0.8,
             color=dict(field='region', transform=color_mapper),
             legend_field='region')

In [None]:
# Axis titles
plot.xaxis.axis_label = 'Income Per Person'
plot.yaxis.axis_label = 'CO2 Emissions (tons per person)'

# Place the legend in the bottom-right corner of the plot
plot.legend.location = 'bottom_right'

In [None]:
# Display the static 1964 plot.
show(plot)

### Adding a Slider to the Static Plot 

In [None]:
# Year selector spanning every year in `data`, starting at the earliest one.
slider = Slider(
    title='Year',
    start=min(data.year),
    end=max(data.year),
    value=min(data.year),
    step=1,
)

In [None]:
def update_plot(attr, old, new):
    """Redraw the scatter plot for the year currently selected on the slider.

    Registered as a Bokeh property-change callback, hence the
    ``(attr, old, new)`` signature. None of the three arguments is used:
    the year is read from ``slider.value``.
    """
    yr = slider.value
    selected = data['year'] == yr  # row mask, computed once for all four columns
    column_for = {'x': 'gdp', 'y': 'co2', 'country': 'country', 'region': 'region'}
    source.data = {key: data.loc[selected, col] for key, col in column_for.items()}
    plot.title.text = 'CO2 Emissions vs GDP in %d' % yr

In [None]:
# Call update_plot whenever the slider's 'value' property changes.
# Python callbacks only fire when the document is run by a Bokeh server.
slider.on_change('value', update_plot)

In [None]:
# Put the slider to the left of the plot. `widgetbox` is not imported in this
# notebook (and no longer exists in recent Bokeh); `column` is its replacement
# for holding widgets.
layout = row(column(slider), plot)

In [None]:
# Add the layout as a root of the current Bokeh document.
curdoc().add_root(layout) 

### Adding a Hover Tool 

In [None]:
# Tooltip shown when hovering over a point; '@name' pulls the value from the
# matching column of the plot's data source.
hover = HoverTool(
    tooltips=[
        ('Country', '@country'),
        ('GDP', '@x'),
        ('CO2 Emission', '@y'),
    ]
)

In [None]:
# Attach the hover tool to the plot.
plot.add_tools(hover) 

In [None]:
# Serve this notebook as a Bokeh application and open it in a browser (`--show`).
# NOTE(review): presumably this blocks the kernel until interrupted — consider
# running the command from a terminal instead.
!bokeh serve --show Getting_Started_with_Interactive_Data_Visualizations.ipynb

## Interactive Data Visualization with Plotly Express 

In [35]:
import pandas as pd
import plotly.express as px 

In [36]:
# Reload both source tables so the Plotly section can run on its own.
co2 = pd.read_csv('co2.csv')
gm = pd.read_csv('gapminder.csv')

In [37]:
# --- Attach a region to every country in the CO2 table ---
df_gm = gm[['Country', 'region']].drop_duplicates()
df_w_regions = (
    co2.merge(df_gm, how='inner', left_on='country', right_on='Country')
    .drop(columns='Country')  # redundant with 'country' after the join
)

# --- Wide -> long: one row per (country, year) ---
new_co2 = df_w_regions.melt(id_vars=['country', 'region'])
columns = ['country', 'region', 'year', 'co2']
new_co2.columns = columns

# --- Keep 1964 onwards, ordered by country then year, with integer years ---
df_co2 = (
    new_co2.loc[new_co2['year'].astype('int64') > 1963]
    .sort_values(by=['country', 'year'])
    .assign(year=lambda frame: frame['year'].astype('int64'))
)

# --- GDP per country and year, keyed like df_co2 ---
df_gdp = gm[['Country', 'Year', 'gdp']].rename(
    columns={'Country': 'country', 'Year': 'year'}
)

# --- Combine, then drop rows with any missing value ---
data = df_co2.merge(df_gdp, how='left', on=['country', 'year']).dropna()

In [38]:
# Preview the combined table (country, region, year, co2, gdp).
data.head()

Unnamed: 0,country,region,year,co2,gdp
0,Afghanistan,South Asia,1964,0.0863,1182.0
1,Afghanistan,South Asia,1965,0.101,1182.0
2,Afghanistan,South Asia,1966,0.108,1168.0
3,Afghanistan,South Asia,1967,0.124,1173.0
4,Afghanistan,South Asia,1968,0.116,1187.0


In [39]:
# GDP extent over all years, used to pin the x-axis range across animation frames.
xmin = float(data.gdp.min())
xmax = float(data.gdp.max())

In [40]:
# CO2 extent over all years, used to pin the y-axis range across animation frames.
ymin = float(data.co2.min())
ymax = float(data.co2.max())

In [43]:
# Animated scatter of CO2 emissions against GDP, one panel per region.
fig = px.scatter(
    data,
    # Axes: GDP on a log x-axis, CO2 on y; ranges pinned to the full data extent
    x="gdp",
    y="co2",
    log_x=True,
    range_x=[xmin, xmax],
    range_y=[ymin, ymax],
    # Animation: one frame per year, points matched across frames by country
    animation_frame="year",
    animation_group="country",
    # Grouping: colour and facet column both follow the region
    color="region",
    facet_col="region",
    # Hover title and marker/figure sizing
    hover_name="country",
    size_max=45,
    width=1579,
    height=400,
)

In [44]:
# Render the animated Plotly figure.
fig.show() 