In [60]:
from bokeh.io       import output_notebook, show
from bokeh.layouts  import column
from bokeh.models   import ColumnDataSource, CustomJS, HoverTool, Slider
from bokeh.plotting import figure
output_notebook()

The dataset being considered is the number of fixed broadband Internet subscribers in each country, over time (the notebook loads `broadband total.xlsx`). This visualisation captures how widely broadband subscriptions have spread over the last decade.

The visualisation chosen is a bar chart of the natural logarithm of the subscriber count. A logarithmic scale is used because the actual number of subscribers can be well over $10^6$, which makes a linear scale difficult to read.

The data covers the years 1995 through 2011; use the slider to choose which year is displayed. A HoverTool is also included, so that the value for an individual country can be inspected.

Dataset credits : [Gapminder](http://www.gapminder.org/data/)

[Data](https://docs.google.com/spreadsheets/d/14ivgHIV18Mr6hoW1deQ1L7nPXZUiTNyS8H-8sK9tMsg/pub) can be found here.

In [107]:
import pandas as pd
import numpy as np
# Load the subscriber table and turn it into a log-scaled, country-indexed frame.
# Result: `df` has one row per country (index = country name), one column per
# year holding log(subscribers), plus an integer `Cid` column (0..n-1) used as
# the x position of each bar.
raw = pd.read_excel('broadband total.xlsx')
raw = raw.rename(columns={'Fixed broadband Internet subscribers': 'country'})

# Missing values become 0, and every 0 then becomes 1, so that the logarithm
# below is defined everywhere: log(1) == 0, i.e. "no data" plots as a zero bar.
filled = raw.fillna(0).replace(to_replace=0, value=1)

# The country names are strings and cannot go through np.log, so move them
# out of the columns and into the index before taking the logarithm.
country_list = filled['country'].tolist()   # get country list
counts = filled.drop(['country'], axis=1)   # drop string data from dataframe
counts.index = country_list                 # replace index
df = np.log(counts)                         # finally take (natural) logarithm

# Assign country ID to make indexing easier. `cid` maps country name -> ID;
# the column itself is assigned positionally so this step does not depend on
# the country names being unique.
cid = pd.Series(data=range(df.shape[0]), index=country_list)
df['Cid'] = cid.values
df.head()

Unnamed: 0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,Cid
Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.298317,5.393628,6.214608,6.214608,6.214608,6.907755,7.31322,0.0,0
Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.605802,0.0,9.21034,11.066638,11.429544,11.566646,0.0,1
Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9.798127,10.491274,11.81303,12.043554,12.567373,13.091904,13.614618,13.71015,0.0,2
American Samoa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.045777,8.188967,8.745444,9.243872,9.589872,9.82693,9.936535,10.040681,10.10651,0.0,4


In [101]:
# Interactive bar chart: one bar per country (x = Cid), bar height = log count
# for the year currently selected on the slider.
START_YEAR, END_YEAR = 1995, 2011

# Glyphs and tooltips refer to columns by string name, so make sure the year
# labels are strings (read_excel may hand back integer headers). The extra
# "count" column always holds the values of the selected year, which lets the
# bars and the hover tooltip share one fixed field name.
plot_df = df.rename(columns=str)
plot_df['count'] = plot_df[str(START_YEAR)]
ds = ColumnDataSource(plot_df)

p = figure(width=900, height=500, y_range=(0, 20),
           y_axis_label="Log Count", x_axis_label="Country(Hover to select)")
p.vbar(x='Cid', top='count', bottom=0, width=0.5, source=ds)
p.add_tools(HoverTool(tooltips=[("Log Count", "@count"), ("Country", "@index")]))

slider = Slider(start=START_YEAR, end=END_YEAR, value=START_YEAR, step=1, title="Year")

# Runs in the browser whenever the slider moves: copy the chosen year's column
# into "count" and notify the source so the bars and tooltips are redrawn.
slider.js_on_change('value', CustomJS(args=dict(source=ds), code="""
    source.data['count'] = source.data[String(cb_obj.value)];
    source.change.emit();
"""))

# Slider and plot must be shown in a single layout; shown separately they are
# independent documents and the slider cannot drive the plot.
show(column(slider, p))