# Importing the Data and Required Libraries

In [97]:
import pandas as pd #pandas for working with dataframes
# Location of the raw video-game sales table, relative to the notebook.
csv_path = 'data.csv'

# Load the whole CSV into a DataFrame and preview the first rows.
data = pd.read_csv(csv_path)
data.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


# Data Cleaning

In [98]:
# Print a per-column summary (dtype and non-null count) together with the
# total number of entries; columns whose non-null count is below the entry
# count contain missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16598 entries, 0 to 16597
Data columns (total 11 columns):
Rank            16598 non-null int64
Name            16598 non-null object
Platform        16598 non-null object
Year            16327 non-null float64
Genre           16598 non-null object
Publisher       16540 non-null object
NA_Sales        16598 non-null float64
EU_Sales        16598 non-null float64
JP_Sales        16598 non-null float64
Other_Sales     16598 non-null float64
Global_Sales    16598 non-null float64
dtypes: float64(6), int64(1), object(4)
memory usage: 1.4+ MB


# Checking number of NULL values

In [99]:
# Boolean frame marking every missing cell; summing it column-wise gives the
# number of missing values per column.
missing_mask = data.isnull()
missing_mask.sum()

Rank              0
Name              0
Platform          0
Year            271
Genre             0
Publisher        58
NA_Sales          0
EU_Sales          0
JP_Sales          0
Other_Sales       0
Global_Sales      0
dtype: int64

# Dropping rows with NULL values in the Year and Publisher columns.

In [100]:
# Columns that must be present: a missing year or publisher cannot be
# imputed, so any row lacking either one is discarded.
required_columns = ['Year', 'Publisher']
data = data.dropna(subset=required_columns, axis=0)

# Re-check the per-column missing counts; all should now be zero.
data.isnull().sum()

Rank            0
Name            0
Platform        0
Year            0
Genre           0
Publisher       0
NA_Sales        0
EU_Sales        0
JP_Sales        0
Other_Sales     0
Global_Sales    0
dtype: int64

In [101]:
# Year was read as float64 because the column contained missing values; those
# rows have been dropped, so the column can now be stored as whole numbers.
# A vectorized cast is used instead of calling Python's int() once per row.
data['Year'] = data['Year'].astype('int64')
data.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


# CREATING DATA FOR PLOTTING

# Grouping on Year and NA_Sales

In [102]:
# Total NA sales per release year for 2010-2017, one value per year, so the
# list lines up with the eight year labels used on the plot's x-axis.
# The years are selected by label rather than by position: a positional
# `[-9:-1]` slice only lands on 2010-2017 when the grouped index happens to end
# with exactly one later year. A year with no rows yields 0.0 instead of
# silently shifting the list. Assumes Year holds integer years at this point.
NA_Sales = (
    data.groupby('Year')['NA_Sales']
    .sum()
    .reindex(list(range(2010, 2018)), fill_value=0.0)
    .tolist()
)
NA_Sales

[304.24000000000001,
 241.00000000000091,
 154.93000000000004,
 154.7700000000001,
 131.9700000000002,
 102.81999999999992,
 22.660000000000061,
 0.0]

# Grouping on Year and EU_Sales

In [103]:
# Total EU sales per release year for 2010-2017, one value per year, so the
# list lines up with the eight year labels used on the plot's x-axis.
# The years are selected by label rather than by position: a positional
# `[-9:-1]` slice only lands on 2010-2017 when the grouped index happens to end
# with exactly one later year. A year with no rows yields 0.0 instead of
# silently shifting the list. Assumes Year holds integer years at this point.
EU_Sales = (
    data.groupby('Year')['EU_Sales']
    .sum()
    .reindex(list(range(2010, 2018)), fill_value=0.0)
    .tolist()
)
EU_Sales

[176.57000000000016,
 167.31000000000023,
 118.76000000000002,
 125.80000000000004,
 125.63000000000009,
 97.710000000000022,
 26.760000000000066,
 0.0]

# Grouping on Year and JP_Sales

In [104]:
# Total JP sales per release year for 2010-2017, one value per year, so the
# list lines up with the eight year labels used on the plot's x-axis.
# The years are selected by label rather than by position: a positional
# `[-9:-1]` slice only lands on 2010-2017 when the grouped index happens to end
# with exactly one later year. A year with no rows yields 0.0 instead of
# silently shifting the list. Assumes Year holds integer years at this point.
JP_Sales = (
    data.groupby('Year')['JP_Sales']
    .sum()
    .reindex(list(range(2010, 2018)), fill_value=0.0)
    .tolist()
)
JP_Sales

[59.490000000000215,
 53.040000000000084,
 51.740000000000123,
 47.590000000000053,
 39.460000000000086,
 33.720000000000127,
 13.66999999999997,
 0.050000000000000003]

# Grouping on Year and Global_Sales

In [105]:
# Total global sales per release year for 2010-2017, one value per year, so
# the list lines up with the eight year labels used on the plot's x-axis.
# The years are selected by label rather than by position: a positional
# `[-9:-1]` slice only lands on 2010-2017 when the grouped index happens to end
# with exactly one later year. A year with no rows yields 0.0 instead of
# silently shifting the list. Assumes Year holds integer years at this point.
Global_Sales = (
    data.groupby('Year')['Global_Sales']
    .sum()
    .reindex(list(range(2010, 2018)), fill_value=0.0)
    .tolist()
)
Global_Sales

[600.28999999999485,
 515.79999999999723,
 363.48999999999842,
 368.10999999999865,
 337.02999999999849,
 264.43999999999795,
 70.900000000000134,
 0.050000000000000003]

# Plotting with Bokeh

In [106]:
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.palettes import Spectral4 ##Spectral4 is list of colors

# Legend labels, one per line, in drawing order.
names = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Global_Sales']
# X-axis labels (as strings), one per entry of each yearly sales list.
year = [str(y) for y in range(2010, 2018)]

# Standard toolbar; the customised hover tool is attached separately below.
plotTools = 'box_zoom, wheel_zoom, pan, tap, crosshair, reset, save'

# 1000x600 canvas with labelled axes; mouse-wheel zoom is active by default.
# NOTE(review): `plot_width`/`plot_height` here and `legend=` further down are
# the keyword spellings of older Bokeh releases -- confirm against the
# installed version before upgrading Bokeh.
p = figure(
    plot_width=1000,
    plot_height=600,
    x_axis_label='YEARS',
    y_axis_label='SALES',
    tools=plotTools,
    active_scroll='wheel_zoom',
)

# Hover tooltip: '$index' is the point's position in its data source, while
# '@x' / '@y' read the hovered point's values from the source columns.
hover = HoverTool(tooltips=[
    ('index', '$index'),
    ('X coord', '@x'),
    ('Y coord', '@y'),
])
p.add_tools(hover)

p.title.text = 'Click on legend entries to hide the corresponding lines'

# One data source per region, each pairing the shared year labels with that
# region's yearly totals. data1..data4 follow the order of `names`.
regional_totals = [NA_Sales, EU_Sales, JP_Sales, Global_Sales]
data1, data2, data3, data4 = [
    ColumnDataSource(name=label, data=dict(x=year, y=totals))
    for label, totals in zip(names, regional_totals)
]

# Draw one line per source. Each line takes its own Spectral4 colour and keeps
# that colour (at reduced alpha) while muted.
for line_source, colour in zip((data1, data2, data3, data4), Spectral4):
    p.line('x', 'y', legend=line_source.name, source=line_source,
           line_width=4, line_color=colour,
           muted_alpha=0.2, muted_color=colour)

# Clicking a legend entry fades the matching line instead of removing it.
p.legend.click_policy = 'mute'

# Write the plot to a standalone HTML file and open it in the browser.
output_file('Line_Graph Mute.html')
show(p)

# The code above will generate an HTML file containing the plot.