* In this tutorial, we will learn how to use the plotly library.
* Plotly library: Plotly's Python graphing library makes interactive, publication-quality graphs online.
Examples of how to make line plots, scatter plots, area charts, bar charts, error bars, box plots, histograms, heatmaps, subplots, multiple axes, polar charts and bubble charts.

This notebook is a reimplementation of 
https://www.kaggle.com/kanncaa1/plotly-tutorial-for-beginners

I have just ported the code for plotly 4.2.1


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import plotly.graph_objects as go

from wordcloud import WordCloud

import matplotlib.pyplot as plt

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, one per line.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# Any results you write to the current directory are saved as output.

# Loading Data and Explanation of Features
* timesData includes 14 features that are:
    * world_rank
    * university_name
    * country
    * teaching
    * international
    * research
    * citations
    * income
    * total_score
    * num_students
    * student_staff_ratio
    * international_students
    * female_male_ratio
    * year

In [None]:
# Load timesData.csv from the Kaggle world-university-rankings input
# directory into a DataFrame; every later cell reads from `timesData`.
timesData = pd.read_csv('/kaggle/input/world-university-rankings/timesData.csv')

In [None]:
# Print pandas' summary of the DataFrame (DataFrame.info).
timesData.info()

In [None]:
# Show the first 10 rows as the cell output.
timesData.head(10)

# Line Charts

* Import graph_objects as go
* Creating traces
    * x = x axis
    * y = y axis
    * mode = type of plot, e.g. markers, lines or lines+markers
    * name = name of the plot (shown in the legend)
    * marker = marker properties, given as a dictionary
        * color = color of the lines. It takes RGB and opacity (alpha)
    * text = the hover text (shown when the cursor hovers over a point)
* data = a list that we add the traces to
* layout = it is dictionary.
    * title = title of layout
    * x axis = it is dictionary
        * title = label of x axis
        * ticklen = length of x axis ticks
        * zeroline = showing zero line or not
* fig = it includes data and layout 
* go.Figure(data,layout).show() = plots the figure(fig) that is created by data and layout

In [None]:
# Line chart built from the first 100 rows of timesData: citations and
# teaching plotted against world_rank, with university_name as hover text.
df = timesData.iloc[:100, :]

# Citations: drawn as a plain line.
trace1 = go.Scatter(
    x=df['world_rank'],
    y=df['citations'],
    mode='lines',
    name='citations',
    marker={'color': 'rgba(16,112,2,0.8)'},
    text=df['university_name'],
)

# Teaching: drawn as a line with a marker on every point.
trace2 = go.Scatter(
    x=df['world_rank'],
    y=df['teaching'],
    mode='lines+markers',
    name='teaching',
    marker={'color': 'rgba(80,26,80,0.8)'},
    text=df['university_name'],
)

data = [trace1, trace2]

layout = {
    'title': 'Citation and Teaching vs World Rank of Top 100 Universities',
    'xaxis': {'title': 'World Rank', 'ticklen': 5, 'zeroline': False},
}

# Bundle the traces and the layout into one figure specification and render it.
fig = {'data': data, 'layout': layout}
go.Figure(fig).show()

# #import graph objects as "go"
# import plotly.graph_objects as go

# # Creating trace1
# trace1 = go.Scatter(
#                     x = df.world_rank,
#                     y = df.citations,
#                     mode = 'lines',
#                     name = 'citations',
#                     marker= dict(color = 'rgba(16,112,2,0.8)'),
#                     text = df.university_name)

# # Creating trace2
# trace2 = go.Scatter(
#                     x= df.world_rank,
#                     y= df.teaching,
#                     mode= 'lines+markers',
#                     name = 'teaching',
#                     marker = dict(color = 'rgba(80,26,80,0.8)'),
#                     text = df.university_name)

# data = [trace1, trace2]
# layout = dict(title = 'Citation and Teaching vs World Rank of Top 100 Universities',
#              xaxis= dict(title='World Rank', ticklen=5, zeroline=False)
#              )

# fig = dict(data= data,layout=layout)
# iplot(fig)

# Scatter
* Import graph_objects as go
* Creating traces 
    * x = x axis
    * y = y axis
    * mode = type of plot like marker, line or line+markers
    * name = name of the plots
    * marker = marker is used with dictionary
        * color = color of lines. It takes RGB (red, green , blue ) and opacity (alpha)
    * text = The hover text (hover is cursor)
* data = is a list that we add traces into it
* layout = it is dictionary
    * title = title of layout
    * xaxis = it is a dictionary
        * title =  label of x axis
        * ticklen = length of x axis ticks
        * zeroline = showing zero line or not
    * y axis = it is dictionary and same with x axis
* fig = it includes data and layout
* go.Figure().show() = plots the figure(fig) that is created by data and layout

In [None]:
# Scatter plot: citations vs world_rank for the first 100 rows of each of
# the years 2014, 2015 and 2016, one marker colour per year.
df2014, df2015, df2016 = (
    timesData[timesData.year == year].iloc[:100, :]
    for year in (2014, 2015, 2016)
)

# One markers-only trace per year; the legend label is the year itself.
trace1, trace2, trace3 = (
    go.Scatter(
        x=frame.world_rank,
        y=frame.citations,
        mode='markers',
        name=label,
        marker={'color': rgba},
        text=frame.university_name,
    )
    for frame, label, rgba in (
        (df2014, '2014', 'rgba(255,128,255,0.8)'),
        (df2015, '2015', 'rgba(255,128,2,0.8)'),
        (df2016, '2016', 'rgba(0,255,200,0.8)'),
    )
)

data = [trace1, trace2, trace3]
layout = {
    'title': 'Citation vs world rank of top 100 universities with 2014, 2015 and 2016 years',
    'xaxis': {'title': 'World Rank', 'ticklen': 5, 'zeroline': False},
}
fig = {'data': data, 'layout': layout}
go.Figure(fig).show()

# Bar Charts

* Import graph_objects as go
* Creating traces
    * x = x axis
    * y = y axis
    * mode = type of plot like marker, line or line+markers
    * name = name of the plots
    * marker = marker is used with dictionary
        * color = color of lines. It takes RGB (red, green, blue) and opacity (alpha)
        * line = it is a dictionary. Outline drawn around the bars
            * color = line color around bars
    * text = The hover text (hover is cursor)
* data = is a list that we add traces into it
* layout = it is dictionary.
    * barmode = bar mode of bars like grouped
* fig = it includes data and layout
* go.Figure().show() = plots the figure(fig) that is created by data and layout 

In [None]:
# Take the first three 2014 rows and show them as the cell output; the bar
# chart cells that follow rebuild the same slice.
df2014 = timesData[timesData.year == 2014].iloc[:3,:]
df2014

In [None]:
# Grouped bar chart: citations and teaching for the first three 2014 rows,
# with the country as the text attached to each bar.
df2014 = timesData[timesData.year == 2014].iloc[:3, :]

# Citations bars: translucent pink fill with a black 1.5-wide outline.
trace1 = go.Bar(
    x=df2014['university_name'],
    y=df2014['citations'],
    name='citations',
    marker={
        'color': 'rgba(255,174,255,0.5)',
        'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
    },
    text=df2014['country'],
)

# Teaching bars: translucent yellow fill with the same outline settings.
trace2 = go.Bar(
    x=df2014['university_name'],
    y=df2014['teaching'],
    name='teaching',
    marker={
        'color': 'rgba(255,255, 128,0.5)',
        'line': {'color': 'rgb(0,0,0)', 'width': 1.5},
    },
    text=df2014['country'],
)

data = [trace1, trace2]
# barmode='group' places the two series side by side for each university.
layout = go.Layout(barmode='group')
fig = go.Figure(data=data, layout=layout)
fig.show()


* Import graph_objects as go
* Creating traces 
    * x = x axis
    * y = y axis
    * name = name of the plots
    * type = type of plot like bar plot
* data = is a list that we add traces into it
* layout = it is dictionary
   * xaxis = label of x axis
   * barmode = bar mode of bars like grouped or relative
   * title = title of layout
* fig = it includes data and layout 
* fig.show() = plots the figure that is created by data and layout

In [None]:
# Bar chart with barmode 'relative', described with plain dictionaries
# instead of go.Bar objects: citations and teaching for the first three
# 2014 rows.
df2014 = timesData[timesData.year == 2014].iloc[:3, :]

x = df2014.university_name

# Each trace is a plain dict; 'type': 'bar' tells plotly how to draw it.
trace1 = dict(x=x, y=df2014.citations, name='citation', type='bar')
trace2 = dict(x=x, y=df2014.teaching, name='teaching', type='bar')

data = [trace1, trace2]
layout = dict(
    xaxis=dict(title='Top 3 universities'),
    barmode='relative',
    title='citations and teaching of top 3 universities in 2014',
)

fig = go.Figure(data=data, layout=layout)
fig.show()

* Import graph_objects as go and import subplots from plotly
    * subplots: used to create a figure with several subplots
   
* Creating trace1
    * bar: bar plot
        * x = x axis
        * y = y axis
        * marker
            * color: color of bars
            * line: bar line color and width
        * name : name of bar
        * orientation : orientation like horizontal
* Creating trace2
    * scatter: scatter plot
        * x = x axis
        * y = y axis
        * mode: scatter type: lines, lines+markers, or only markers
        * line: properties of line
            * color: color of line
        * name: name of scatter plot
* layout: axis, legend, margin, paper and plot properties

In [None]:
from plotly import subplots

# Two side-by-side subplots for the first seven 2016 rows: a horizontal bar
# chart of research (left) and a lines+markers chart of income (right), each
# value labelled with an annotation.
df2016 = timesData[timesData.year == 2016].iloc[:7, :]

# Values go on the x axis and university names on the y axis, because the
# bars are horizontal and the income line is drawn against the same names.
y_saving = list(df2016.research)
# float() conversion kept from the original; presumably income is stored as
# text in the CSV -- verify against timesData.info().
y_net_worth = [float(each) for each in df2016.income]
x_saving = list(df2016.university_name)
x_net_worth = list(df2016.university_name)

trace0 = go.Bar(
    x=y_saving,
    y=x_saving,
    marker=dict(
        color='rgba(171, 50 ,96, 0.6)',
        line=dict(color='rgba(171,50,96,1.0)', width=1),
    ),
    name='research',
    orientation='h',
)

trace1 = go.Scatter(
    x=y_net_worth,
    y=x_net_worth,
    mode='lines+markers',
    line=dict(color='rgb(63,72,204)'),
    name='income',
)

layout = dict(
    # The plotted series are research and income, so the title names those
    # (it previously said "Citations and income").
    title='Research and income',
    yaxis=dict(showticklabels=True, domain=[0, 0.85]),
    yaxis2=dict(
        showline=True,
        showticklabels=False,
        linecolor='rgba(102,102,102,0.8)',
        linewidth=2,
        domain=[0, 0.85],
    ),
    xaxis=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0, 0.42],
    ),
    xaxis2=dict(
        zeroline=False,
        showline=False,
        showticklabels=True,
        showgrid=True,
        domain=[0.47, 1],
        side='top',
        dtick=25,
    ),
    legend=dict(x=0.029, y=1.038, font=dict(size=10)),
    margin=dict(l=200, r=20, t=70, b=70),
    paper_bgcolor='rgb(248, 248, 255)',
    plot_bgcolor='rgb(248,248,255)',
)

# Label values: research rounded to two decimals, income rounded to the
# nearest integer.
annotations = []
y_s = np.round(y_saving, decimals=2)
y_nw = np.rint(y_net_worth)
label_font = dict(family='Arial', size=12, color='rgb(171,50,96)')

for ydn, yd, xd in zip(y_nw, y_s, x_saving):
    # Income label on the right-hand subplot, placed 4 units left of the point.
    annotations.append(dict(
        xref='x2', yref='y2',
        y=xd, x=ydn - 4,
        text='{:,}'.format(ydn),
        font=dict(label_font),
        showarrow=False,
    ))
    # Research label on the left-hand subplot, placed 3 units past the bar end.
    annotations.append(dict(
        xref='x1', yref='y1',
        y=xd, x=yd + 3,
        text=str(yd),
        font=dict(label_font),
        showarrow=False,
    ))
layout['annotations'] = annotations

# One row, two columns: bar chart in column 1, scatter in column 2.
fig = subplots.make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
                             shared_yaxes=False, vertical_spacing=0.001)

fig.append_trace(trace0, 1, 1)
fig.append_trace(trace1, 1, 2)

# Apply the hand-written layout on top of the one make_subplots generated.
fig['layout'].update(layout)
fig.show()

# Pie Charts

* fig: create figures
    * data: plot type
        * values: values of plot
        * labels: labels of plot 
        * name : name of plots
        * hoverinfo: information in hover
        * hole: hole width
        * type: plot type like pie
    * layout: layout of plot
        * title: title of layout 
        * annotations: font, showarrow, text, x, y

In [None]:
# Donut chart of the number of students at the first seven 2016 rows.
df2016 = timesData[timesData.year == 2016].iloc[:7, :]
pie1 = df2016.num_students
# num_students is text containing commas (e.g. '2,243'); NOTE(review): the
# comma is presumably a thousands separator, since student counts are whole
# numbers. Strip it instead of turning it into a decimal point: with
# replace(',', '.') a value without a comma (fewer than 1000 students) would
# be weighted 1000x too high relative to the others.
pie1_list = [float(each.replace(',', '')) for each in pie1]
labels = df2016.university_name

fig = {
    'data': [
        {
            'values': pie1_list,
            'labels': labels,
            # The pie occupies the left half of the figure.
            'domain': {'x': [0, .5]},
            'name': 'Number Of Students Rates',
            'hoverinfo': 'label+percent+name',
            # hole > 0 turns the pie into a donut.
            'hole': .5,
            'type': 'pie'
        },
    ],
    'layout': {
        'title': 'Universities Number of Students rates',
        'annotations': [
            {
                'font': {'size': 20},
                'showarrow': False,
                'text': 'Number of Students',
                'x': 0.20,
                'y': 1
            },
        ]
    }
}
go.Figure(fig).show()

# Bubble Charts

* x = x axis
* y = y axis
* mode = markers(scatter)
* marker = marker properties
    * color = third dimension of plot. International score
    * size = fourth dimension of the plot. Number of students
* text: university names

In [None]:
# Summarise df2016. When the cells are run in order this is still the
# 7-row 2016 slice created for the pie chart.
df2016.info()

In [None]:
# Bubble chart for the first twenty 2016 rows: teaching vs world_rank, with
# marker colour = international score and marker size = number of students.
df2016 = timesData[timesData.year == 2016].iloc[:20, :]
# num_students is text containing commas (e.g. '2,243'); NOTE(review): the
# comma is presumably a thousands separator. Strip it and divide by 1000 so
# marker sizes stay in "thousands of students" as before, while a value
# without a comma is no longer drawn 1000x too large.
num_students_size = [float(each.replace(',', '')) / 1000 for each in df2016.num_students]
international_color = [float(each) for each in df2016.international]
data = [
    {
        'y': df2016.teaching,
        'x': df2016.world_rank,
        'mode': 'markers',
        'marker': {
            'color': international_color,
            'size': num_students_size,
            # Show the colour bar that maps marker colour to the score.
            'showscale': True
        },
        'text': df2016.university_name
    }
]
go.Figure(data).show()

# Histogram

* trace1 = first histogram
    * x = x axis
    * y = y axis
    * opacity = opacity of histogram
    * name = name of legend
    * marker = color of histogram
* trace2 = second histogram
* layout = layout
    * barmode = mode of histogram like overlay. Also you can change it with stack

In [None]:
# Overlaid histograms of student_staff_ratio for the 2011 and 2012 rows.
x2011 = timesData.loc[timesData.year == 2011, 'student_staff_ratio']
x2012 = timesData.loc[timesData.year == 2012, 'student_staff_ratio']

# Both traces are semi-transparent so the overlapping bars stay visible.
trace1 = go.Histogram(
    x=x2011,
    opacity=0.75,
    name='2011',
    marker={'color': 'rgba(171,50,96,0.6)'},
)
trace2 = go.Histogram(
    x=x2012,
    opacity=0.75,
    name='2012',
    marker={'color': 'rgba(12,50,196,0.6)'},
)

data = [trace1, trace2]
layout = go.Layout(
    barmode='overlay',
    title='student-staff ratio in 2011 and 2012',
    xaxis={'title': 'students-staff ratio'},
    yaxis={'title': 'Count'},
)

fig = go.Figure(data=data, layout=layout)
fig.show()

# Word Cloud

This is not a plotly plot, but it is a useful visualization to know. Let's look at which country is mentioned most in 2011.

* WordCloud = word cloud library that is imported at the beginning of the kernel
    * background_color = color of the background
    * generate = generates a word cloud from the country names (x2011) joined into one string

In [None]:
# Word cloud of the country column for the 2011 rows, drawn with matplotlib
# and also written to graph.png.
x2011 = timesData.country[timesData.year == 2011]
plt.subplots(figsize=(8, 8))

# The country names are joined into one space-separated string, which is
# the input generate() tokenises into words.
wordcloud = WordCloud(background_color='white', width=512, height=384)
wordcloud = wordcloud.generate(' '.join(x2011))

plt.imshow(wordcloud)
plt.axis('off')
# Save before show(), while the figure is still the current one.
plt.savefig('graph.png')
plt.show()

# Box Plots
* Box Plots
    * Median (50th percentile) = middle value of the data set. Sort the data and take the value in the middle. 50% of the data are less than the median, which is also the second quartile (Q2)
        * 25th percentile = quartile 1 (Q1), the lower quartile
        * 75th percentile = quartile 3 (Q3), the upper quartile
        * height of box = IQR = interquartile range = Q3 - Q1
        * Whiskers = extend at most 1.5 * IQR below Q1 and above Q3
        * Outliers = commonly, points more than 1.5\*IQR below Q1 or above Q3.
    * trace = box
        * y = data we want to visualize with box plot
        * marker = color

In [None]:
# Box plots of total_score and research for the 2015 rows.
x2015 = timesData.loc[timesData.year == 2015]

trace0 = go.Box(
    y=x2015['total_score'],
    name='total score of universities in 2015',
    marker={'color': 'rgb(12,12,140)'},
)
trace1 = go.Box(
    y=x2015['research'],
    name='research of universities in 2015',
    marker={'color': 'rgb(12,128,128)'},
)

# No layout is supplied, so plotly's defaults are used.
data = [trace0, trace1]
go.Figure(data=data).show()

# Scatter Matrix Plots
* import figure factory as ff
* create_scatterplotmatrix = creates a scatter plot matrix
    * data2015 = prepared data. It includes research, international and total scores with index from 1 to 401 
    * colormap = color map of the scatter plots
    * colormap_type = color map type of the scatter plots
    * height and width = size of the figure


In [None]:
import plotly.figure_factory as ff

# Scatter plot matrix of research, international and total_score for the
# 2015 rows. `dataframe` is reused by the inset and 3D cells further down.
dataframe = timesData[timesData.year == 2015]

# Add a 1-based running 'index' column; create_scatterplotmatrix is told to
# use it as the index column via index='index'.
data2015 = dataframe.loc[:, ['research', 'international', 'total_score']].assign(
    index=np.arange(1, len(dataframe) + 1)
)

fig = ff.create_scatterplotmatrix(
    data2015,
    diag='box',
    index='index',
    colormap='Portland',
    colormap_type='cat',
    height=700,
    width=700,
)
fig.show()

# Inset Plots


In [None]:
# Inset plot: teaching vs world_rank on the main axes, with income vs
# world_rank in a smaller second pair of axes placed in the upper right.
# Relies on `dataframe` (the 2015 rows) created in the scatter matrix cell.
trace1 = go.Scatter(
    x=dataframe['world_rank'],
    y=dataframe['teaching'],
    name='teaching',
    marker={'color': 'rgba(16,112,2,0.8)'},
)

# xaxis='x2' / yaxis='y2' attach this trace to the inset axes defined below.
trace2 = go.Scatter(
    x=dataframe['world_rank'],
    y=dataframe['income'],
    xaxis='x2',
    yaxis='y2',
    name='income',
    marker={'color': 'rgba(160,112,20,0.8)'},
)

data = [trace1, trace2]

# The domains are fractions of the figure: the inset spans 0.5-0.95
# horizontally and 0.6-0.95 vertically, and its two axes anchor to each other.
layout = go.Layout(
    title='Income and Teaching vs World Rank of Universities',
    xaxis2={'domain': [0.5, 0.95], 'anchor': 'y2'},
    yaxis2={'domain': [0.6, 0.95], 'anchor': 'x2'},
)

fig = go.Figure(data=data, layout=layout)
fig.show()

# 3D Scatterplot with Colorscaling
* go.Scatter3d: create 3d scatter plot
* x,y,z: axis of plots 
* mode: markers, that is, a scatter plot
* size: marker size
* color: axis of colorscale
* colorscale: actually the 4th dimension

In [None]:
# 3D scatter plot of the 2015 rows: world_rank (x), research (y) and
# citations (z). Relies on `dataframe` created in the scatter matrix cell.
trace1 = go.Scatter3d(
    x=dataframe['world_rank'],
    y=dataframe['research'],
    z=dataframe['citations'],
    mode='markers',
    # Every marker gets the same fixed red; no per-point colour values are
    # passed here.
    marker={'size': 10, 'color': 'rgb(255,0,0)'},
)

data = [trace1]
# Zero margins on all four sides so the 3D scene fills the figure.
layout = go.Layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})

fig = go.Figure(data=data, layout=layout)
fig.show()