# Kaggle Learn - Data Visualization

Working through lessons from https://www.kaggle.com/learn/data-visualisation

## Level 1, Part 9 - Introduction to plotly

In [1]:
import pandas as pd
import plotly.graph_objs as go

# Initialise Plotly's offline notebook mode so that iplot() can render
# figures inline in Jupyter.
from plotly.offline import init_notebook_mode, iplot
# NOTE(review): per the plotly docs, connected=True loads plotly.js from a CDN
# instead of embedding it in the notebook, so displaying the plots presumably
# needs internet access - confirm this is the intended mode.
init_notebook_mode(connected=True)

**plotly** 
* An open-source library for interactive plots
  + Allow manipulation of plots after creation
  + However, interactive plots are much more resource-intensive than static graphics, which limits the amount of data that can be displayed
* Online and offline modes
  + Offline - injects the plotly source code directly into the notebook
  + Online - requires network access from Python
* **plotly.graph_objs** is a collection of graph object classes, typically imported as **go**
* Plotly's **iplot** function, its highest-level API, takes a list of graph objects and composes them into a single figure
  + Easy to overlay multiple plots or use with a list of one for a single plot
<br><br> 
  
**go.Scatter()**
* Interactive scatter plot
  + Toolbar to zoom, pan, save 
  + Hover over data points for coordinates 
<br>

**go.Heatmap()**
* Interactive heatmap
<br>

**go.Choropleth()**	
* Geographic maps where the entities (countries, US states, etc.) are colored according to some variable
* For examples of geographic mapping of points with folium libraries see notes from Berkeley Foundations of Data Science lecture 11. 
<br>

**go.Surface()**
* 3D topographical plot. Takes x, y and z coordinates.

In [2]:
# Wine reviews used by the examples below; the first CSV column is used
# as the DataFrame index.
reviews = pd.read_csv(
    "../pandas/data/winemag-data_first150k.csv",
    index_col=0,
)
reviews.head(n=3)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley


In [17]:
# Plotly - interactive scatter plot of review points against price

# Only the first 1000 reviews are plotted.
reviews_sample = reviews.head(1000)

plots = [
    go.Scatter(
        x=reviews_sample['points'],
        y=reviews_sample['price'],
        mode='markers',
    )
]

layout = go.Layout(
    title='Scatter',
    xaxis={'title': 'Points'},
    yaxis={'title': 'Price'},
)

fig = go.Figure(data=plots, layout=layout)

iplot(fig)

In [19]:
# Overlay of KDE (2D-histogram contour) and scatter plots

# The scatter shows the first 1000 reviews; the density contour is built
# from the first 500 only.
scatter_sample = reviews.head(1000)
density_sample = reviews.head(500)

plots = [
    go.Scatter(
        x=scatter_sample['points'],
        y=scatter_sample['price'],
        mode='markers',
    ),
    go.Histogram2dContour(
        x=density_sample['points'],
        y=density_sample['price'],
        # A plain dict is used instead of the deprecated top-level
        # go.Contours class; plotly validates it against this trace's own
        # `contours` property.
        contours=dict(coloring='heatmap'),
    ),
]

layout = go.Layout(
    title='Scatter and KDE overlay',
    xaxis=dict(title='Points'),
    yaxis=dict(title='Price'),
)

fig = go.Figure(data=plots, layout=layout)

iplot(fig)

In [7]:
# Plotly Choropleth map - number of reviews per country

# Count the reviews for each country, after spelling out "US" as
# "United States".
df = (
    reviews['country']
    .replace("US", "United States")
    .value_counts()
)

# Countries are located by name; each one is coloured by its review count.
choropleth = go.Choropleth(
    locationmode='country names',
    locations=df.index.values,
    text=df.index,
    z=df.values,
)

iplot([choropleth])

In [6]:
# Plotly Surface

# NOTE: this example does not render inside a VirtualBox guest - probably
#   because the "InnoTek Systemberatung GmbH: VirtualBox Graphics Adapter"
#   driver listed under "Additional Drivers" is not working.
#
# It worked on Windows.
#
#
# df = reviews.assign(n=0).groupby(['points', 'price'])['n'].count().reset_index()
# df = df[df["price"] < 100]
# v = df.pivot(index='price', columns='points', values='n').fillna(0).values.tolist()
#
# iplot([go.Surface(z=v)])

#### Exercises

In [20]:
# Show every column when displaying wide DataFrames. The fully qualified
# option name is used because the short pattern 'max_columns' also matches
# 'styler.render.max_columns' in newer pandas releases, which makes
# set_option raise an OptionError for the ambiguous key.
pd.set_option('display.max_columns', None)
# Pokemon dataset used by the exercises below
pokemon = pd.read_csv(filepath_or_buffer="data/pokemon.csv")
pokemon.head(n=3)

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,against_grass,against_ground,against_ice,against_normal,against_poison,against_psychic,against_rock,against_steel,against_water,attack,base_egg_steps,base_happiness,base_total,capture_rate,classfication,defense,experience_growth,height_m,hp,japanese_name,name,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,1.0,2.0,1.0,1.0,2.0,1.0,1.0,0.5,49,5120,70,318,45,Seed Pokémon,49,1059860,0.7,45,Fushigidaneフシギダネ,Bulbasaur,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,1.0,2.0,1.0,1.0,2.0,1.0,1.0,0.5,62,5120,70,405,45,Seed Pokémon,63,1059860,1.0,60,Fushigisouフシギソウ,Ivysaur,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,1.0,2.0,1.0,1.0,2.0,1.0,1.0,0.5,100,5120,70,625,45,Seed Pokémon,123,1059860,2.0,80,Fushigibanaフシギバナ,Venusaur,88.1,3,122,120,80,grass,poison,100.0,1,0


In [21]:
# Exercise: interactive scatter plot of attack against defense,
# one marker per row of `pokemon`.
attack_vs_defense = go.Scatter(
    x=pokemon['attack'],
    y=pokemon['defense'],
    mode='markers',
)
plots = [attack_vs_defense]

layout = go.Layout(
    title='Attack vs. Defense',
    xaxis={'title': 'Attack'},
    yaxis={'title': 'Defense'},
)

fig = go.Figure(data=plots, layout=layout)

iplot(fig)