# Práctica Visualización 1
**Nelson Saturno**

## Ejercicio 1

- Visualizar 5 marcas en \[x, y\] diferentes
- Utilizar radio diferente entre 10 y 30
- Utilizar asteriscos como marca
- Utilizar el color firebrick
- El ancho de la línea debe ser 3

In [1]:
import numpy as np
from bokeh.plotting import figure, output_notebook, show

# Render Bokeh output inline in the notebook.
output_notebook()

# Five random points in the unit square, drawn as a single 2x5 array:
# row 0 holds the x coordinates, row 1 the y coordinates.
x, y = np.random.random((2, 5))
# One marker size per point: 10, 15, 20, 25, 30.
size = range(10, 31, 5)

# Marker appearance: firebrick asterisks drawn with a line width of 3.
marker_style = dict(color="firebrick", line_width=3)

p = figure(plot_width=400, plot_height=400)
p.asterisk(x=x, y=y, size=size, **marker_style)
show(p)

## Ejercicio 2

- Añadir el título a la gráfica
- Añadir etiquetas a los ejes x,y
- Mostrar únicamente la interacción `Pan`

In [2]:
# Fresh random sample: row 0 of the 2x5 draw is x, row 1 is y.
x, y = np.random.random((2, 5))
# One marker size per point: 10, 15, 20, 25, 30.
size = range(10, 31, 5)

# tools="pan" leaves Pan as the only interaction on the toolbar.
p = figure(plot_width=400, plot_height=400, title="Data Visualization 1", tools="pan")
p.xaxis.axis_label = 'x'
p.yaxis.axis_label = 'y'

p.asterisk(x=x, y=y, size=size, color="firebrick", line_width=3)
show(p)

## Ejercicio 3

- Cargar el dataset bokeh.sampledata.iris
- Utilizar Figure.circle y factor_cmap para generar una gráfica

In [3]:
import pandas as pd
from bokeh.sampledata.iris import flowers
from bokeh.palettes import Category10
from bokeh.transform import factor_cmap

# Scatter plot of iris petal length vs. petal width, colored by species.
p = figure(title="Iris Dataset")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Petal Width'

# Distinct species names; their order fixes the category -> color assignment.
species = flowers['species'].unique()
factor_colors = factor_cmap('species', palette=Category10[3], factors=species)

p.circle(
    x="petal_length", y="petal_width", source=flowers,
    fill_color=factor_colors, size=10, alpha=0.8,
    # The legend is keyed on the 'species' column itself. The color-mapping
    # spec belongs to fill_color only and is not a legend label.
    # NOTE(review): newer Bokeh releases spell this `legend_field="species"`;
    # `legend=` matches the legacy API used throughout this notebook.
    legend="species",
)
p.legend.location = "top_left"
show(p)

## Ejercicio 4

- Hacer el plot del ejercicio anterior utilizando diferentes markers por categoría.

In [4]:
from bokeh.transform import factor_mark

# One marker shape per species, paired positionally with the sorted species
# names below. Later cells reuse this list.
markers = ['circle', 'square', 'triangle']

p = figure(title="Iris Dataset")
p.xaxis.axis_label = 'Petal Length'
p.yaxis.axis_label = 'Petal Width'

# Build ONE sorted factor list and hand it to both mappers, so a species'
# color and its marker are always assigned from the same ordering.
species = sorted(flowers['species'].unique())
factor_colors = factor_cmap('species', palette=Category10[3], factors=species)
factor_markers = factor_mark('species', markers, species)

p.scatter(
    x="petal_length", y="petal_width", source=flowers,
    fill_color=factor_colors, size=10, alpha=0.8,
    marker=factor_markers,
    # The legend is keyed on the 'species' column, not on the color spec.
    # NOTE(review): newer Bokeh releases spell this `legend_field="species"`.
    legend="species",
)
p.legend.location = "top_left"
show(p)

## Ejercicio 5

- Crear un scatter plot matrix utilizando gridplot para iris.

In [5]:
from bokeh.layouts import gridplot

def create_plot(df, x, y, factor_colors, factor_markers):
    """Build one 240x240 scatter panel of column ``y`` against column ``x``.

    Args:
        df: Data handed to the glyph as its ``source``.
        x: Column name plotted on the horizontal axis; also the axis label.
        y: Column name plotted on the vertical axis; also the axis label.
        factor_colors: Fill-color spec forwarded to ``scatter``.
        factor_markers: Marker spec forwarded to ``scatter``.

    Returns:
        The figure with the scatter glyph added.
    """
    panel = figure(plot_width=240, plot_height=240, x_axis_label=x, y_axis_label=y)

    glyph_style = dict(
        fill_color=factor_colors,
        marker=factor_markers,
        size=10,
        alpha=0.8,
    )
    panel.scatter(x=x, y=y, source=df, **glyph_style)
    return panel
    
# One sorted factor list shared by both mappers, so each species gets its
# color and its marker from the same ordering.
species = sorted(flowers['species'].unique())
factor_colors = factor_cmap('species', palette=Category10[3], factors=species)
factor_markers = factor_mark('species', markers, species)

# The same four measurements index both the columns (x) and the rows (y)
# of the scatter plot matrix.
xs = ["petal_length", "petal_width", "sepal_length", "sepal_width"]
ys = xs

# Row-major grid of panels. The comprehension keeps its loop variables
# local, so the notebook-level `x` / `y` names are not overwritten.
plots = [
    [create_plot(flowers, x_col, y_col, factor_colors, factor_markers) for x_col in xs]
    for y_col in ys
]

grid = gridplot(plots)
show(grid)

## Ejercicio 6

* Scatter plot con brushing and linking

In [6]:
from bokeh.models import ColumnDataSource

# Selection tools given to every panel of the linked scatter matrix
# (passed through to figure(tools=...) by create_linked_plot).
TOOLS = "box_select,lasso_select"

def create_linked_plot(df, x, y, factor_colors, factor_markers, tools):
    """Build one 240x240 scatter panel with a caller-chosen tool set.

    Args:
        df: Data handed to the glyph as its ``source``. The caller in this
            notebook passes one shared ``ColumnDataSource`` to every panel.
        x: Column name plotted on the horizontal axis; also the axis label.
        y: Column name plotted on the vertical axis; also the axis label.
        factor_colors: Fill-color spec forwarded to ``scatter``.
        factor_markers: Marker spec forwarded to ``scatter``.
        tools: Tool specification forwarded to ``figure(tools=...)``.

    Returns:
        The figure with the scatter glyph added.
    """
    panel = figure(
        plot_width=240, plot_height=240, tools=tools,
        x_axis_label=x, y_axis_label=y,
    )

    glyph_style = dict(
        fill_color=factor_colors,
        marker=factor_markers,
        size=10,
        alpha=0.8,
    )
    panel.scatter(x=x, y=y, source=df, **glyph_style)
    return panel

# A single data source handed to every panel of the matrix.
source = ColumnDataSource(data=flowers)

# One sorted factor list shared by both mappers, so each species gets its
# color and its marker from the same ordering.
species = sorted(flowers['species'].unique())
factor_colors = factor_cmap('species', palette=Category10[3], factors=species)
factor_markers = factor_mark('species', markers, species)

# The same four measurements index both the columns (x) and the rows (y).
xs = ["petal_length", "petal_width", "sepal_length", "sepal_width"]
ys = xs

# Row-major grid of panels. The comprehension keeps its loop variables
# local, so the notebook-level `x` / `y` names are not overwritten.
plots = [
    [
        create_linked_plot(source, x_col, y_col, factor_colors, factor_markers, TOOLS)
        for x_col in xs
    ]
    for y_col in ys
]

grid = gridplot(plots)
show(grid)

## Ejercicio 7

* Crear scatter plot con dataset autompg
* Utilizar paleta Blues8
* Añadir color bar y tool hover

In [7]:
from bokeh.sampledata.autompg import autompg
from bokeh.palettes import Blues8
from bokeh.models import LinearColorMapper, ColorBar
from bokeh.transform import linear_cmap

# HoverTool is only imported by a later cell of this notebook, so bring it
# into scope here to keep this cell runnable in order.
from bokeh.models import HoverTool

# Map the cylinder count linearly onto the Blues8 palette, spanning the
# observed minimum and maximum of the 'cyl' column.
mapper = LinearColorMapper(palette=Blues8, low=autompg["cyl"].min(), high=autompg["cyl"].max())
color_bar = ColorBar(color_mapper=mapper, location=(0, 0))

p = figure(plot_width=800, plot_height=400)
p.xaxis.axis_label = "Miles per Gallon"
p.yaxis.axis_label = "Horsepower"

# Both the fill color and the marker size are driven by the 'cyl' column.
p.scatter(
    x="mpg", y="hp",
    fill_color={'field': 'cyl', 'transform': mapper}, line_color=None,
    source=autompg, size="cyl"
)

# The exercise asks for a hover tool: show the plotted columns for the
# point under the cursor.
p.add_tools(HoverTool(tooltips=[
    ("mpg", "@mpg"),
    ("hp", "@hp"),
    ("cyl", "@cyl"),
]))

p.add_layout(color_bar, 'right')
# Semi-transparent grey background.
p.background_fill_color = "grey"
p.background_fill_alpha = 0.5
show(p)

## Ejercicio 8

- Visualización de datos financieros
- Visualizar tooltips

In [8]:
# Download Bokeh's sample data files. The recorded output below shows CSVs
# such as AAPL.csv, GOOG.csv, IBM.csv and MSFT.csv being fetched.
# Presumably these back the bokeh.sampledata.stocks import in the next cell.
import bokeh
bokeh.sampledata.download()

Using data directory: /home/nelson/.bokeh/data
Downloading: CGM.csv (1589982 bytes)
   1589982 [100.00%]
Downloading: US_Counties.zip (3171836 bytes)
   3171836 [100.00%]
Unpacking: US_Counties.csv
Downloading: us_cities.json (713565 bytes)
    713565 [100.00%]
Downloading: unemployment09.csv (253301 bytes)
    253301 [100.00%]
Downloading: AAPL.csv (166698 bytes)
    166698 [100.00%]
Downloading: FB.csv (9706 bytes)
      9706 [100.00%]
Downloading: GOOG.csv (113894 bytes)
    113894 [100.00%]
Downloading: IBM.csv (165625 bytes)
    165625 [100.00%]
Downloading: MSFT.csv (161614 bytes)
    161614 [100.00%]
Downloading: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.zip (4816256 bytes)
   4816256 [100.00%]
Unpacking: WPP2012_SA_DB03_POPULATION_QUINQUENNIAL.csv
Downloading: gapminder_fertility.csv (64346 bytes)
     64346 [100.00%]
Downloading: gapminder_population.csv (94509 bytes)
     94509 [100.00%]
Downloading: gapminder_life_expectancy.csv (73243 bytes)
     73243 [100.00%]
Downloading: 

In [22]:
from bokeh.sampledata.stocks import AAPL as AAPL_dict, GOOG as GOOG_dict, IBM as IBM_dict, MSFT as MSFT_dict
from bokeh.models import ColumnDataSource, HoverTool
from datetime import datetime

def datetime_local(x):
    """Return the values of ``x`` as a NumPy array with ``datetime64`` dtype."""
    converted = np.array(x, dtype=np.datetime64)
    return converted

def df_transform(data):
    """Build a DataFrame from a column dict and parse its 'date' column.

    Args:
        data: Mapping of column name to values. It must contain a 'date'
            entry whose values are strings in ``YYYY-MM-DD`` form.

    Returns:
        A new DataFrame with the same columns, where 'date' holds parsed
        datetime values instead of strings.

    Raises:
        KeyError: If ``data`` has no 'date' column.
        ValueError: If a date string does not match ``%Y-%m-%d``.
    """
    df = pd.DataFrame.from_dict(data)
    # Parse the whole column in one vectorized call instead of invoking
    # datetime.strptime row by row. The explicit format keeps parsing strict.
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")
    return df
    
# Parse each raw stock dict into a DataFrame with a datetime 'date' column.
AAPL, GOOG, IBM, MSFT = [df_transform(data) for data in [AAPL_dict, GOOG_dict, IBM_dict, MSFT_dict]]
stock_frames = [AAPL, GOOG, IBM, MSFT]

# Date window covered by all four series: from the latest first date to the
# earliest last date, truncated to midnight.
min_common_date = pd.Timestamp(max(df['date'].min() for df in stock_frames).date())
max_common_date = pd.Timestamp(min(df['date'].max() for df in stock_frames).date())


def _stock_source(tag, df):
    """Return a ColumnDataSource for ``df`` restricted to the common window.

    The frame is filtered once, and both the 'date' and the 'adj close'
    columns come from that same filtered frame. They therefore always have
    matching lengths, even if a ticker has a different number of rows inside
    the window than the others.

    Args:
        tag: Ticker label repeated on every row (shown in the tooltip).
        df: Stock DataFrame with 'date' and 'adj_close' columns.

    Returns:
        A ColumnDataSource with columns 'tag', 'date' and 'adj close'.
    """
    in_window = (df['date'] >= min_common_date) & (df['date'] <= max_common_date)
    window = df[in_window]
    return ColumnDataSource(data={
        'tag': [tag] * len(window),
        'date': datetime_local(window['date']),
        'adj close': window['adj_close'],
    })


aapl_source = _stock_source("AAPL", AAPL)
goog_source = _stock_source("GOOG", GOOG)
ibm_source = _stock_source("IBM", IBM)
msft_source = _stock_source("MSFT", MSFT)

p = figure(plot_height=300, x_axis_type="datetime", tools="crosshair", toolbar_location=None,
           sizing_mode="scale_width", title="Closing price")
p.background_fill_color="#f5f5f5"
p.grid.grid_line_color="white"
p.axis.axis_line_color = None

# One line per ticker: (legend label, line color, data source).
line_specs = [
    ("AAPL", '#AA6C39', aapl_source),
    ("GOOG", '#AA3939', goog_source),
    ("IBM", '#226666', ibm_source),
    ("MSFT", '#55AA55', msft_source),
]
for stock_tag, stock_color, stock_source in line_specs:
    p.line(x='date', y='adj close', line_width=2, color=stock_color,
           source=stock_source, legend=stock_tag)

p.legend.location = "top_left"
# Tooltip with ticker, date and adjusted close; mode='vline' triggers the
# tooltip for whatever lies on the vertical line under the cursor.
# NOTE(review): newer Bokeh releases expect the formatter keys written as
# '@date' and '@{adj close}' - confirm against the installed version.
p.add_tools(HoverTool(
        tooltips=[
            ('tag', '@tag'),
            ('date', '@date{%F}'),
            ('close', '$@{adj close}{%0.2f}')
        ],
        formatters={
            'date'      : 'datetime',
            'adj close' : 'printf'
        },
        mode='vline'
    )
)
show(p)

## Ejercicio 9

Me lo apunto para hacerlo fuera de la evaluación por no haber tenido tiempo.