In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
from bokeh.plotting import figure, output_notebook, show
output_notebook()

In [3]:
from bokeh.models import Label, LabelSet, ColumnDataSource, Div, Range1d
from bokeh.layouts import gridplot, column, Row

In [4]:
path = Path("/media/kanishk/8E8481E28481CCE1/Learn/")

In [5]:
houston = pd.read_csv(path/"houston.csv")
houston = houston.loc[:, ["Date", "city", "Mean.TemperatureF"]]

In [6]:
houston = houston[houston.Date.str[:4] == "2015"]
houston.Date = pd.to_numeric(houston.Date.str[5:7])
houston = houston.groupby(["city", "Date"], as_index=False)["Mean.TemperatureF"].mean()

In [7]:
cleveland = pd.read_csv(path/"cleveland.csv")
cleveland = cleveland.loc[:, ["Date", "city", "Mean.TemperatureF"]]

In [8]:
cleveland = cleveland[cleveland.Date.str[:4] == "2015"]
cleveland.Date = pd.to_numeric(cleveland.Date.str[5:7])
cleveland = cleveland.groupby(["city", "Date"], as_index=False)["Mean.TemperatureF"].mean()

In [9]:
houston.head(2)
cleveland.head(2)

Unnamed: 0,city,Date,Mean.TemperatureF
0,Cleveland (OH),1,23.225806
1,Cleveland (OH),2,14.5


In [10]:
fig21 = figure(plot_height=400, plot_width=400, title="Figure 2.1 - Standard cartesian coordinate system",
              x_range=(-2, 3), y_range=(-2, 2))

fig21.line(x=0, y=(-2, 2), line_color="lightslategray")  # y = can be a tuple or a list
fig21.line(x=(-2, 3), y=0, line_color="lightslategray")

fig21.line(x=(0, 2), y=1, line_color="lightslategray", line_dash="dotted")
fig21.line(x=2, y=(0, 1), line_color="lightslategray", line_dash="dotted")
fig21.circle(x=2, y=1, fill_color="blue", line_color=None, radius=0.1)
fig21.circle(x=0, y=0, fill_color="blue", line_color=None, radius=0.1)
fig21.line(x=(-1, 0), y=-1, line_color="lightslategray", line_dash="dotted")
fig21.line(x=-1, y=(-1, 0), line_color="lightslategray", line_dash="dotted")
fig21.circle(x=(-1, -1), y=-1, fill_color="blue", line_color=None, radius=0.1)

fig21.add_layout(Label(text="x = 2", x=0.8, y=1.1))
fig21.add_layout(Label(text="x = -1", x=-1.6, y=-0.5))
fig21.add_layout(Label(text="y = -1", x=-0.9, y=-1.2))

fig21.add_layout(Label(text="(2, 1)", x=2.01, y=1.01))
fig21.add_layout(Label(text="(0, 0)", x=-0.5, y=0.1))
fig21.add_layout(Label(text="(-1, -1)", x=-1.9, y=-1.1))

show(fig21)

In [11]:
fig22a = figure(plot_width=200, plot_height=200, title="a")
fig22a.line(x="Date", y="Mean.TemperatureF", source=ColumnDataSource(houston), 
               legend="city", line_color="blue", line_width=2)
fig22a.legend.visible = False

show(fig22a)

In [12]:
fig22b = figure(plot_width=400, plot_height=200, title="b")
fig22b.line(x="Date", y="Mean.TemperatureF", source=ColumnDataSource(houston), 
               legend="city", line_color="blue", line_width=2)
fig22b.legend.visible = False

show(fig22b)

In [13]:
fig22c = figure(plot_width=600, plot_height=200, title="c")
fig22c.line(x="Date", y="Mean.TemperatureF", source=ColumnDataSource(houston), 
               legend="city", line_color="blue", line_width=2)
fig22c.legend.visible = False

show(fig22c)

In [14]:
fig22 = gridplot([[fig22a, fig22b], [fig22c]], group="Figure 2.2")

In [15]:
show(column(Div(text="<h3>Figure 2.2</h3>Daily temperature normals for Houston"), fig22))

In [16]:
houston = pd.read_csv(path/"houston.csv")
houston = houston.loc[:, ["Date", "city", "Mean.TemperatureF"]]

In [17]:
houston = houston[houston.Date.str[:4] == "2015"]

In [18]:
cleveland = pd.read_csv(path/"cleveland.csv")
cleveland = cleveland.loc[:, ["Date", "city", "Mean.TemperatureF"]]

In [19]:
cleveland = cleveland[cleveland.Date.str[:4] == "2015"]

In [20]:
base = pd.merge(cleveland, houston, on="Date")

In [21]:
base.head()

Unnamed: 0,Date,city_x,Mean.TemperatureF_x,city_y,Mean.TemperatureF_y
0,2015-01-01,Cleveland (OH),26.0,Houston (TX),44.0
1,2015-01-02,Cleveland (OH),31.0,Houston (TX),51.0
2,2015-01-03,Cleveland (OH),40.0,Houston (TX),47.0
3,2015-01-04,Cleveland (OH),41.0,Houston (TX),46.0
4,2015-01-05,Cleveland (OH),17.0,Houston (TX),43.0


In [22]:
base.head().Date.str[-2:]

0    01
1    02
2    03
3    04
4    05
Name: Date, dtype: object

In [23]:
base = base[base.Date.str[-2:] == "01"]

In [24]:
base.loc[:, "Mean.TemperatureC_y"] = (base.loc[:, "Mean.TemperatureF_y"] - 32) * 5/9
base.loc[:, "Mean.TemperatureC_x"] = (base.loc[:, "Mean.TemperatureF_x"] - 32) * 5/9

In [25]:
fig23a = figure(plot_height=400, plot_width=400, title="a")

fig23a.line(y="Mean.TemperatureF_y", x="Mean.TemperatureF_x", source=ColumnDataSource(base))

fig23a.add_layout(LabelSet(source=ColumnDataSource(base[base.Date.isin(["2015-04-01","2015-07-01"])]),
                           text="Date", y="Mean.TemperatureF_y", x="Mean.TemperatureF_x", level="glyph",
                          x_offset=-5, y_offset=-5))
fig23a.xaxis.axis_label = "Temparature in Houston (F)"
fig23a.yaxis.axis_label = "Temparature in Cleveland (F)"

show(fig23a)

In [26]:
fig23b = figure(plot_height=400, plot_width=400, title="b")

fig23b.line(y="Mean.TemperatureC_y", x="Mean.TemperatureC_x", source=ColumnDataSource(base))

fig23b.add_layout(LabelSet(source=ColumnDataSource(base[base.Date.isin(["2015-04-01","2015-07-01"])]),
                           text="Date", y="Mean.TemperatureC_y", x="Mean.TemperatureC_x", level="glyph",
                          x_offset=-5, y_offset=-5))
fig23b.xaxis.axis_label = "Temparature in Houston (C)"
fig23b.yaxis.axis_label = "Temparature in Cleveland (C)"

show(fig23b)

In [27]:
tempGrid = Row(fig23a, fig23b)

In [28]:
show(tempGrid)

In [29]:
scale_data = pd.DataFrame(data={"data": [1, 3.16, 10, 31.6, 100]})

In [30]:
fig24a = figure(plot_height=100, plot_width=400, title="Original data, linear scale", x_range=Range1d(0, 101),
               y_range=Range1d(-0.1, 0.1))
fig24a.line(source=ColumnDataSource(scale_data), x="data", y=0)
fig24a.circle(source=ColumnDataSource(scale_data), x="data", y=0, radius=1)
fig24a.yaxis.visible = False
fig24a.xaxis.axis_label = "x"
fig24a.toolbar_location = None
show(fig24a)

In [31]:
fig24b = figure(plot_height=100, plot_width=400, title="log transformed data, linear scale",
               y_range=Range1d(-0.1, 0.1))
fig24b.line(source=ColumnDataSource(np.log10(scale_data)), x="data", y=0)
fig24b.circle(source=ColumnDataSource(np.log10(scale_data)), x="data", y=0)
fig24b.yaxis.visible = False
fig24b.xaxis.axis_label = "log10(x)"
fig24b.toolbar_location = None
show(fig24b)

In [32]:
fig24c = figure(plot_height=100, plot_width=400, title="Original data, log scale",
               y_range=Range1d(-0.1, 0.1), x_axis_type="log")
fig24c.line(source=ColumnDataSource(scale_data), x="data", y=0)
fig24c.circle(source=ColumnDataSource(scale_data), x="data", y=0)
fig24c.add_layout(LabelSet(source=ColumnDataSource(scale_data), x="data", y=0, text="data", 
                           y_offset=-20))
fig24c.axis.visible = False
fig24c.toolbar_location = None
fig24c.xaxis.axis_label = "x"
show(fig24c)

In [33]:
fig24d = figure(plot_height=100, plot_width=400, title="log scale with incorrect axis title",
               y_range=Range1d(-0.5, 0.1), x_axis_type="log")
fig24d.line(source=ColumnDataSource(scale_data), x="data", y=0)
fig24d.circle(source=ColumnDataSource(scale_data), x="data", y=0)
fig24d.add_layout(LabelSet(source=ColumnDataSource(scale_data), x="data", y=0, text="data", 
                           y_offset=-20))
fig24d.yaxis.visible = False
fig24d.toolbar_location = None
fig24d.xaxis.axis_label = "log10(x)"
show(fig24d)

In [34]:
fig24 = column(fig24a, fig24b, fig24c, fig24d)

In [35]:
show(column(Div(text="<h3>Figure 2.4</h3>Relationship between linear and log scale"), fig24))

data from http://worldpopulationreview.com/us-cities/

In [36]:
census = pd.read_csv(path/"population.csv")

In [40]:
census = census[census.State == "Texas"]

In [41]:
census.head()

Unnamed: 0,Rank,Name,State,2018 Population,2016 Population,2010 Census,Change,2018 Density
3,4,Houston,Texas,2340814,2303482,2099451,0.008038,1419.917805
6,7,San Antonio,Texas,1541456,1492510,1327407,0.016133,1291.083266
8,9,Dallas,Texas,1359133,1317929,1197816,0.015392,1543.608793
10,11,Austin,Texas,983366,947890,790390,0.018369,1183.5925
12,13,Fort Worth,Texas,893997,854113,741206,0.022815,1001.075032
