In [None]:
# 🐍 python!
# print() writes whatever is inside its parentheses to the cell's output

print("hello world!")

In [None]:
# store a value under a name, then reuse it in two calculations
myNumber = 5

# one print call, two results: sep="\n" puts each result on its own line
print(myNumber + 10, myNumber * 10, sep="\n")

In [None]:
# import libraries: matplotlib and pandas

from matplotlib import pyplot as plt
import pandas as pd

# setup for seeing graphs in notebook
%matplotlib inline
# more on what above line means:
# https://jakevdp.github.io/PythonDataScienceHandbook/04.00-introduction-to-matplotlib.html#Plotting-from-an-IPython-notebook

In [None]:
# every plot lives inside a figure, so that is what we create first ...
# the "." dot is how we reach the functions bundled inside plt (and inside the objects plt hands back)
# everything plt offers is listed in the documentation: https://matplotlib.org/stable/api/pyplot_summary.html

fig = plt.figure()
# add one empty set of axes to that figure
ax = fig.add_subplot()
plt.show()

In [None]:
# warm-up: the most basic line plot we can make

# label the y-axis
plt.ylabel('some numbers')

# square brackets [x, y, z] make a list; a "," comma separates each element
# given one list, plot() treats it as the y values and numbers the x-axis 0, 1, 2, ...
plt.plot([1, 2, 3, 4])

plt.show()

In [None]:
# styling the line: color, linestyle, and other options
# go inside the parentheses as extra parameters
# pattern: plt.plot(data, options)

# label the y-axis
plt.ylabel('some numbers')

plt.plot(
    [1, 2, 3, 4],
    linestyle="dotted",
    color="purple",
)
# every available option is in the documentation: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
# (see the "other parameters" section of that page)

plt.show()

In [None]:
# warm-up
# read through this code first - what do you expect it to do?
import random

# x: the whole numbers 0 through 24
x = range(25)

# y: 25 random whole numbers, each one between 1 and 30 (both ends included)
y = [random.randint(1, 30) for _ in range(25)]

print(y)
plt.plot(x, y, color="red")
plt.show()

In [None]:
# back to john snow + cholera ...
# this time the data comes from a file

# 🐼 pd = pandas, and the "." dot gives us its "read_csv" function
# the result is a "DataFrame" (that's the df in the name) - basically a table organized with columns
cholera_dates_df = pd.read_csv(
    'https://raw.githubusercontent.com/mab253/dataviz_fall23/main/week1/snow-dates.csv'
)

# peek at the first 5 rows of the data
# in general: df.head(x) shows the first x rows
cholera_dates_df.head(n=5)

In [None]:
# show the last 2 items from data
# df.tail(x) mirrors df.head(x): it returns the last x rows instead of the first
cholera_dates_df.tail(2)

In [None]:
# make a line plot with "deaths" data
# (only the plot call below is live; the commented-out steps that follow are meant to be enabled one at a time)
plt.plot(cholera_dates_df.date, cholera_dates_df.deaths, color="black", linestyle="-", label="Deaths")
# the first 2 values are the x and y axis data - look what matches the column names from the dataframe! (df.columnName)

# 😱 x-axis is a mess!!
# let's start to design the x-axis elements ...
# plt.tick_params(axis='x', labelrotation=60, labelsize=6)
# all the options for tick_params() function: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.tick_params.html

# let's make the x-axis better ...
# by cleaning up the number of "ticks" shown

#from matplotlib import ticker
# pull more code from matplotlib, the "ticker" object

# maxTicks = 6

# instead of a hard-coded number, what about every-other row, every other date?
# maxTicks = len(cholera_dates_df) / 2
# len here returns the length of the DataFrame, how many rows we have

# xticks = ticker.MaxNLocator(maxTicks)

# Set the xaxis number of ticks
# gca() is a function: "get current axes"
# plt.gca().xaxis.set_major_locator(xticks)

# plt.legend()
# add a legend
# NOTE: legend() only lists lines that already exist when it is called -
# if the "cases" line further down is also enabled, move plt.legend() below it
# more documentation on legend: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.legend.html

# plt.title('Cholera Deaths and Cases\nJohn Snow\'s London', pad=14, loc='center')
# add a title
# more documentation on titles: https://matplotlib.org/stable/gallery/text_labels_and_annotations/titles_demo.html#sphx-glr-gallery-text-labels-and-annotations-titles-demo-py

# let's add "cases" data! add another line to the plot
# plt.plot(cholera_dates_df.date, cholera_dates_df.attacks, color="red", linestyle="--", label="Cases")

# we should label our axes!
# plt.ylabel('# of deaths')
# plt.xlabel('date')


In [None]:
# next dataset: points for a scatter plot, also from John Snow's Ghost Map
cholera_deaths_df = pd.read_csv(
    'https://raw.githubusercontent.com/yy/dviz-course/master/data/deaths.csv'
)

# preview the first 20 rows of this DataFrame
cholera_deaths_df.head(n=20)

In [None]:
# time to plot these points
# scatter function documentation: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html

plt.scatter(
    x=cholera_deaths_df.X,
    y=cholera_deaths_df.Y,
    s=2,
    color="Black",
)
# x is the x-axis data and y is the y-axis data - each written as dataframe.columnName
# s is the marker size: an area measured in points squared, where 1 point = 1/72 inch (in documentation)

# which color to pick? full list of names: https://matplotlib.org/stable/gallery/color/named_colors.html
# more ways to specify colors, in detail: https://matplotlib.org/stable/tutorials/colors/colors.html
# RGB, HEX, other color systems you may know - these work!

In [None]:
# the water PUMPS have locations too (optimized for this exercise) - load them as well
cholera_pumps_df = pd.read_csv(
    'https://raw.githubusercontent.com/yy/dviz-course/master/data/pumps.csv'
)

# sanity check: what does the pump data look like?
cholera_pumps_df.head()

In [None]:
# deaths and pumps, together on one plot
# (both layers share the same color and size for now)

plt.scatter(x=cholera_deaths_df.X, y=cholera_deaths_df.Y, s=2, color="Black")
plt.scatter(x=cholera_pumps_df.X, y=cholera_pumps_df.Y, s=2, color="Black")

# which channels could we change to make the information "pop-out" - the pump with the most deaths clustered around it?

In [None]:
# the plot can also be written out as an image file

plt.scatter(x=cholera_deaths_df.X, y=cholera_deaths_df.Y, s=2, color="Black")
plt.scatter(x=cholera_pumps_df.X, y=cholera_pumps_df.Y, s=2, color="Black")

# save the current figure next to the notebook
plt.savefig("snow.png")

# want higher resolution? matplotlib thinks in "inches" ...
# fig = plt.gcf()
# gcf() = get current figure
# print(fig.get_size_inches())
# how big is the graph, in inches?
# then pick some parameters
# dpi = dots per inch, pixels
# plt.savefig("snow.png", dpi=400, format='png', bbox_inches='tight', pad_inches=0.1)

In [None]:
# what if we wanted to put a map of London behind this translated geospatial data?!
# this cell draws the map image first, then each data layer exactly once, then saves the figure

# use python image library
from PIL import Image

# load the image, from colab notebook filepath
# (the file must already exist at this path, e.g. uploaded to the colab session - otherwise Image.open raises FileNotFoundError)
image_path = "/content/Screenshot 2023-09-06 at 2.47.53 PM.png"

# "with" closes the image file as soon as it has been drawn onto the figure
with Image.open(image_path) as img:
    # place the image on the figure, based on min/max x and y points
    # like stretching across a canvas
    plt.imshow(img, extent=[3, 23, 2, 22], alpha=0.4)
    # alpha = opacity value; < 1 is less than opaque
    # so the data can show up "on top" (gestalt!)

# scatter plot for deaths and pumps, data
# each layer is plotted once: deaths as small black dots, pumps as larger red dots
plt.scatter(cholera_deaths_df.X, cholera_deaths_df.Y, color="Black", s=2)
plt.scatter(cholera_pumps_df.X, cholera_pumps_df.Y, color="Red", s=20)

# let's hide the axes labels - they don't really represent the geospatial data here
plt.xticks([])  # hide x-axis tick labels
plt.yticks([])  # hide y-axis tick labels

# high-res download of the figure
plt.savefig("snow-map.png", dpi=300, format='png', bbox_inches='tight', pad_inches=0.1)

**🗺️📄 citations:**

- John Snow dates data from: https://github.com/shukkkur/John-Snows-Ghost-Map/tree/main/datasets ([Datacamp](https://www.datacamp.com/))
- John Snow deaths + pump data from: https://yyahn.com/dviz-course/m02-history/lab02/ (Yong-Yeol Ahn)
- original John Snow map image: _A map taken from a report by Dr. John Snow._ [Wellcome Collection](https://wellcomecollection.org/works/uxgfjt62/items).