## Hands-on Beginning Python

### Reference

https://us.pycon.org/2020/schedule/presentation/53/
    
https://github.com/mattharrison/Tiny-Python-3.8-Notebook

### Data

https://github.com/COVID19Tracking/covid-tracking-data
    

#### Imports

https://www.python.org/dev/peps/pep-0008/#imports


In [None]:
# First come standard libraries, in alphabetical order
import csv
import sys
import urllib.request as req

# After a blank line, import third-party libraries
import matplotlib.pyplot as plt
import pandas as pd

# After another blank line, import local libraries

In [None]:
# python version
sys.version

In [None]:
! which python

In [None]:
pd.__version__

#### Running python

* REPL
* ipython
* bpython
<br>
* jupyter notebook

#### Learning should be
* D - Decide
* R - Relax
* M - Motivation
* O - Observe
* M - Mechanics

### Environments

* Refer to `python_envs.rst`
https://github.com/pyladies-houston/2020/tree/master/may
* Virtual environments are not meant to be shared, so create your own for your specific needs.
* You might want to stop here and do so.
* Discussion of environments will be limited to 15 min.

#### IDLE - Integrated Development and Learning Environment

https://docs.python.org/3/library/idle.html

* Fetch URL

In [None]:
url = 'https://raw.githubusercontent.com/COVID19Tracking/covid-tracking-data/master/data/states_daily_4pm_et.csv'

In [None]:
req

In [None]:
fn = req.urlopen(url)

In [None]:
data = fn.read()

In [None]:
len(data)

In [None]:
# byte string
data[:100]  # slice

* Write

In [None]:
# important! 
# output to csv_file, input for read_csv function later

fname = '../data/covid.csv'

In [None]:
# 1

fo = open(fname, mode='wb')

In [None]:
fo.write(data)

In [None]:
fo.close()

* Context manager

In [None]:
# 2

with open(fname, mode='wb') as fo:
    fo.write(data)
# upon unindent file is closed

In [None]:
def fetch_url(url, fname):
    """
    Save a url to a local file.

    The resource at ``url`` is read fully into memory and then written,
    as raw bytes, to the path ``fname`` (overwriting any existing file).

    Args:
        url: Address to download; anything ``urllib.request.urlopen`` accepts.
        fname: Path of the local file to create.

    Returns:
        None.
    """
    # Using the response as a context manager guarantees it is closed,
    # even if reading fails part-way through.
    with req.urlopen(url) as response:
        data = response.read()
    # Binary mode: the payload is written exactly as received.
    with open(fname, mode='wb') as fo:
        fo.write(data)

In [None]:
dir()

In [None]:
fetch_url(url, 'out/test.csv')

* Playing with data

In [None]:
fn2 = open(fname, encoding='utf8')

In [None]:
lines = []  # empty list literal (special syntax)
for line in fn2:
    lines.append(line)

In [None]:
len(lines)

In [None]:
lines[0]

In [None]:
lines[-1]

In [None]:
lines[:3]

In [None]:
type(lines)

In [None]:
type(lines[0])

In [None]:
dir(lines)

In [None]:
'20200505,AL,8285,98481,,,1107,,428,,255,,B,5/5/2020 00:00,cdeeecd2210217b93fc3b08765445de51e2cebcc,2020-05-05T20:00:00Z,313,1107,106766,106766,106766,01,17,43,3389,260,3649\n' in lines

* Help

In [None]:
help(lines.append)

* Python built-in functions

https://docs.python.org/3/library/functions.html

* Continue playing with data

In [None]:
lines[1]

### Problem 1

In [None]:
dir(lines[1])

In [None]:
lines[1].split(',')

In [None]:
# remove new line
lines[1].strip().split(',')

* Dictionary

https://docs.python.org/3/library/stdtypes.html#dict

In [None]:
d = {}
d['cat'] = "furry feline"
d['dog'] = "cozy canine"

In [None]:
d

In [None]:
dir(d)

In [None]:
d.keys()

In [None]:
d.values()

In [None]:
d.items()

* Store data into a dictionary

In [None]:
header = lines[0].strip().split(',')
line1 = lines[1].strip().split(',')

In [None]:
header

In [None]:
line1

* zip

https://docs.python.org/3/library/functions.html#zip

In [None]:
zip(header, line1)

In [None]:
list(zip(header, line1))

In [None]:
dict(zip(header, line1))

* enumerate

https://docs.python.org/3/library/functions.html#enumerate

In [None]:
lst = list(d)
lst

In [None]:
for i, item in enumerate(lst, 0):
    print(i, item)

* Read CSV

https://docs.python.org/3/library/csv.html?highlight=csv

In [None]:
# 1

def read_csv(fname):
    """
    First attempt at reading a CSV file into a list of dicts.

    NOTE: this version is deliberately left buggy for the debugging
    exercise that follows. The header test relies on ``rows`` being empty,
    but ``rows`` only grows in the ``else`` branch, so the test is true for
    every line: ``headers`` is overwritten each time and the function
    returns an empty list.
    """
    with open(fname, encoding='utf8') as csvfile:
        rows = []
        for line in csvfile:
            # Drop the trailing newline, then split on commas.
            values = line.strip().split(',')
            if len(rows) == 0:  # if not rows
                headers = values
            else:
                rows.append(dict(zip(headers, values)))  # buggy: never reached
        return rows

In [None]:
read_csv(fname)[:2]

### Problem 2

* rubber duck debugging...

In [None]:
# 2

def read_csv(fname):
    """
    Read a comma-separated file into a list of dicts.

    The first line supplies the column names; every later line becomes one
    dict mapping column name -> field text. All values stay strings.
    """
    # Debugging tip: uncomment the next line to step through the loop.
    # import pdb; pdb.set_trace()
    with open(fname, encoding='utf8') as csvfile:
        records = []
        for lineno, raw in enumerate(csvfile):
            fields = raw.strip().split(',')
            if lineno == 0:
                # Line 0 is the header row, not data.
                headers = fields
                continue
            records.append(dict(zip(headers, fields)))
        return records

In [None]:
read_csv(fname)[:2]

* pdb/ breakpoint

https://docs.python.org/3/library/pdb.html?highlight=pdb#module-pdb

* Filter

In [None]:
res = read_csv(fname)
len(res)

In [None]:
tx_res = []
for row in res:
    if row['state'] == 'TX':
        tx_res.append(row)

In [None]:
len(tx_res)

In [None]:
tx_res[0]

In [None]:
tx_res[0]['positive']

In [None]:
type(tx_res[0]['positive'])

In [None]:
# 3

def read_csv(fname):
    """
    Third attempt: read a CSV file and convert every field to ``int``.

    NOTE: deliberately incomplete for the next exercise. ``int(val)`` is
    applied to every field with no error handling, so any field that is not
    a valid integer literal (for example an empty string or a state code)
    raises ``ValueError``.
    """
    # Uncomment to debug: import pdb; pdb.set_trace()
    with open(fname, encoding='utf8') as csvfile:
        rows = []
        for i, line in enumerate(csvfile):
            values = line.strip().split(',')
            if i == 0:
                # The first line holds the column names.
                headers = values
            else:
                for j, val in enumerate(values):
                    val = int(val)  # raises ValueError on non-numeric text
                    values[j] = val
                rows.append(dict(zip(headers, values)))
    return rows

### Problem 3

In [None]:
read_csv(fname)[:2]

In [None]:
# 4 --> working!

def read_csv(fname):
    """
    Read a comma-separated file into a list of dicts.

    The first line supplies the column names. In every later line, fields
    that ``int()`` accepts are stored as integers; all other fields are
    kept as the original text.
    """
    records = []
    with open(fname, encoding='utf8') as csvfile:
        for lineno, raw in enumerate(csvfile):
            fields = raw.strip().split(',')
            if lineno == 0:
                headers = fields
                continue
            for pos, text in enumerate(fields):
                try:
                    fields[pos] = int(text)
                except ValueError:
                    # Not an integer: leave the field as a string.
                    continue
            records.append(dict(zip(headers, fields)))
    return records

In [None]:
read_csv(fname)[:2]

In [None]:
# Pause and make sure this function is working...

In [None]:
res = read_csv(fname)
len(res)

* Filter

In [None]:
# 1

def filter(rows, state):
    """
    First attempt at selecting the rows whose 'state' equals ``state``.

    NOTE: deliberately buggy for the exercise that follows. The loop walks
    ``res`` (the new, empty result list) instead of ``rows``, so the body
    never runs and an empty list is always returned.

    This name also shadows the built-in ``filter`` in this notebook.
    """
    res = []
    for row in res:  # bug: iterates the empty result, not the input rows
        if row['state'] == state:
            res.append(row)
    return res

In [None]:
res_tx = filter(res, 'TX')
len(res_tx)

### Problem 4

More problems... get used to Errors...

In [None]:
# 2

def filter(rows, state):
    """
    Return the rows whose 'state' value equals ``state``, in input order.
    """
    matches = []
    for record in rows:
        if record['state'] != state:
            # Different state: skip this record.
            continue
        matches.append(record)
    return matches

In [None]:
res_tx = filter(res, 'TX')
len(res_tx)

In [None]:
res_tx[0]

In [None]:
# 3

def filter(rows, state):
    """
    Return the rows whose 'state' value equals ``state`` (list comprehension
    version).
    """
    return [record for record in rows if record['state'] == state]

In [None]:
res_tx = filter(res, 'TX')
len(res_tx)

In [None]:
pos = [row['positive'] for row in res_tx]
len(pos)

In [None]:
dir(pos)

In [None]:
pos.reverse()
pos

In [None]:
def get_date(row):
    """Return the value stored under the 'date' key of ``row``."""
    date = row['date']
    return date

* Sort

In [None]:
tx_sorted = sorted(res_tx, key=get_date)

In [None]:
len(tx_sorted)

In [None]:
tx_sorted[0]

In [None]:
tx_sorted[-1]

In [None]:
# 1

def sortby(rows, col_name):
    """
    First attempt at sorting rows by the column ``col_name``.

    NOTE: deliberately buggy for the exercise that follows. The helper
    ``get_col_name`` is defined but never used; the string ``col_name`` is
    passed as ``key`` instead. A string is not callable, so sorting
    non-empty ``rows`` raises ``TypeError``.
    """
    def get_col_name(row):
        # Closure: reads ``col_name`` from the enclosing call.
        return row[col_name]
    return sorted(rows, key=col_name)  # bug: key should be get_col_name

In [None]:
res = read_csv(fname)
tx_res = filter(res, 'TX')
tx_res = sortby(tx_res, 'date')

### Problem 5

In [None]:
# 2

def sortby(rows, col_name):
    """
    Return a new list of ``rows`` ordered by each row's ``col_name`` value.

    The input is not modified.
    """
    def column_value(record):
        # Closure over ``col_name`` from the enclosing call.
        return record[col_name]

    ordered = list(rows)
    ordered.sort(key=column_value)
    return ordered

In [None]:
res = read_csv(fname)
tx_res = filter(res, 'TX')
tx_res = sortby(tx_res, 'date')

In [None]:
len(tx_res)

In [None]:
tx_res[0]

In [None]:
# Make sure all are working...

### Exercise: 

Use Python csv module

https://docs.python.org/3/library/csv.html?highlight=csv

In [None]:
# csv.reader

def csv_reader(fname):
    """
    Exercise placeholder: read ``fname`` using the ``csv`` module.

    Intentionally left unimplemented; as written it returns ``None``.
    """
    pass

In [None]:
# csv_reader(fname)[:2]

* Pandas

https://pandas.pydata.org/

In [None]:
df = pd.read_csv(fname, sep=',')

In [None]:
type(df)

In [None]:
len(df)

In [None]:
df.head(3)

In [None]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_dict.html

df.to_dict('records')[:2]

In [None]:
# Make sure to save the REPL session...

* Plotting

https://matplotlib.org/index.html

https://matplotlib.org/gallery/lines_bars_and_markers/simple_plot.html#sphx-glr-gallery-lines-bars-and-markers-simple-plot-py

* Install matplotlib

https://pypi.org/project/matplotlib/

In [None]:
dir()

In [None]:
# dir(plt)

In [None]:
# 1

def get_value(rows, col_name):
    """
    Collect the ``col_name`` value of every row into a list, in row order.
    """
    column = []
    for record in rows:
        cell = record[col_name]
        column.append(cell)
    return column

In [None]:
pos = get_value(res, 'positive')
len(pos)

In [None]:
# 2

def get_value(rows, col_name):
    """
    Collect the ``col_name`` value of every row into a list (list
    comprehension version).
    """
    return [record[col_name] for record in rows]

In [None]:
tx_res[0].keys()

In [None]:
pos = get_value(res, 'positive')
len(pos)

In [None]:
# data for plotting
# x_axis = tx_res
# y_axis = col 'positive'

fig, ax = plt.subplots()
ax.plot(get_value(tx_res, 'positive'))
# plt.show()

In [None]:
fig, ax = plt.subplots()
ax.plot(get_value(tx_res, 'death'))
# plt.show()

In [None]:
fig, ax = plt.subplots()
ax.plot(get_value(tx_res, 'hospitalized'))
# plt.show()

In [None]:
# all three plots
fig, ax = plt.subplots()
ax.plot(get_value(tx_res, 'positive'))
ax.plot(get_value(tx_res, 'death'))
ax.plot(get_value(tx_res, 'hospitalized'))
# plt.show()

In [None]:
fig.savefig('out/texas.png')

In [None]:
ls out/

### Solutions

* Exercise: use Python csv module

In [None]:
def csv_reader(fname):
    """
    Read a CSV file into a list of dicts using the ``csv`` module.

    The first record supplies the column names. In every later record,
    fields that ``int()`` accepts are stored as integers; all other fields
    are kept as text.

    Args:
        fname: Path of the CSV file to read (decoded as UTF-8).

    Returns:
        A list with one dict per data record, mapping column name -> value.
        An empty file yields an empty list.
    """
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain line breaks. The explicit
    # encoding matches the read_csv versions elsewhere in this file.
    with open(fname, encoding='utf8', newline='') as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader, None)
        if headers is None:
            # Empty file: no header, hence no rows.
            return []
        rows = []
        for values in reader:  # csv.reader takes care of new line
            for j, val in enumerate(values):
                try:
                    values[j] = int(val)
                except ValueError:
                    # Not an integer: keep the original text.
                    pass
            rows.append(dict(zip(headers, values)))
    return rows

In [None]:
csv_reader(fname)[:2]