# 0. Load imports 

In [1]:
## imports
import pandas as pd
import numpy as np
import re

import matplotlib.pyplot as plt
import seaborn as sns

## print multiple things from same cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

## load data on 2020 crimes in DC
df = dc_crim_2020 = pd.read_csv("https://opendata.arcgis.com/datasets/f516e0dd7b614b088ad781b0c4002331_2.csv")

# 1. Questions: list comprehension

- What would this do?

In [2]:
### pool of courses
all_courses = ["QSS20", "QSS17", "GOV10", "GOV4", "CSC1"]

## toy example
[course for course in all_courses if "QSS" in course]

['QSS20', 'QSS17']

## 1.1 Application 1: filtering to a smaller list

When we might use: have a lot of columns in a dataframe; want to filter to a smaller set using some pattern

In [3]:
### pull out ones that contain GOV in the string
gov_c = [course for course in all_courses if "GOV" in course]
gov_c # result

['GOV10', 'GOV4']

In [4]:
### showing that the "course" is just a placeholder/
### arbitrary interator
gov_c_alt = [x for x in all_courses if "GOV" in x]

gov_c == gov_c_alt

True

## 1.2 Application two: keep all objects in the list but do some transformation

In [5]:
all_courses

## strip the numbers from the course names
courses_prefix = [x[:3] for x in all_courses]
courses_prefix # could then find unique elements


['QSS20', 'QSS17', 'GOV10', 'GOV4', 'CSC1']

['QSS', 'QSS', 'GOV', 'GOV', 'CSC']

In [6]:
# Join all together example
" #:(# ".join(courses_prefix)

'QSS #:(# QSS #:(# GOV #:(# GOV #:(# CSC'

#### Your turn: Using original list, add "dartmouth_" prefix to the course name

In [7]:
dart_course = ["dartmouth_" + course for course in all_courses]
dart_course

['dartmouth_QSS20',
 'dartmouth_QSS17',
 'dartmouth_GOV10',
 'dartmouth_GOV4',
 'dartmouth_CSC1']

## 1.3 Subsetting columns

Use list comprehension to filter to columns with id in the string. Then, create a new dataframe called df1 that contains only column heads with "id"

In [8]:
id_cols = [col for col in df.columns if "ID" in col]
id_cols

## Then, filter the data
df[id_cols]

['BID', 'OBJECTID', 'OCTO_RECORD_ID']

Unnamed: 0,BID,OBJECTID,OCTO_RECORD_ID
0,,802282718,
1,,802282726,
2,,802285348,
3,,802287553,
4,,802287554,
...,...,...,...
27926,,802842283,
27927,,802842304,
27928,GOLDEN TRIANGLE,802842326,
27929,,802842327,


## 1.4 Comprehension for numbers

Here we compare two ways of creating a list of even numbers.

In [9]:
num_list = np.arange(10000)
num_list

array([   0,    1,    2, ..., 9997, 9998, 9999], shape=(10000,))

In [10]:
%%time
even_nums = [i for i in num_list if (i % 2) == 0]

CPU times: user 925 μs, sys: 36 μs, total: 961 μs
Wall time: 984 μs


In [11]:
%%time
num_list[~(num_list % 2).astype(bool)]

CPU times: user 508 μs, sys: 120 μs, total: 628 μs
Wall time: 507 μs


array([   0,    2,    4, ..., 9994, 9996, 9998], shape=(5000,))

In [12]:
num_list[ ~(num_list % 2).astype(bool) ]

array([   0,    2,    4, ..., 9994, 9996, 9998], shape=(5000,))

#### Your turn: Extract all numbers in num_list that end in 7

In [14]:
inc_7 = [num for num in num_list if str(num)[-1] == "7"]
inc_7

[np.int64(7),
 np.int64(17),
 np.int64(27),
 np.int64(37),
 np.int64(47),
 np.int64(57),
 np.int64(67),
 np.int64(77),
 np.int64(87),
 np.int64(97),
 np.int64(107),
 np.int64(117),
 np.int64(127),
 np.int64(137),
 np.int64(147),
 np.int64(157),
 np.int64(167),
 np.int64(177),
 np.int64(187),
 np.int64(197),
 np.int64(207),
 np.int64(217),
 np.int64(227),
 np.int64(237),
 np.int64(247),
 np.int64(257),
 np.int64(267),
 np.int64(277),
 np.int64(287),
 np.int64(297),
 np.int64(307),
 np.int64(317),
 np.int64(327),
 np.int64(337),
 np.int64(347),
 np.int64(357),
 np.int64(367),
 np.int64(377),
 np.int64(387),
 np.int64(397),
 np.int64(407),
 np.int64(417),
 np.int64(427),
 np.int64(437),
 np.int64(447),
 np.int64(457),
 np.int64(467),
 np.int64(477),
 np.int64(487),
 np.int64(497),
 np.int64(507),
 np.int64(517),
 np.int64(527),
 np.int64(537),
 np.int64(547),
 np.int64(557),
 np.int64(567),
 np.int64(577),
 np.int64(587),
 np.int64(597),
 np.int64(607),
 np.int64(617),
 np.int64(627),
 np

#### Your turn: Divide each number  in num_list by 2

In [16]:
num_list_2 = [(x/2) for x in num_list]
num_list_2

[np.float64(0.0),
 np.float64(0.5),
 np.float64(1.0),
 np.float64(1.5),
 np.float64(2.0),
 np.float64(2.5),
 np.float64(3.0),
 np.float64(3.5),
 np.float64(4.0),
 np.float64(4.5),
 np.float64(5.0),
 np.float64(5.5),
 np.float64(6.0),
 np.float64(6.5),
 np.float64(7.0),
 np.float64(7.5),
 np.float64(8.0),
 np.float64(8.5),
 np.float64(9.0),
 np.float64(9.5),
 np.float64(10.0),
 np.float64(10.5),
 np.float64(11.0),
 np.float64(11.5),
 np.float64(12.0),
 np.float64(12.5),
 np.float64(13.0),
 np.float64(13.5),
 np.float64(14.0),
 np.float64(14.5),
 np.float64(15.0),
 np.float64(15.5),
 np.float64(16.0),
 np.float64(16.5),
 np.float64(17.0),
 np.float64(17.5),
 np.float64(18.0),
 np.float64(18.5),
 np.float64(19.0),
 np.float64(19.5),
 np.float64(20.0),
 np.float64(20.5),
 np.float64(21.0),
 np.float64(21.5),
 np.float64(22.0),
 np.float64(22.5),
 np.float64(23.0),
 np.float64(23.5),
 np.float64(24.0),
 np.float64(24.5),
 np.float64(25.0),
 np.float64(25.5),
 np.float64(26.0),
 np.float64(2