Skip to content

Commit

Permalink
update class 20 materials
Browse files Browse the repository at this point in the history
  • Loading branch information
justmarkham committed Oct 23, 2015
1 parent a1623fb commit e68fa0e
Show file tree
Hide file tree
Showing 3 changed files with 933 additions and 72 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -555,7 +555,7 @@ Tuesday | Thursday
* [Baltimore homicide data](data/homicides.txt)
* [Regular expressions 101](https://regex101.com/#python): real-time testing of regular expressions
* [Reference guide](code/20_regex_reference.py)
* Exercise
* [Exercise](code/20_regex_exercise.py)

**Homework:**
* Your final project is due next week!
Expand Down
61 changes: 61 additions & 0 deletions code/20_regex_exercise.py
@@ -0,0 +1,61 @@
'''
EXERCISE: Regular Expressions
'''

# open file and store each line as one list element
# NOTE: plain 'r' is used instead of 'rU'. The 'U' flag was deprecated in
# Python 3 and removed in 3.11 (open() raises ValueError for it); text mode in
# Python 3 already translates '\r\n' and '\r' line endings to '\n'.
with open('homicides.txt', mode='r') as f:
    data = list(f)


'''
Create a list of ages
'''

import re

# Pull the "<N> year(s) old" phrase out of every row. Rows without such a
# phrase get the placeholder '0' so that 'ages' stays aligned with 'data'.
ages = []
for line in data:
    found = re.search(r'\d+ years? old', line)
    ages.append(found.group() if found else '0')

# the age is the first whitespace-separated token of each phrase; store it as an int
ages = [int(phrase.split()[0]) for phrase in ages]

# mean age over all rows (rows with no age contribute 0 to the sum)
sum(ages) / float(len(ages))

# there must be exactly one age per row of 'data'
assert len(data) == len(ages)


'''
Create a list of ages (using match groups)
'''

# Search every row once; group 1 of the pattern captures just the digits,
# so no string splitting is needed afterwards.
hits = [re.search(r'(\d+)( years? old)', line) for line in data]

# rows where the pattern was not found are recorded as age 0
ages = [int(hit.group(1)) if hit else 0 for hit in hits]


'''
Create a list of causes
'''

# Capture the text between "Cause: " and the next '<' in each row
# (non-greedy, so it stops at the first '<'), normalised to lower case.
causes = []
for line in data:
    found = re.search(r'Cause: (.+?)<', line)
    if found is None:
        # no cause listed on this row
        causes.append('unknown')
    else:
        causes.append(found.group(1).lower())

# count how many times each cause occurs
from collections import Counter
Counter(causes)

0 comments on commit e68fa0e

Please sign in to comment.