### Writing to a file

In [1]:
# Sample data used by the file-writing example: name -> grade
grades = {
    'Jenna': 80,
    'Dylan': 78,
    'Anis': 65,
    'Keisha': 82,
}
grades

{'Jenna': 80, 'Dylan': 78, 'Anis': 65, 'Keisha': 82}

In [3]:
# Write one "name:grade" line per entry.
# The `with` block flushes and closes the file automatically, even if a
# write raises part-way through, so no explicit close() call is needed.
with open("scores_file.txt", "w") as scores_file:
    for key, value in grades.items():
        scores_file.write(f"{key}:{value}\n")

### Exercise

In [4]:
def getPopulations(file):
    """Read a '|'-delimited population file into a dictionary.

    Each non-blank line must have the form ``<country>|<population>``, where
    the population may contain thousands separators (e.g. ``1,347,350,000``).

    Args:
        file: Path of the text file to read.

    Returns:
        dict mapping the text before the '|' (the country) to the population
        as an int.

    Raises:
        ValueError: If a non-blank line does not contain exactly one '|' or
            its population field is not an integer.
    """
    pops = {}
    # `with` closes the file as soon as reading finishes (or fails) instead of
    # leaving the handle open until garbage collection.
    with open(file) as infile:
        for line in infile:
            if not line.strip():
                continue  # tolerate blank lines, e.g. an empty line at end of file
            country, pop = line.split('|')
            # int() ignores the trailing newline; only the commas must go
            pops[country] = int(pop.replace(',', ''))
    return pops

In [6]:
# Build the country -> population dict from the '|'-delimited data file
populations = getPopulations('population.txt')

In [7]:
# Look up a single country's population by name
populations['China']

1347350000

In [10]:
# get countries with population over 100 million
large_pops = [c for c, p in populations.items() if p > 100_000_000]
print(large_pops)

['China', 'India', 'United States', 'Indonesia', 'Brazil', 'Pakistan', 'Nigeria', 'Bangladesh', 'Russia', 'Japan', 'Mexico']


### Exercise

In [12]:
from collections import Counter

# Count how often each word occurs in the text.
word_counts = Counter()
# `with` closes the file once counting is done instead of leaving it open.
with open('metamorphosis.txt') as text_file:
    for line in text_file:
        tokens = line.split()
        for token in tokens:
            # normalise: lowercase, then trim commas/periods from both ends
            word_counts[token.lower().strip(',.')] += 1

print(word_counts)

Counter({'he': 10, 'to': 6, 'the': 4, 'that': 4, 'was': 4, 'his': 4, 'a': 3, 'and': 3, 'look': 2, 'at': 2, 'dull': 2, 'feel': 2, 'right': 2, 'have': 2, 'gregor': 1, 'then': 1, 'turned': 1, 'out': 1, 'window': 1, 'weather': 1, 'drops': 1, 'of': 1, 'rain': 1, 'could': 1, 'be': 1, 'heard': 1, 'hitting': 1, 'pane': 1, 'which': 1, 'made': 1, 'him': 1, 'quite': 1, 'sad': 1, 'how': 1, 'about': 1, 'if': 1, 'i': 1, 'sleep': 1, 'little': 1, 'bit': 1, 'longer': 1, 'forget': 1, 'all': 1, 'this': 1, 'nonsense': 1, 'thought': 1, 'but': 1, 'something': 1, 'unable': 1, 'do': 1, 'because': 1, 'used': 1, 'sleeping': 1, 'on': 1, 'in': 1, 'present': 1, 'state': 1, "couldn't": 1, 'get': 1, 'into': 1, 'position': 1, 'however': 1, 'hard': 1, 'threw': 1, 'himself': 1, 'onto': 1, 'always': 1, 'rolled': 1, 'back': 1, 'where': 1, 'must': 1, 'tried': 1, 'it': 1, 'hundred': 1, 'times': 1, 'shut': 1, 'eyes': 1, 'so': 1, "wouldn't": 1, 'floundering': 1, 'legs': 1, 'only': 1, 'stopped': 1, 'when': 1, 'began': 1, 'mil

In [14]:
# Show the five most frequent words, one "word count" pair per line
for entry in word_counts.most_common(5):
    print(*entry)

he 10
to 6
the 4
that 4
was 4


In [16]:
# Words with at least 4 letters that appear two or more times,
# ordered from most to least frequent
commons = [
    (word, freq)
    for word, freq in word_counts.most_common()
    if freq >= 2 and len(word) >= 4
]
commons

[('that', 4), ('look', 2), ('dull', 2), ('feel', 2), ('right', 2), ('have', 2)]

In [17]:
# At most 5 words with at least 4 letters that appear two or more times,
# taken from the most frequent end of the list
repeated_long_words = [
    (word, freq)
    for word, freq in word_counts.most_common()
    if freq >= 2 and len(word) >= 4
]
commons = repeated_long_words[:5]
commons

[('that', 4), ('look', 2), ('dull', 2), ('feel', 2), ('right', 2)]

----

### Regular Expressions

In [18]:
import re

In [19]:
# re.search(pattern, string) looks for the pattern anywhere in the target
# string and returns a Match object for the first hit
res = re.search(pattern='a', string='cat')  # pattern 'a', target 'cat'
res

<re.Match object; span=(1, 2), match='a'>

In [20]:
# 'dog' contains no 'a', so the search finds nothing
res = re.search(pattern='a', string='dog')
res

In [21]:
# A failed search returns None, which the notebook does not echo on its own;
# print() makes the result visible
print(res)

None


In [22]:
# A Match object is truthy and None is falsy, so the search result can be
# used directly as a condition
print('matched' if re.search('a','dog') else 'not matched')

not matched


In [23]:
# Only the first occurrence is reported, even though 'ar' appears twice
res = re.search(pattern='ar', string='barbaric')
res

<re.Match object; span=(1, 3), match='ar'>

### Writing regex patterns with metacharacters

In [25]:
# example 1 using * metacharacter:
# 'c*' allows zero or more 'c' characters between 'a' and 't'
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('ac*t', astr)
    print('match' if res else 'no match')

string? ('quit' to stop)  at


match


string? ('quit' to stop)  cartesian


no match


string? ('quit' to stop)  factor


match


string? ('quit' to stop)  account


no match


string? ('quit' to stop)  quit


In [27]:
# example 2 using + metacharacter:
# 'c+' requires at least one 'c' between 'a' and 't'
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('ac+t', astr)
    print('match' if res else 'no match')

string? ('quit' to stop)  at


no match


string? ('quit' to stop)  act


match


string? ('quit' to stop)  factor


match


string? ('quit' to stop)  quit


In [28]:
# example 3 using [] and * metacharacters

In [29]:
# 'a', then any number of digits (possibly none), then 't'
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('a[0-9]*t', astr)
    print('match' if res else 'no match')

string? ('quit' to stop)  at


match


string? ('quit' to stop)  a2t


match


string? ('quit' to stop)  a123t


match


string? ('quit' to stop)  ffa1256ttqq


match


string? ('quit' to stop)  quit


In [30]:
# example 4 using metacharacters [] and *:
# 'a', then any run of letters or digits (possibly empty), then 't'
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('a[a-zA-Z0-9]*t', astr)
    print('match' if res else 'no match')

string? ('quit' to stop)  at


match


string? ('quit' to stop)  aat


match


string? ('quit' to stop)  aA19at


match


string? ('quit' to stop)  quit


In [31]:
# example 5 using metacharacters [] and +:
# 'a', then one or more letters, then one or more digits, then 't'
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('a[a-zA-Z]+[0-9]+t', astr)
    print('match' if res else 'no match')

string? ('quit' to stop)  at


no match


string? ('quit' to stop)  aAt


no match


string? ('quit' to stop)  aA1t


match


string? ('quit' to stop)  a1At


no match


string? ('quit' to stop)  quit


In [32]:
# Metacharacter '.', matches any character

In [33]:
# 'a', then any run of characters (possibly empty), then 't'
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('a.*t', astr)
    print('match' if res else 'no match')

string? ('quit' to stop)  at


match


string? ('quit' to stop)  art


match


string? ('quit' to stop)  factoid


match


string? ('quit' to stop)  cpen15t


no match


string? ('quit' to stop)  cpppt


no match


string? ('quit' to stop)  apen15t


match


string? ('quit' to stop)  ac12##tor


match


string? ('quit' to stop)  quit


In [37]:
# Metacharacter '?' matches zero or one occurrence of the preceding character
# (the character is optional: present once, or not there at all)
for target in ['at', 'act', 'tractor', 'art', 'acct']:
    res = re.search('ac?t', target)
    print(res)

<re.Match object; span=(0, 2), match='at'>
<re.Match object; span=(0, 3), match='act'>
<re.Match object; span=(2, 5), match='act'>
None
None


In [38]:
# metacharacter ^ matches start of target string when used outside []
# metacharacter $ matches end of string

In [39]:
# example 1: '^' anchors the pattern to the start of the target string
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('^p', astr)  # target string must start with p
    print('match' if res else 'no match')

string? ('quit' to stop)  pat


match


string? ('quit' to stop)  part


match


string? ('quit' to stop)  apart


no match


string? ('quit' to stop)  p


match


string? ('quit' to stop)  quit


In [40]:
# '$' anchors the pattern to the end of the target string
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('p$', astr)  # target string must END with p
    print('match' if res else 'no match')

string? ('quit' to stop)  tarp


match


string? ('quit' to stop)  


no match


string? ('quit' to stop)  p


match


string? ('quit' to stop)  carp


match


string? ('quit' to stop)  cap


match


string? ('quit' to stop)  captor


no match


string? ('quit' to stop)  quit


In [None]:
# example 3: combining [], + and $
# 'ar', then one or more lowercase letters, then a 't' that ends the string
while True:
    astr = input("string? ('quit' to stop) ")
    if astr == 'quit':
        break
    res = re.search('ar[a-z]+t$', astr)
    print('match' if res else 'no match')

string? ('quit' to stop)  art


no match


string? ('quit' to stop)  arrest


match


string? ('quit' to stop)  artistry


no match
