In [1]:
# Imports should first be from standard library
# Then one blank line
# Then third party imports
# Then one blank line
# Then local imports

from collections import Counter
import calendar
import itertools
import random
import re
import string

import requests

In [2]:
names = 'pybites mike bob julian tim sara guido'.split()
names

['pybites', 'mike', 'bob', 'julian', 'tim', 'sara', 'guido']

In [3]:
for name in names:
    print(name.title())

Pybites
Mike
Bob
Julian
Tim
Sara
Guido


In [4]:
first_half_of_alphabet = list(string.ascii_lowercase)[:13]
first_half_of_alphabet

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm']

In [5]:
# without list comprehension
new_names = []
for name in names:
    if name[0] in first_half_of_alphabet:
        new_names.append(name.title())
new_names

['Mike', 'Bob', 'Julian', 'Guido']

In [6]:
# with a list comprehension
new_names2 = [name.title() for name in names if name[0] in first_half_of_alphabet]

In [7]:
new_names == new_names2

True

In [8]:
# interesting thing I learned on Talk Python re assert
# don't use this in production code especially to validate for security purposes because 
# assert statements can be skipped when python is run with optimizations

In [9]:
resp = requests.get('http://projects.bobbelderbos.com/pcc/harry.txt')

In [10]:
len(resp.text)

26022

In [11]:
words = resp.text.lower().split()
len(words)

4656

In [12]:
words[:10]

['the',
 'boy',
 'who',
 'lived',
 'mr.',
 'and',
 'mrs.',
 'dursley,',
 'of',
 'number']

In [13]:
cnt = Counter(words)
cnt.most_common(10)

[('the', 202),
 ('he', 136),
 ('a', 108),
 ('and', 100),
 ('to', 93),
 ('was', 86),
 ('of', 72),
 ('his', 69),
 ('in', 57),
 ('it', 55)]

In [14]:
'-' in words

True

In [15]:
# https://regexone.com
# re.sub(pattern, repl, string, count=0, flags=0)
words = [re.sub(r'\W+', r'', word) for word in words]

In [16]:
'-' in words

False

In [17]:
'the' in words

True

In [18]:
resp2 = requests.get('http://projects.bobbelderbos.com/pcc/stopwords.txt')
stop_words = resp2.text.lower().split()

In [19]:
len(stop_words)

319

In [20]:
words = [word for word in words if word.strip() and word not in stop_words]

In [21]:
'the' in words

False

In [22]:
len(words)

2147

In [23]:
cnt2 = Counter(words)
cnt2.most_common(20)

[('dursley', 45),
 ('dumbledore', 35),
 ('said', 32),
 ('mr', 30),
 ('professor', 30),
 ('mcgonagall', 25),
 ('mrs', 21),
 ('people', 21),
 ('didnt', 20),
 ('cat', 20),
 ('harry', 19),
 ('street', 14),
 ('know', 12),
 ('potters', 11),
 ('owls', 11),
 ('hagrid', 11),
 ('potter', 10),
 ('eyes', 10),
 ('yes', 10),
 ('saying', 10)]

In [24]:
'said' in stop_words

False

### Generators

In [25]:
def num_gen():
    for i in range(5):
        yield i
        
a_gen = num_gen()

In [26]:
next(a_gen)

0

In [27]:
for i in a_gen:
    print(i)

1
2
3
4


In [28]:
next(a_gen)

StopIteration: 

In [29]:
options = 'red yellow blue white black green purple'.split()
options

['red', 'yellow', 'blue', 'white', 'black', 'green', 'purple']

In [33]:
def create_select_options():
    '''list of options in form
    <option value=red>Red</option>'''
    select_list = []
    
    for option in options:
        select_list.append(f'option value={option}>{option.title()}</option>')
        
    return select_list
        

In [34]:
from pprint import pprint as pp
pp(create_select_options())

['option value=red>Red</option>',
 'option value=yellow>Yellow</option>',
 'option value=blue>Blue</option>',
 'option value=white>White</option>',
 'option value=black>Black</option>',
 'option value=green>Green</option>',
 'option value=purple>Purple</option>']


In [35]:
def create_select_options_gen(options=options):    
    for option in options:
        yield f'<option value={option}>{option.title()}</option>'

In [36]:
list(create_select_options_gen())

['<option value=red>Red</option>',
 '<option value=yellow>Yellow</option>',
 '<option value=blue>Blue</option>',
 '<option value=white>White</option>',
 '<option value=black>Black</option>',
 '<option value=green>Green</option>',
 '<option value=purple>Purple</option>']

In [37]:
# list
def leap_years_lst(n=1000000):
    leap_years = []
    for year in range(1, n+1):
        if calendar.isleap(year):
            leap_years.append(year)
    return leap_years

# generator
def leap_years_gen(n=1000000):
    for year in range(1, n+1):
        if calendar.isleap(year):
            yield year

In [38]:
# this had me waiting for a few seconds
%timeit -n1 leap_years_lst()

241 ms ± 7.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
# this was instant
%timeit -n1 leap_years_gen()

The slowest run took 4.81 times longer than the fastest. This could mean that an intermediate result is being cached.
616 ns ± 470 ns per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [40]:
NAMES = ['arnold schwarzenegger', 'alec baldwin', 'bob belderbos',
         'julian sequeira', 'sandra bullock', 'keanu reeves',
         'julbob pybites', 'bob belderbos', 'julian sequeira',
         'al pacino', 'brad pitt', 'matt damon', 'brad pitt']

In [42]:
# title case
title_case = [name.title() for name in NAMES]
title_case

['Arnold Schwarzenegger',
 'Alec Baldwin',
 'Bob Belderbos',
 'Julian Sequeira',
 'Sandra Bullock',
 'Keanu Reeves',
 'Julbob Pybites',
 'Bob Belderbos',
 'Julian Sequeira',
 'Al Pacino',
 'Brad Pitt',
 'Matt Damon',
 'Brad Pitt']

In [45]:
last_first = [f'{name.split()[1].title()} {name.split()[0].title()}' for name in NAMES]
last_first

['Schwarzenegger Arnold',
 'Baldwin Alec',
 'Belderbos Bob',
 'Sequeira Julian',
 'Bullock Sandra',
 'Reeves Keanu',
 'Pybites Julbob',
 'Belderbos Bob',
 'Sequeira Julian',
 'Pacino Al',
 'Pitt Brad',
 'Damon Matt',
 'Pitt Brad']

In [70]:
def gen_pairs():
    while True: # this makes it an infinite generator
        name1, name2 = random.sample([name.split()[0].title() for name in NAMES], 2)
        yield f'{name1} teams up with {name2}'

In [71]:
pairs = gen_pairs()

In [72]:
next(pairs)

'Al teams up with Julian'

In [75]:
for _ in range(10):
    print(next(pairs))

Alec teams up with Julian
Julian teams up with Julian
Brad teams up with Arnold
Julian teams up with Arnold
Arnold teams up with Brad
Keanu teams up with Sandra
Arnold teams up with Julian
Matt teams up with Bob
Julian teams up with Bob
Brad teams up with Bob
