# namedtuple

In [2]:
from collections import defaultdict, namedtuple, Counter, deque
import csv
import random 
from urllib.request import urlretrieve

In [3]:
user = ('bob', 'coder')

In [4]:
f'{user[0]} is a {user[1]}'

'bob is a coder'

In [5]:
User  = namedtuple('User', 'name role')

In [6]:
user = User(name = 'bob', role = 'coder')

In [7]:
user.role

'coder'

In [9]:
user.name


'bob'

In [10]:
f'{user.name} is a {user.role}'


'bob is a coder'

# defaultdict

In [15]:
users = {'bob':'coder'}

In [16]:
users['bob']
# Indexing a plain dict with a missing key raises KeyError. The error is the
# point of this cell, so catch and display it instead of letting it abort a
# Restart & Run All.
try:
    users['julian']
except KeyError as err:
    print(f'KeyError: {err}')

KeyError: 'julian'

In [17]:
users.get('bob')

'coder'

In [18]:
users.get('julian') is None

True

In [19]:
challenges_done = [('mike', 10), ('julian', 7), ('bob', 5), ('mike', 11), ('julian', 8), ('bob', 6)]

In [20]:
challenges_done

[('mike', 10),
 ('julian', 7),
 ('bob', 5),
 ('mike', 11),
 ('julian', 8),
 ('bob', 6)]

In [24]:
# A plain dict has no entry to append to the first time a name shows up, so
# this raises KeyError on the very first item. The error is the demonstration:
# catch and display it so the notebook still runs top to bottom.
challenges = {}
try:
    for name, challenge in challenges_done:
        challenges[name].append(challenge)
except KeyError as err:
    print(f'KeyError: {err}')

KeyError: 'mike'

In [25]:
# defaultdict(list) creates an empty list the first time a key is looked up,
# so each challenge can be appended without checking for the name first.
challenges = defaultdict(list)
for name, challenge in challenges_done:
    done_so_far = challenges[name]
    done_so_far.append(challenge)
challenges

defaultdict(list, {'mike': [10, 11], 'julian': [7, 8], 'bob': [5, 6]})

# Counter

In [26]:
words = """Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been 
the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and 
scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into 
electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of
Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus
PageMaker including versions of Lorem Ipsum""".split()
words[:5]

['Lorem', 'Ipsum', 'is', 'simply', 'dummy']

In [28]:
# Hand-rolled word tally using a plain dict.
common_words = {}

for word in words:
    # .get supplies 0 for a word seen for the first time.
    common_words[word] = common_words.get(word, 0) + 1

# Sort (word, count) pairs by count, highest first; the sort is stable, so
# words with equal counts stay in first-seen order.
ranked = sorted(common_words.items(), key = lambda x: x[1], reverse = True)
for k, v in ranked[:5]:
    print(k, v)

the 6
Lorem 4
Ipsum 4
of 4
and 3


In [29]:
Counter(words).most_common(5)

[('the', 6), ('Lorem', 4), ('Ipsum', 4), ('of', 4), ('and', 3)]

# deque

In [32]:
lst = list(range(10000000))
deq = deque(range(10000000))

In [33]:
def insert_and_delete(ds):
    """Remove and re-insert ten randomly picked values in ``ds``, in place.

    Each round picks a number from ``range(100)``, calls ``ds.remove`` with
    it and then ``ds.insert`` to put it back at the index equal to its value.

    Args:
        ds: A mutable sequence that provides ``remove`` and ``insert``
            (the notebook passes a ``list`` and a ``deque``). It must
            contain every value that may be picked, otherwise ``remove``
            raises.

    Returns:
        None. ``ds`` is modified in place.
    """
    candidates = range(100)
    remaining = 10
    while remaining > 0:
        value = random.choice(candidates)
        ds.remove(value)
        ds.insert(value, value)
        remaining -= 1
%timeit insert_and_delete(lst)
%timeit insert_and_delete(deq)

162 ms ± 1.78 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
19.8 µs ± 178 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [38]:
# Remote CSV of movie metadata, served raw from the pybites/challenges GitHub repo.
movie_data = 'https://raw.githubusercontent.com/pybites/challenges/solutions/13/movie_metadata.csv'
# Local filename (relative to the working directory) the download is saved under.
movies_csv = 'movies.csv'
# Download the file; this runs every time the cell is executed. As the last
# expression in the cell, the (filename, headers) tuple returned by
# urlretrieve is displayed.
urlretrieve(movie_data, movies_csv)

('movies.csv', <http.client.HTTPMessage at 0x7fef7995eb50>)

In [39]:
Movie = namedtuple('Movie', 'title year score')

In [40]:
def get_movies_by_director(data = movies_csv):
    """Group the movies listed in a CSV file by director.

    Args:
        data: Path of the CSV file to read. The code reads the columns
            ``director_name``, ``movie_title``, ``title_year`` and
            ``imdb_score``. Defaults to ``movies_csv``.

    Returns:
        A ``defaultdict(list)`` mapping each director name to a list of
        ``Movie(title, year, score)`` tuples in file order. Rows whose year
        or score cannot be converted to a number are left out.
    """
    directors = defaultdict(list)

    # newline='' hands line-ending handling to the csv module, which is what
    # it expects so that quoted fields containing line breaks are read as-is.
    with open(data, encoding = 'utf-8', newline = '') as f:
        for row in csv.DictReader(f):
            try:
                director = row['director_name']
                # Drop non-breaking spaces (\xa0) from the title.
                title = row['movie_title'].replace('\xa0', '')
                year = int(row['title_year'])
                score = float(row['imdb_score'])
            except ValueError:
                # Year or score is empty / not numeric: skip this row.
                continue
            directors[director].append(
                Movie(title = title, year = year, score = score)
            )

    return directors

In [41]:
directors = get_movies_by_director()

In [43]:
directors['Christopher Nolan']

[Movie(title='The Dark Knight Rises', year=2012, score=8.5),
 Movie(title='The Dark Knight', year=2008, score=9.0),
 Movie(title='Interstellar', year=2014, score=8.6),
 Movie(title='Inception', year=2010, score=8.8),
 Movie(title='Batman Begins', year=2005, score=8.3),
 Movie(title='Insomnia', year=2002, score=7.2),
 Movie(title='The Prestige', year=2006, score=8.5),
 Movie(title='Memento', year=2000, score=8.5)]

In [45]:
# Tally how many movies each director has, then show the five largest tallies.
cnt = Counter()
for director, movies in directors.items():
    cnt.update({director: len(movies)})
cnt.most_common(5)


[('Steven Spielberg', 26),
 ('Woody Allen', 22),
 ('Martin Scorsese', 20),
 ('Clint Eastwood', 20),
 ('Ridley Scott', 17)]