# Chapter-2: A crash course in Python

## Dictionaries

In [None]:
empty_dict = {}                         # Pythonic
empty_dict2 = dict()                    # less Pythonic
grades = {"Joel": 80, "Tim": 95}        # dictionary literal

In [None]:
joels_grade = grades["Joel"]
joels_grade

80

In [None]:
# getting a key error
try:
  kates_grade = grades["Kate"]
except KeyError:
  print("no grades for Kate!")

no grades for Kate!


In [None]:
# It is fast for large dictionaries also
joel_has_grade = "Joel" in grades
kate_has_grade = "Kate" in grades

joel_has_grade, kate_has_grade

(True, False)

In [None]:
joels_grade = grades.get("Joel", 0)
kates_grade = grades.get("Kate", 0)
no_ones_grade = grades.get("No One")      # default is None

joels_grade, kates_grade, no_ones_grade

(80, 0, None)

In [None]:
grades["Tim"] = 99
grades["Kate"] = 100

num_students = len(grades)
num_students

3

In [None]:
# example of structured data
## not a great approach though
tweet = {
    "user": "nikhilsingh",
    "text": "GoogleColab is awesome",
    "retweet_count": 100,
    "hashtags": ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

tweet_keys = tweet.keys()                  # iterable for the keys
tweet_values = tweet.values()              # iterable for the values
tweet_items = tweet.items()                # iterable for the items

tweet_keys, tweet_values, tweet_items

(dict_keys(['user', 'text', 'retweet_count', 'hashtags']),
 dict_values(['nikhilsingh', 'GoogleColab is awesome', 100, ['#data', '#science', '#datascience', '#awesome', '#yolo']]),
 dict_items([('user', 'nikhilsingh'), ('text', 'GoogleColab is awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome', '#yolo'])]))

In [None]:
"user" in tweet_keys                          # not pythonic

True

In [None]:
"user" in tweet                               # pythonic

True

In [None]:
"nikhilsingh" in tweet_values                 # slow but only way to check

True

> Dictionary keys must be hashable, so a list cannot be used as a key.    
> For a multipart key, use a tuple or convert the key to a string.

## defaultdict

In [None]:
document = ['Google', 'colab', 'is', 'good', 'for', 'creating', 'models', 'and', 'coding', '.']

In [None]:
word_counts = {}
for word in document:
  if word in word_counts:
    word_counts[word] += 1
  else:
    word_counts[word] = 1

In [None]:
word_counts = {}
for word in document:
  try:
    word_counts[word] += 1
  except KeyError:
    word_counts[word] = 1

In [None]:
word_counts={}
for word in document:
  previous_count = word_counts.get(word, 0)
  word_counts[word] = previous_count + 1

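A `defaultdict` is like a regular dictionary, except that when you look up a key it does not contain, it first adds a value for that key by calling the zero-argument function you supplied when creating it (for example, `int` produces `0`).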

In [None]:
from collections import defaultdict

# defaultdict(int) fills in a missing key with int() == 0, so no membership
# check or try/except is needed before incrementing.
word_counts = defaultdict(int)
for word in document:   # iterate over the document's words, not the (still empty) counter
  word_counts[word] += 1

In [None]:
dd_list = defaultdict(list)
dd_list[2].append(1)
dd_list

defaultdict(list, {2: [1]})

In [None]:
dd_dict = defaultdict(dict)
dd_dict["Joel"]["City"] = "Seattle"
dd_dict

defaultdict(dict, {'Joel': {'City': 'Seattle'}})

In [None]:
dd_pair = defaultdict(lambda: [0, 0])
dd_pair[2][1] = 1
dd_pair

defaultdict(<function __main__.<lambda>>, {2: [0, 1]})

## Counter

> Count hashable objects

In [None]:
from collections import Counter

c = Counter([0,1,2,0])
c

Counter({0: 2, 1: 1, 2: 1})

In [None]:
c = Counter()                           # a new, empty counter
print(c)
c = Counter('gallahad')                 # a new counter from an iterable
print(c)
c = Counter({'red': 4, 'blue': 2})      # a new counter from a mapping
print(c)
c = Counter(cats=4, dogs=8)             # a new counter from keyword args
c

Counter()
Counter({'a': 3, 'l': 2, 'g': 1, 'h': 1, 'd': 1})
Counter({'red': 4, 'blue': 2})


Counter({'cats': 4, 'dogs': 8})

In [None]:
word_counts = Counter(document)
word_counts

Counter({'.': 1,
         'Google': 1,
         'and': 1,
         'coding': 1,
         'colab': 1,
         'creating': 1,
         'for': 1,
         'good': 1,
         'is': 1,
         'models': 1})

In [None]:
document = "My name is Nikhil and I am learning Python data structures."
word_counts = Counter(document)
word_counts

Counter({' ': 10,
         '.': 1,
         'I': 1,
         'M': 1,
         'N': 1,
         'P': 1,
         'a': 6,
         'c': 1,
         'd': 2,
         'e': 3,
         'g': 1,
         'h': 2,
         'i': 4,
         'k': 1,
         'l': 2,
         'm': 2,
         'n': 5,
         'o': 1,
         'r': 3,
         's': 3,
         't': 4,
         'u': 2,
         'y': 2})

Here I split a sentence into words and keep only the words that occur more than once. Note that `split()` separates on whitespace only, so `This`/`this` and `science`/`science;` are counted as different words.

In [None]:
gen_str = "This this is is data data science science; learn using using Python."
myDict = Counter(gen_str.split())
print(myDict)
myList = [k for k, v in myDict.items() if v > 1]
myList

Counter({'is': 2, 'data': 2, 'using': 2, 'This': 1, 'this': 1, 'science': 1, 'science;': 1, 'learn': 1, 'Python.': 1})


['is', 'data', 'using']

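Get the elements from the counter directly using `elements`; each element is repeated as many times as its count, and elements with a count below one are skipped.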

In [None]:
c = Counter(a=4, b=2, c=0, d=-2)
print(f"these are sorted: {sorted(c.elements())}")

print(f"you'll get the object: {c.elements()}")

# to print
[i for i in c.elements()]

these are sorted: ['a', 'a', 'a', 'a', 'b', 'b']
you'll get the object: <itertools.chain object at 0x7f4e07da0990>


['a', 'a', 'a', 'a', 'b', 'b']

Another `Counter` method worth knowing is `most_common`.

In [None]:
word_counts.most_common(10) # returns the 10 most common elements and their counts (characters here, because a string was counted)

[(' ', 10),
 ('a', 6),
 ('n', 5),
 ('i', 4),
 ('t', 4),
 ('e', 3),
 ('s', 3),
 ('r', 3),
 ('y', 2),
 ('m', 2)]

In [None]:
# to print in a proper order
for word, count in word_counts.most_common(10):
  print(word, count)

  10
a 6
n 5
i 4
t 4
e 3
s 3
r 3
y 2
m 2


## Sets
> collection of _distinct_ elements
-----

In [None]:
primes_below_10 = {2, 3, 5, 7} # not a dict
print(type(primes_below_10))

empty_dict = {}
type(empty_dict) # not a set

<class 'set'>


dict

In [None]:
s = set()
s.add(1)
s.add(2)
s.add(2)

print(s)

x = len(s)
y = 2 in s
z = 3 in s

x, y, z

{1, 2}


(2, True, False)

In [None]:
list_of_words = "a heartening tale of small victories and endu no sophomore \
slump for director sam mendes w if you are an actor who can \
relate to the sea it's this memory-as-identity obviation \
that g boyd's screenplay ( co-written with guardian"

stopwords_list = ["a", "an", "at"] + list_of_words.split() + ["yet", "you"]
stopwords_set = set(stopwords_list)

In [None]:
%time
"zip" in stopwords_list

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.82 µs


False

In [None]:
%time
"zip" in stopwords_set # this is very fast

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 7.15 µs


False

In [None]:
item_list = [1,2,3,1,2,3]
num_items = len(item_list); print(num_items)
item_set = set(item_list); print(item_set)
num_distinct_items = len(item_set); print(num_distinct_items)
distinct_item_list = list(item_set); print(distinct_item_list)

6
{1, 2, 3}
3
[1, 2, 3]


## Control flow

> _ternary_ if-then-else

In [None]:
parity = "even" if x%2 == 0 else "odd"
parity

'even'

> `while` loop

In [None]:
x = 0
while x < 10:
  print(f"{x} is less than 10")
  x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [None]:
for x in range(10):
  print(f"{x} is less than 10")

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


with `continue` and `break`

In [None]:
for x in range(10):
  if x==3:
    continue # skip straight to the next iteration
  if x==5:
    break # quit the loop completely
  print(x)

0
1
2
4


## Truthiness

In [None]:
print(1<2); True == False

True


False

> _booleans are capitalized in Python_

In [None]:
x = None
assert x == None # non pythonic
assert x is None # pythonic

> `all` and `any`

In [None]:
print(all([True,1,{3}]))
print(all([True,1,{}]))
print(any([True,1,{3}]))
print(all([]))
print(any([]))

True
False
True
True
False


## Sorting

In [None]:
x = sorted([-10,1,2,3,-8,-9,-4,-5,6,7,], reverse=True, key=abs)

x

[-10, -9, -8, 7, 6, -5, -4, 3, 2, 1]

In [None]:
document = "This principle creates a powerful inductive bias that we leverage with neural \
architectures that are defined recursively and dynamically, by learning two scaleinvariant \
atomic operations: how to split a given input into smaller sets, and how \
to merge two partially solved tasks into a larger partial solution. Our model can be \
trained in weakly supervised environments, namely by just observing input-output \
pairs, and in even weaker environments, using a non-differentiable reward signal. \
Moreover, thanks to the dynamic aspect of our architecture, we can incorporate \
the computational complexity as a regularization term that can be optimized by \
backpropagation."

In [None]:
from collections import Counter

word_counts = Counter(document.split())

In [None]:
wc = sorted(word_counts.items(), key=lambda word_and_count: word_and_count[1], reverse=True)

In [None]:
# wc

In [None]:
doc = "here is an an extra example example document extra"

word_counts = Counter(doc.split())
wc = sorted(word_counts.items(), key=lambda word_and_count: word_and_count[1])#, reverse=True)

In [None]:
wc

[('here', 1),
 ('is', 1),
 ('document', 1),
 ('an', 2),
 ('extra', 2),
 ('example', 2)]

## List comprehensions

In [None]:
even_numbers = [x for x in range(5) if x%2==0]
even_numbers

[0, 2, 4]

In [None]:
squares = [x*x for x in range(5)]
squares

[0, 1, 4, 9, 16]

In [None]:
square_dict = {x:x * x for x in range(5)}
square_dict

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

In [None]:
square_set = {x*x for x in [1, -1]}
square_set

{1}

In [None]:
zeros = [0 for _ in range(5)]
zeros

[0, 0, 0, 0, 0]

In [None]:
pairs = [(x,y) 
          for x in range(3) 
          for y in range(3)]
pairs

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]

In [None]:
increasing_pairs = [(x,y) for x in range(3) for y in range(x+1, 3)]
increasing_pairs

[(0, 1), (0, 2), (1, 2)]

## Automated testing and `assert`

In [None]:
def smallest_item(xs):
  """Return the smallest element of xs, as determined by the built-in min."""
  smallest = min(xs)
  return smallest

assert smallest_item([10, 20, 5, 40]) == 5 # no error

In [None]:
assert smallest_item([1, 0, -2, 2]) == -1, "Smallest number is not equal to -1" # will generate an error with the message

AssertionError: ignored

In [None]:
def smallest_item_in_assert(xs):
  """Return the smallest element of xs, asserting first that xs is non-empty."""
  # A falsy xs (e.g. an empty list) fails the assertion with the message below.
  assert xs, "empty list has no smallest item"
  smallest = min(xs)
  return smallest

In [None]:
smallest_item_in_assert([]) # will raise an AssertionError

AssertionError: ignored

## Object oriented programming

In [None]:
class CountingClicker:
  """A clicker that keeps a running count which can be incremented, read and reset."""

  def __init__(self, count=0):
    """Start the clicker at `count` (0 by default)."""
    self.count = count

  def __repr__(self):
    """Return a string such as `CountingClicker(count=3)`."""
    # Use the runtime class name so that a subclass instance is not
    # displayed as if it were a plain CountingClicker.
    return f"{type(self).__name__}(count={self.count})"

  def click(self, num_times=1):
    """Click the clicker some number of times."""
    self.count += num_times

  def read(self):
    """Return the current count."""
    return self.count

  def reset(self):
    """Set the count back to 0."""
    self.count = 0

In [None]:
clicker1 = CountingClicker()
clicker2 = CountingClicker(100)
clicker3 = CountingClicker(count=100)

In [None]:
clicker = CountingClicker()
assert clicker.read()==0, "clicker should start with count 0"
clicker.click()
clicker.click()
assert clicker.read()==2, "after two clicks, clicker should have count 2"
clicker.reset()
assert clicker.read()==0, "after reset, clicker should be back to 0"

In [None]:
# a subclass inherits all the behaviour of its parent class.
class NoResetClicker(CountingClicker):
  """A CountingClicker whose reset does nothing; all other methods are inherited."""

  def reset(self):
    # Deliberately leave the count untouched.
    return None

In [None]:
clicker2 = NoResetClicker()
clicker2

CountingClicker(count=0)

In [None]:
assert clicker2.read()==0 # no error



In [None]:
clicker2.click()
clicker2.read()

1

In [None]:
clicker2.reset()
assert clicker2.read()==1, "reset shouldn't do anything"

## Iterables and generators

In [None]:
def generate_range(n):
  """Yield 0, 1, 2, ... for as long as the value stays below n."""
  current = 0
  while True:
    if not current < n:
      return  # finishing the generator ends the caller's iteration
    yield current  # execution pauses here until the next value is requested
    current += 1

In [None]:
for i in generate_range(10):
  print(f"i: {i}")

i: 0
i: 1
i: 2
i: 3
i: 4
i: 5
i: 6
i: 7
i: 8
i: 9


In [None]:
def natural_numbers():
  """Yield 1, 2, 3, ... without end; the caller decides when to stop."""
  previous = 0
  while True:
    previous += 1
    yield previous

In [None]:
evens_below_20 = (i for i in generate_range(20) if i%2==0)

In [None]:
[i for i in evens_below_20]

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

In [None]:
# playing with enumerate

names = ["A", "B", "C", "D"]

for i, name in enumerate(names):
  print(f"name {i+1} is {name}")

name 1 is A
name 2 is B
name 3 is C
name 4 is D


## Randomness

In [None]:
import random
random.seed(10)

In [None]:
four_uniform_randoms = [random.random() for _ in range(4)]
four_uniform_randoms

[0.5714025946899135,
 0.4288890546751146,
 0.5780913011344704,
 0.20609823213950174]

In [None]:
random.seed(10)
print(random.random())
random.seed(10)
print(random.random())

0.5714025946899135
0.5714025946899135


In [None]:
random.randrange(10)

6

In [None]:
random.randrange(3,6)

4

In [None]:
up_to_ten = [1,2,3,4,5,6,7,8,9,10]
random.shuffle(up_to_ten)
up_to_ten

[5, 6, 9, 2, 3, 7, 8, 4, 1, 10]

In [None]:
character_vars = ['a','b','c','d']
random.shuffle(character_vars)
character_vars

['b', 'd', 'a', 'c']

In [None]:
choose_randomly = random.choice(["A", "B", "C", "D"])
choose_randomly

'C'

In [None]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers, 6) # without replacement
winning_numbers

[2, 26, 55, 8, 38, 22]

In [None]:
num_replacements = [random.choice(range(10)) for _ in range(4)] # with replacement
num_replacements # several calls to choice

[6, 6, 4, 4]

## Regular Expressions

In [None]:
import re

In [None]:
not re.match("a", "cat"), re.search("a", "cat"), not re.search("c", "dog"), 3==len(re.split("[ab]", "carbs")), "R-D-"==re.sub("[0-9]", "-", "R2D2")

(True, <re.Match object; span=(1, 2), match='a'>, True, True, True)

> `re.match` checks for a match only at the beginning of the string

> `re.search` looks for a match anywhere in the string

## `zip` and Argument Unpacking

In [None]:
list1 = ['a', 'b', 'c']
list2 = [1,2,3]

In [None]:
[pair for pair in zip(list1, list2)]

[('a', 1), ('b', 2), ('c', 3)]