# Standard library 

In this notebook we walk through examples to get familiar with the key functionality of
  - `pathlib`
  - `datetime`
  - `collections`
  - `itertools`
  - `functools`


The `pathlib` module offers classes representing filesystem paths, with semantics appropriate for different operating systems.

No need to install anything: `pathlib` has been part of the standard library since Python 3.4.

In [17]:
from pathlib import Path

In [18]:
cwd_path = Path('.')
cwd_path

WindowsPath('.')

In [19]:
cwd_path.absolute()

WindowsPath('D:/DATA100')

In [20]:
Path.cwd()

WindowsPath('D:/DATA100')

# Building paths

In [21]:
student_folder = cwd_path.joinpath('data').joinpath('student-data')
student_folder

WindowsPath('data/student-data')

In [22]:
#Same again just using / operator
student_folder = cwd_path / 'data' / 'student-data'
student_folder

WindowsPath('data/student-data')

In [23]:
# Same again using string
student_folder = Path('./data/student-data')
student_folder

WindowsPath('data/student-data')

In [24]:
student_data_path = student_folder / 'data.json'

## File parts and parents

In [25]:
student_data_path

WindowsPath('data/student-data/data.json')

In [26]:
student_data_path.name, student_data_path.stem, student_data_path.suffix

('data.json', 'data', '.json')

In [27]:
student_data_path.parts

('data', 'student-data', 'data.json')

In [28]:
print(student_data_path)

data\student-data\data.json


In [29]:
# The absolute (complete) path can be obtained with absolute()
print(student_data_path.absolute())

D:\DATA100\data\student-data\data.json


In [30]:
student_data_path.parent

WindowsPath('data/student-data')

In [31]:
# As the parent returns a Path instance
# we can call parent on that too

student_data_path.parent.parent

WindowsPath('data')

## Changing the name, stem or file extension

In [32]:
student_data_path

WindowsPath('data/student-data/data.json')

In [33]:
# replaces .json with .py
student_data_path.with_suffix('.py')

WindowsPath('data/student-data/data.py')

In [34]:
# replaces data.json with student_data.txt
student_data_path.with_name('student_data.txt')

WindowsPath('data/student-data/student_data.txt')

## Interacting with files, reading, writing and renaming

In [35]:
student_data_path.exists()

True

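A `Path` object can be created for a location that does not exist yet; `exists()` checks whether it is actually present on disk. (The output above is `True` presumably because the file already existed from an earlier run of this notebook.)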

In [36]:
student_data_folder = student_data_path.parent

In [37]:
student_data_folder.exists()

True

In [38]:
# Adding parents = True means the mkdir call will make parents if they are not there
# Adding exist_ok = True means the mkdir call will not fail if the folder already exists
student_data_folder.mkdir(parents=True, exist_ok=True)

In [39]:
student_data_folder.exists()

True

In [40]:
student_data_path.exists()

True

In [41]:
student_data = [
    {
        "name": "John Smith",
        "age": 10,
        "on_vacation": False,
        "test_scores": [66, 85, 39, 61, 16, 92, 33, 3, 87, 71],
    },
    {
        "name": "Jane Doe",
        "age": 10,
        "on_vacation": False,
        "test_scores": [4, 73, 75, 4, 50, 83, 8, 23, 42, 23],
    },
    {
        "name": "Isaac Newton",
        "age": 30,
        "on_vacation": True,
        "test_scores": [93, 96, 94, 92, 95, 90, 100, 98, 90, 94],
    },
]

In [42]:
import json # to convert dict to json string

student_data_path.write_text(json.dumps(student_data, indent=4))

856

## Renaming the File

In [43]:
#Create the name
moved_file_location = student_data_path.parent.parent / 'new_location.txt'
moved_file_location

WindowsPath('data/new_location.txt')

In [44]:
#Then Rename
if not moved_file_location.exists():
    student_data_path.rename(moved_file_location)

# NO WARNING if overwriting. So be careful

In [45]:
moved_file_location.exists()

True

## Deleting dirs and files

In [46]:
# unlink -- deletes a file.
# unlink(missing_ok=True) (Python 3.8+) would avoid an error if the file doesn't exist; here we check exists() first instead

if student_data_path.exists():
    student_data_path.unlink()
if moved_file_location.exists():
    moved_file_location.unlink()

In [47]:
# Remove an empty directory
if student_data_folder.is_dir():
    student_data_folder.rmdir()
if student_data_folder.parent.is_dir():
    student_data_folder.parent.rmdir()

OSError: [WinError 145] The directory is not empty: 'data'

In [48]:
# Recreate the folder and the data file so they can be used in the cells below.

student_data_folder.mkdir(parents=True, exist_ok=True)
student_data_path.write_text(json.dumps(student_data, indent=4))

856

### Iterating on a dir.

`iterdir()` yields a `Path` object for each file and folder directly inside the directory.

In [49]:
list(cwd_path.iterdir())

[WindowsPath('.ipynb_checkpoints'),
 WindowsPath('annoying_puzzle2.csv'),
 WindowsPath('baby.csv'),
 WindowsPath('babynames.zip'),
 WindowsPath('babynamesbystate.zip'),
 WindowsPath('bob.pdf'),
 WindowsPath('box.py'),
 WindowsPath('co2_mm_mlo.md'),
 WindowsPath('confirmed_cases.json.md'),
 WindowsPath('data'),
 WindowsPath('dataframe.png'),
 WindowsPath('Deeper Exploration of the Standard Library .ipynb'),
 WindowsPath('download.PNG'),
 WindowsPath('EDA.png'),
 WindowsPath('elections.csv'),
 WindowsPath('Faithfulness.png'),
 WindowsPath('Granularity.png'),
 WindowsPath('groupby.png'),
 WindowsPath('groupby2.png'),
 WindowsPath('groupby3.png'),
 WindowsPath('groupby4.png'),
 WindowsPath('HBD.docx'),
 WindowsPath('HDFC.png'),
 WindowsPath('How To Address Missing Data.png'),
 WindowsPath('iloc loc [].png'),
 WindowsPath('July5'),
 WindowsPath('keys.png'),
 WindowsPath('L1.ipynb'),
 WindowsPath('L3 Pandas.ipynb'),
 WindowsPath('L4 Pandas Advanced.ipynb'),
 WindowsPath('L5 Data Wrangling an

In [50]:
# We can do pattern matching using glob
# Here the **/*.png looks in this folder and all subfolders, matching any file with the .png ending

[path for path in cwd_path.glob('**/*.png')]

[WindowsPath('dataframe.png'),
 WindowsPath('download.PNG'),
 WindowsPath('EDA.png'),
 WindowsPath('Faithfulness.png'),
 WindowsPath('Granularity.png'),
 WindowsPath('groupby.png'),
 WindowsPath('groupby2.png'),
 WindowsPath('groupby3.png'),
 WindowsPath('groupby4.png'),
 WindowsPath('HDFC.png'),
 WindowsPath('How To Address Missing Data.png'),
 WindowsPath('iloc loc [].png'),
 WindowsPath('keys.png'),
 WindowsPath('Matrix Data all same.png'),
 WindowsPath('multinomial.png'),
 WindowsPath('pivot.png'),
 WindowsPath('pivot2.png'),
 WindowsPath('pivot3.png'),
 WindowsPath('pivot4.png'),
 WindowsPath('Popularity of a name group.agg.png'),
 WindowsPath('puzzle.png'),
 WindowsPath('Temporality.png'),
 WindowsPath('UNIX TIME.png'),
 WindowsPath('Variable Feature Types.png')]

In [51]:
# Challenge 1

# Code a function to replace the file endings of all .txt files to .md within the current directory

def replace_all_txt_with_md(directory=None):
    """Rename every ``*.txt`` entry directly inside a directory to ``*.md``.

    Only the directory itself is scanned (the pattern is ``*.txt``, not
    ``**/*.txt``), so files in subfolders are left untouched.

    Args:
        directory: Folder to process, as a ``str`` or ``Path``. Defaults to
            the current working directory, which is what the function always
            used before this parameter existed.

    Note:
        ``Path.rename`` does not warn when the ``.md`` target already exists;
        what happens then is platform dependent (see the pathlib docs).
    """
    folder = Path.cwd() if directory is None else Path(directory)

    # Take a snapshot of the matches before renaming anything, so the
    # directory is not being modified while glob is still scanning it.
    for file_path in list(folder.glob("*.txt")):
        file_path.rename(file_path.with_suffix(".md"))

In [52]:
Path('example.txt').write_text("# Example")

9

In [53]:
replace_all_txt_with_md()

In [54]:
#Clean up the unwanted file
Path('example.md').unlink()

## datetime

The datetime module supplies classes for manipulating dates and times.

### A note on timezones

Date and time objects may be categorized as “aware” or “naive” depending on whether or not they include timezone information.

`datetime.date`
Attributes: year, month, and day.

`datetime.time`
Attributes: hour, minute, second, microsecond, and tzinfo.

`datetime.datetime`
Attributes: year, month, day, hour, minute, second, microsecond, and tzinfo.

`datetime.timedelta`
A duration expressing the difference between two date, time, or datetime instances to microsecond resolution.

`datetime.tzinfo`
An abstract base class for time zone information objects. These are used by the datetime and time classes to provide a customizable notion of time adjustment (for example, to account for time zone and/or daylight saving time).

`datetime.timezone`
A class that implements the tzinfo abstract base class as a fixed offset from the UTC.



Notes:
- Objects of these types are immutable. (if the value of an object cannot be changed over time, then it is known as immutable)
- Objects of these types are hashable, meaning that they can be used as dictionary keys.
- Objects of these types support efficient pickling via the pickle module.

In [1]:
import datetime

## date

In [2]:
twenty_fourth_april = datetime.date(year=2022, month=4, day=24)
twenty_fourth_april

datetime.date(2022, 4, 24)

In [3]:
today = datetime.date.today()
today

datetime.date(2022, 7, 8)

In [4]:
today.day, today.month, today.year

(8, 7, 2022)

## time

In [5]:
four_thirty = datetime.time(hour=16, minute=30, second=0, microsecond=0)
four_thirty

datetime.time(16, 30)

## datetime

In [6]:
order_at = datetime.datetime(
    year=2022,
    month=9,
    day=16,
    hour=20,
    minute=30,
    second=12,
    microsecond=123,
    tzinfo=None
)

In [7]:
order_at

datetime.datetime(2022, 9, 16, 20, 30, 12, 123)

In [8]:
order_at.date()

datetime.date(2022, 9, 16)

In [9]:
order_at.time()

datetime.time(20, 30, 12, 123)

In [10]:
now = datetime.datetime.now()
now

datetime.datetime(2022, 7, 8, 18, 13, 48, 282125)

## Key methods

In [11]:
datetime.datetime.combine(date=today, time=four_thirty)

datetime.datetime(2022, 7, 8, 16, 30)

In [12]:
str(datetime.datetime.combine(date=today, time=four_thirty))

'2022-07-08 16:30:00'

In [13]:
str(today)

'2022-07-08'

## isoformat

In [14]:
# returns the date as a string in the standardised ISO 8601 format
today.isoformat()

'2022-07-08'

In [15]:
now.isoformat()

'2022-07-08T18:13:48.282125'

In [16]:
datetime.date.fromisoformat('2022-04-24')

datetime.date(2022, 4, 24)

In [17]:
datetime.datetime.fromisoformat('2022-04-24T17:23:54.908505')

datetime.datetime(2022, 4, 24, 17, 23, 54, 908505)

## more formats

In [18]:
# You can use the strftime method which has special formatting directives to help customise

for format_str in [
    '%a', '%A', '%w', '%d', '%b', '%B',
    '%m', '%y', '%Y', '%H', '%I', '%p',
    '%M', '%S', '%f', '%z', '%j', '%U',
    '%W', '%c', '%x', '%X', '%%']:
    print(f"now with format {format_str} is {now.strftime(format_str)}")

now with format %a is Fri
now with format %A is Friday
now with format %w is 5
now with format %d is 08
now with format %b is Jul
now with format %B is July
now with format %m is 07
now with format %y is 22
now with format %Y is 2022
now with format %H is 18
now with format %I is 06
now with format %p is PM
now with format %M is 13
now with format %S is 48
now with format %f is 282125
now with format %z is 
now with format %j is 189
now with format %U is 27
now with format %W is 27
now with format %c is Fri Jul  8 18:13:48 2022
now with format %x is 07/08/22
now with format %X is 18:13:48
now with format %% is %


In [19]:
# If you can specify the format you can convert from str to datetime

datetime.datetime.strptime('Sunday-24-April----17:23:54   2022', 
                           '%A-%d-%B----%X   %Y')

datetime.datetime(2022, 4, 24, 17, 23, 54)

## replace

In [74]:
today.replace(year=today.year - 1) #last year using replace

datetime.date(2021, 7, 7)

In [75]:
now

datetime.datetime(2022, 7, 7, 21, 38, 7, 867351)

In [76]:
now.replace(hour=6)

datetime.datetime(2022, 7, 7, 6, 38, 7, 867351)

## timedelta

In [77]:
delta = datetime.timedelta(
    days=50,
    seconds=27,
    microseconds=10,
    milliseconds=29000,
    minutes=5,
    hours=8,
    weeks=2
)
# Only days, seconds, and microseconds remain
delta

datetime.timedelta(days=64, seconds=29156, microseconds=10)

In [78]:
#What does a 100 hour journey mean?
#It means
journey = datetime.timedelta(
    hours=100
)
journey

datetime.timedelta(days=4, seconds=14400)

In [79]:
delta.total_seconds(), journey.total_seconds()

(5558756.00001, 360000.0)

In [80]:
# Challenge 2

# Write a function days_until_next_birthday taking a month and a day and returning an integer number of 
# days until the date.

def days_until_next_birthday(month:int, day:int, today: datetime.date = None) -> int:
    """Return the number of days from today until the next given month/day.

    Args:
        month: Month of the birthday (1-12).
        day: Day of the month of the birthday.
        today: Date to count from. Defaults to ``datetime.date.today()``;
            passing it explicitly makes the result reproducible.

    Returns:
        Whole days until the next occurrence; 0 when the birthday is today.

    Raises:
        ValueError: If ``month``/``day`` can never form a valid date
            (for example month 13 or 30 February).
    """
    if today is None:
        today = datetime.date.today()

    # Try this year first, then later years. A 29 February birthday only
    # exists in leap years, and consecutive leap years can be up to 8 years
    # apart (e.g. 1896 -> 1904), so nine candidate years always suffice.
    for year in range(today.year, today.year + 9):
        try:
            birthday = datetime.date(year, month, day)
        except ValueError:
            if (month, day) != (2, 29):
                # Not a leap-day problem: the date is simply invalid.
                raise
            continue
        if birthday >= today:
            return (birthday - today).days

    raise ValueError(f"no valid upcoming date for month={month}, day={day}")



In [81]:
days_until_next_birthday(month=4, day=1)

268

## collections

This module implements specialized container datatypes providing alternatives to Python’s general purpose built-in containers, dict, list, set, and tuple.


| Name         | Description                                                          |
| ------------ | -------------------------------------------------------------------- |
| namedtuple() | factory function for creating tuple subclasses with named fields     |
| deque        | list-like container with fast appends and pops on either end         |
| ChainMap     | dict-like class for creating a single view of multiple mappings      |
| Counter      | dict subclass for counting hashable objects                          |
| OrderedDict  | dict subclass that remembers the order entries were added            |
| defaultdict  | dict subclass that calls a factory function to supply missing values |
| UserDict     | wrapper around dictionary objects for easier dict subclassing        |
| UserList     | wrapper around list objects for easier list subclassing              |
| UserString   | wrapper around string objects for easier string subclassing          |


In [82]:
from collections import Counter
from collections import deque
from collections import defaultdict
from collections import namedtuple

### Counter

A Counter is a dict subclass for counting hashable objects. It is a collection where elements are stored as dictionary keys and their counts are stored as dictionary values.

In [83]:
# From a string (an iterable of characters)
counter = Counter('misissippi')
counter

Counter({'m': 1, 'i': 4, 's': 3, 'p': 2})

In [84]:
# From an iterable
counter = Counter(["cat", "cat", "dog", "dog", "cat", "gold fish"])
counter

Counter({'cat': 3, 'dog': 2, 'gold fish': 1})

In [85]:
counter['shark']

0

In [86]:
list(counter.elements())

['cat', 'cat', 'cat', 'dog', 'dog', 'gold fish']

In [87]:
#Nice and fast using the standard library heapq
counter.most_common(2)

[('cat', 3), ('dog', 2)]

In [88]:
another_counter = Counter({'cat':3, 'dog':10})

In [89]:
counter + another_counter

Counter({'cat': 6, 'dog': 12, 'gold fish': 1})

In [90]:
counter

Counter({'cat': 3, 'dog': 2, 'gold fish': 1})

In [91]:
#Nice and fast using the standard library heapq
counter.most_common(2)

[('cat', 3), ('dog', 2)]

In [92]:
counter['dog'] += 1

In [93]:
counter

Counter({'cat': 3, 'dog': 3, 'gold fish': 1})

## deque

Deques support thread-safe, memory efficient appends and pops from either side of the deque with approximately the same O(1) performance in either direction

In [94]:
example_deque = deque(range(5))

In [95]:
example_deque

deque([0, 1, 2, 3, 4])

In [96]:
example_deque.append(5)
example_deque.appendleft(-1)

In [97]:
example_deque

deque([-1, 0, 1, 2, 3, 4, 5])

In [98]:
example_deque.extend([6, 7, 8])
example_deque.extendleft([-2, -3, -4]) # Note the ordering.
example_deque

deque([-4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8])

In [99]:
example_deque.index(3)

7

In [100]:
# This is inplace
# meaning the object is edited directly instead of creating a new reversed deque
example_deque.reverse()

In [101]:
example_deque

deque([8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4])

deque with maxlen

In [102]:
limited_deque = deque(range(5), maxlen=5)
limited_deque

deque([0, 1, 2, 3, 4])

In [103]:
#note this pops off the first 0
limited_deque.append(5)
limited_deque

deque([1, 2, 3, 4, 5])

In [104]:
# this pushes the 5 off the other end
limited_deque.appendleft(0)
limited_deque

deque([0, 1, 2, 3, 4])

In [105]:
# this sequentially pushes the 5, 6, 7
# onto the right end forcing out the 0, 1, 2
limited_deque.extend([5, 6, 7])
limited_deque

deque([3, 4, 5, 6, 7])

In [106]:
# this rotates the elements n steps to the right; items pushed off the right end wrap around to the left
limited_deque.rotate(2)
limited_deque

deque([6, 7, 3, 4, 5])

In [107]:
# we can access the maxlen
limited_deque.maxlen

5

In [108]:
limited_deque.clear()
limited_deque

deque([])

In [109]:
# note if the initial iterable is longer than
# the maxlen then we only get the last n elements
deque([1,2,3,4,5], maxlen=3)

deque([3, 4, 5])

### defaultdict

In [110]:
sentence = (
    "imagine we want to take a sentence and store words in lists in"
    " a dictionary keyed on the letter that each word starts with")
sentence

'imagine we want to take a sentence and store words in lists in a dictionary keyed on the letter that each word starts with'

In [111]:
words_by_starting_letter = {}
for word in sentence.split(' '):
    if word[0] not in words_by_starting_letter:
        words_by_starting_letter[word[0]] = [word]
    else: # we know it's a list so append
        words_by_starting_letter[word[0]].append(word)

words_by_starting_letter

{'i': ['imagine', 'in', 'in'],
 'w': ['we', 'want', 'words', 'word', 'with'],
 't': ['to', 'take', 'the', 'that'],
 'a': ['a', 'and', 'a'],
 's': ['sentence', 'store', 'starts'],
 'l': ['lists', 'letter'],
 'd': ['dictionary'],
 'k': ['keyed'],
 'o': ['on'],
 'e': ['each']}

In [112]:
words_by_starting_letter = defaultdict(list)
for word in sentence.split(' '):
    words_by_starting_letter[word[0]].append(word)
words_by_starting_letter


defaultdict(list,
            {'i': ['imagine', 'in', 'in'],
             'w': ['we', 'want', 'words', 'word', 'with'],
             't': ['to', 'take', 'the', 'that'],
             'a': ['a', 'and', 'a'],
             's': ['sentence', 'store', 'starts'],
             'l': ['lists', 'letter'],
             'd': ['dictionary'],
             'k': ['keyed'],
             'o': ['on'],
             'e': ['each']})

In [113]:
# An example from the official docs:
s = 'mississippi'
letter_counts = defaultdict(int)
for char in s:
    letter_counts[char] += 1

letter_counts

defaultdict(int, {'m': 1, 'i': 4, 's': 4, 'p': 2})

### named tuple

Named tuples assign meaning to each position in a tuple and allow for more readable, self-documenting code. 

In [114]:
student_data

[{'name': 'John Smith',
  'age': 10,
  'on_vacation': False,
  'test_scores': [66, 85, 39, 61, 16, 92, 33, 3, 87, 71]},
 {'name': 'Jane Doe',
  'age': 10,
  'on_vacation': False,
  'test_scores': [4, 73, 75, 4, 50, 83, 8, 23, 42, 23]},
 {'name': 'Isaac Newton',
  'age': 30,
  'on_vacation': True,
  'test_scores': [93, 96, 94, 92, 95, 90, 100, 98, 90, 94]}]

In [115]:
student = ('Simon Ward-Jones', 30, True, [100, 100, 100, 99, 100])

In [116]:
def display_student(student):
    """Print a one-line summary of a student stored as a plain tuple.

    The tuple is read by position: index 0 is the name, index 1 the age and
    index 3 the list of test scores (index 2 is not used here).
    """
    name, age, scores = student[0], student[1], student[3]
    summary = (f"Student {name} is {age} years old "
               f"and has test scores {scores}")
    print(summary)

In [117]:
display_student(student)

Student Simon Ward-Jones is 30 years old and has test scores [100, 100, 100, 99, 100]


### Instead let's create a named tuple

In [118]:
Student = namedtuple("Student", "name age on_vacation test_scores")

In [119]:
Student = namedtuple("Student", ["name", "age", "on_vacation", "test_scores"])

In [120]:
simon = Student(name='Simon Ward-Jones',
                age=30,
                on_vacation=True,
               test_scores=[100,100,100,99,100])

In [121]:
simon[2]

True

In [122]:
simon.on_vacation

True

In [123]:
def display_student(student: Student):
    """Print a one-line summary of a ``Student`` using its named fields.

    Reads ``student.name``, ``student.age`` and ``student.test_scores``.
    """
    # Adjacent string literals are joined with nothing in between, so the
    # first piece must end with a space to keep "old" and "and" apart.
    print(f"Student {student.name} is {student.age} years old "
          f"and has test scores {student.test_scores}")

In [124]:
display(simon)

Student(name='Simon Ward-Jones', age=30, on_vacation=True, test_scores=[100, 100, 100, 99, 100])

Named tuples are immutable and therefore another named tuple has to be created

In [125]:
older_simon = simon._replace(age=31)
older_simon # note this is a new instance!

Student(name='Simon Ward-Jones', age=31, on_vacation=True, test_scores=[100, 100, 100, 99, 100])

In [126]:
simon._fields

('name', 'age', 'on_vacation', 'test_scores')

In [127]:
# We can also do the same thing using the typing.NamedTuple

In [128]:
from typing import NamedTuple, List

In [129]:
class Student(NamedTuple):
    """Typed named-tuple record describing one student.

    Equivalent to the ``namedtuple("Student", ...)`` definitions earlier in
    the notebook, with the same four fields in the same order, but each
    field also carries a type annotation.
    """
    name: str  # the student's name
    age: int  # the student's age
    on_vacation: bool  # whether the student is on vacation
    test_scores: List[int]  # the student's test scores

In [130]:
simon = Student(
    name='Simon Ward-Jones', 
    age=30,
    on_vacation=True,
    test_scores=[100, 100, 100, 99, 100])

In [131]:
simon

Student(name='Simon Ward-Jones', age=30, on_vacation=True, test_scores=[100, 100, 100, 99, 100])

In [132]:
student_data[0]

{'name': 'John Smith',
 'age': 10,
 'on_vacation': False,
 'test_scores': [66, 85, 39, 61, 16, 92, 33, 3, 87, 71]}

In [133]:
students = [Student._make(student.values()) for student in student_data]

In [134]:
students

[Student(name='John Smith', age=10, on_vacation=False, test_scores=[66, 85, 39, 61, 16, 92, 33, 3, 87, 71]),
 Student(name='Jane Doe', age=10, on_vacation=False, test_scores=[4, 73, 75, 4, 50, 83, 8, 23, 42, 23]),
 Student(name='Isaac Newton', age=30, on_vacation=True, test_scores=[93, 96, 94, 92, 95, 90, 100, 98, 90, 94])]

In [135]:
# Challenge 3

# Create a namedtuple called Point with an x and a y attribute to represent points on a grid
# Create a list with 100 random points with x values in 1,2,3 and y values in 1,2,3
# Find the most common point in the list

# Hint: use random.randint(1, 3)




In [136]:
from random import randint

# Option 1: build the tuple subclass with the collections.namedtuple factory.
Point = namedtuple("Point", "x y")

# Option 2: the typing.NamedTuple class syntax, which also records field types.
# This rebinds the name Point, so it replaces the definition from option 1.
class Point(NamedTuple):
    """A point on the grid with integer x and y fields."""
    x: int
    y: int
        
# 100 random points; the challenge asks for x and y values in 1, 2, 3.
points = [Point(randint(1,3), randint(1,3)) for i in range(100)]

In [137]:
counter = Counter(points)
counter

Counter({Point(x=3, y=2): 9,
         Point(x=3, y=1): 12,
         Point(x=2, y=1): 9,
         Point(x=2, y=2): 17,
         Point(x=1, y=2): 12,
         Point(x=1, y=1): 11,
         Point(x=1, y=3): 10,
         Point(x=2, y=3): 9,
         Point(x=3, y=3): 11})

In [138]:
most_common, count = Counter(points).most_common(1)[0]
print(f"{most_common} appears {count} times")

Point(x=2, y=2) appears 17 times


## itertools

This module implements a number of iterator building blocks!

In [139]:
import itertools

In [140]:
student_data

[{'name': 'John Smith',
  'age': 10,
  'on_vacation': False,
  'test_scores': [66, 85, 39, 61, 16, 92, 33, 3, 87, 71]},
 {'name': 'Jane Doe',
  'age': 10,
  'on_vacation': False,
  'test_scores': [4, 73, 75, 4, 50, 83, 8, 23, 42, 23]},
 {'name': 'Isaac Newton',
  'age': 30,
  'on_vacation': True,
  'test_scores': [93, 96, 94, 92, 95, 90, 100, 98, 90, 94]}]

### Quick aside -> What is an iterable, what is an iterator, and how does a for loop work

Iterators

An iterator is an object that implements the iterator protocol. In other words, an iterator is an object that implements the following methods:

`__iter__` returns the iterator object itself.

`__next__` returns the next element.

This means that once an iterator has been exhausted it stays exhausted: you cannot iterate over the same iterator object again.

Iterables

An iterable is an object that you can iterate over.

An object is iterable when it implements the `__iter__` method. And its `__iter__` method returns a new iterator.

In [141]:
# An iterator implements two very special methods
# __iter__ -> Must return an iterator (for an iterator, the object itself)
# __next__ -> Returns the next item, raising StopIteration when there are none left

In [142]:
student_data.__iter__

<method-wrapper '__iter__' of list object at 0x000000000972F880>

In [143]:
#What is really going on in this for loop?
for item in student_data:
    print(item)

{'name': 'John Smith', 'age': 10, 'on_vacation': False, 'test_scores': [66, 85, 39, 61, 16, 92, 33, 3, 87, 71]}
{'name': 'Jane Doe', 'age': 10, 'on_vacation': False, 'test_scores': [4, 73, 75, 4, 50, 83, 8, 23, 42, 23]}
{'name': 'Isaac Newton', 'age': 30, 'on_vacation': True, 'test_scores': [93, 96, 94, 92, 95, 90, 100, 98, 90, 94]}


In [144]:
student_data

[{'name': 'John Smith',
  'age': 10,
  'on_vacation': False,
  'test_scores': [66, 85, 39, 61, 16, 92, 33, 3, 87, 71]},
 {'name': 'Jane Doe',
  'age': 10,
  'on_vacation': False,
  'test_scores': [4, 73, 75, 4, 50, 83, 8, 23, 42, 23]},
 {'name': 'Isaac Newton',
  'age': 30,
  'on_vacation': True,
  'test_scores': [93, 96, 94, 92, 95, 90, 100, 98, 90, 94]}]

In [145]:
student_data.__iter__()

<list_iterator at 0x2a4d4d00>

In [146]:
iter(student_data)

<list_iterator at 0x2a4d4790>

In [147]:
student_data_iterator = iter(student_data)

In [148]:
student_data_iterator

<list_iterator at 0x2a4c7f40>

Student 1

In [149]:
student_data_iterator.__next__()

{'name': 'John Smith',
 'age': 10,
 'on_vacation': False,
 'test_scores': [66, 85, 39, 61, 16, 92, 33, 3, 87, 71]}

Student 2

In [150]:
next(student_data_iterator)

{'name': 'Jane Doe',
 'age': 10,
 'on_vacation': False,
 'test_scores': [4, 73, 75, 4, 50, 83, 8, 23, 42, 23]}

Student 3

In [151]:
next(student_data_iterator)

{'name': 'Isaac Newton',
 'age': 30,
 'on_vacation': True,
 'test_scores': [93, 96, 94, 92, 95, 90, 100, 98, 90, 94]}

In [152]:
next(student_data_iterator)

StopIteration: 

In [153]:
# summary of for loop

student_data_iterator = iter(student_data)

while True:
    try:
        item = next(student_data_iterator)
        # This is now the same as the for block
        print(item)
    except StopIteration:
        break

{'name': 'John Smith', 'age': 10, 'on_vacation': False, 'test_scores': [66, 85, 39, 61, 16, 92, 33, 3, 87, 71]}
{'name': 'Jane Doe', 'age': 10, 'on_vacation': False, 'test_scores': [4, 73, 75, 4, 50, 83, 8, 23, 42, 23]}
{'name': 'Isaac Newton', 'age': 30, 'on_vacation': True, 'test_scores': [93, 96, 94, 92, 95, 90, 100, 98, 90, 94]}


### Itertools fun 

In [154]:
itertools.chain([1, 2, 3], [4, 5, 6], [7, 8, 9]) # note lazy!

<itertools.chain at 0x2a4ef9d0>

In [155]:
# chain yields the items of each list in turn, as if they were one sequence.
# The loop variable is called `number` because the items are numbers,
# matching the chain.from_iterable example below.
for number in itertools.chain([1, 2, 3], [4, 5, 6], [7, 8, 9]):
    print(number)

1
2
3
4
5
6
7
8
9


### Chain from iterable

In [156]:
# iterable of iterables.
#USED FOR list of lists

for number in itertools.chain.from_iterable([[1, 2, 3], [4, 5, 6], [7, 8, 9]]):
    print(number)

1
2
3
4
5
6
7
8
9


### Count

In [157]:
# Similar to range but when you don't know how many you want.
# itertools.count(3) -> 3, 4, 5, 6, ...

In [158]:
for n in itertools.count(start=1, step=1):
    print(f"n={n}")
    if n == 5:
        break

n=1
n=2
n=3
n=4
n=5


In [159]:
#less code than the above
n = 1
while n < 6:
    print(f"n={n}")
    n = n+1

n=1
n=2
n=3
n=4
n=5


## Combinations

In [160]:
#Two element subsets
list(itertools.combinations([1,2,3,4], 2))

[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]

In [161]:
#With replacement
list(itertools.combinations_with_replacement([1,2,3,4], 2))

[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (2, 2),
 (2, 3),
 (2, 4),
 (3, 3),
 (3, 4),
 (4, 4)]

## Cycle

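`itertools.cycle` repeats the items of an iterable endlessly, so it acts like an infinite iterator and the loop needs its own `break`.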

In [162]:
import random

In [163]:
#Simulating the game
def player_move(player):
    """Simulate one move for ``player`` and report whether it won the game.

    A move wins when ``random.randint(1, 4)`` comes up 1. The outcome is
    printed, and ``True`` is returned for a win, ``False`` otherwise.
    """
    has_won = random.randint(1,4) == 1
    if not has_won:
        print(f"Player {player} moved")
        return False
    print(f"Player {player} moved and won!")
    return True

In [164]:
for player in itertools.cycle(["A", "B"]):
    if player_move(player):
        break

Player A moved and won!


## Groupby

In [165]:
list(itertools.groupby('MISSISSIPPI'))

[('M', <itertools._grouper at 0x6635be0>),
 ('I', <itertools._grouper at 0x6635a90>),
 ('S', <itertools._grouper at 0x6635fd0>),
 ('I', <itertools._grouper at 0x6635ca0>),
 ('S', <itertools._grouper at 0x6635c70>),
 ('I', <itertools._grouper at 0x66359a0>),
 ('P', <itertools._grouper at 0x6635a00>),
 ('I', <itertools._grouper at 0x6635190>)]

In [166]:
for key, group in itertools.groupby('MISSISSIPPI'):
    print(f"A group of {key} with {list(group)}")

A group of M with ['M']
A group of I with ['I']
A group of S with ['S', 'S']
A group of I with ['I']
A group of S with ['S', 'S']
A group of I with ['I']
A group of P with ['P', 'P']
A group of I with ['I']


## islice

In [167]:
#[list], starting point, ending point, step 
list(itertools.islice([1,2,3,4,5], 1, None, 2))

[2, 4]

In [168]:
long_list = list(range(1_000_000))

In [169]:
import sys

def convert_bytes(size):
    """Format a size in bytes as a short human-readable string, e.g. '7.6 MB'.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached.

    Args:
        size: Size in bytes.

    Returns:
        A string with one decimal place followed by the unit. Sizes of
        1024 TB or more are still reported in TB, so the return value is
        always a string (previously such sizes came back as a bare float).
    """
    units = ['bytes', 'KB', 'MB', 'GB', 'TB']
    for unit in units[:-1]:
        if size < 1024.0:
            return "%3.1f %s" % (size, unit)
        size /= 1024.0

    # Everything that gets here has been scaled to the largest unit.
    return "%3.1f %s" % (size, units[-1])

In [170]:
convert_bytes(sys.getsizeof(long_list))

'7.6 MB'

In [171]:
# This displays the size of a new list generated by 
# starting at the beginning
# ending in the end
# step=2

convert_bytes(sys.getsizeof(long_list[::2]))

'3.8 MB'

In [172]:
#Same as above but very memory efficient
convert_bytes(sys.getsizeof(itertools.islice(long_list, 0, None, 2)))

'72.0 bytes'

## Same list obtained but one is more memory efficient

In [173]:
long_list[::2]

[0,
 2,
 4,
 6,
 8,
 10,
 12,
 14,
 16,
 18,
 20,
 22,
 24,
 26,
 28,
 30,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 46,
 48,
 50,
 52,
 54,
 56,
 58,
 60,
 62,
 64,
 66,
 68,
 70,
 72,
 74,
 76,
 78,
 80,
 82,
 84,
 86,
 88,
 90,
 92,
 94,
 96,
 98,
 100,
 102,
 104,
 106,
 108,
 110,
 112,
 114,
 116,
 118,
 120,
 122,
 124,
 126,
 128,
 130,
 132,
 134,
 136,
 138,
 140,
 142,
 144,
 146,
 148,
 150,
 152,
 154,
 156,
 158,
 160,
 162,
 164,
 166,
 168,
 170,
 172,
 174,
 176,
 178,
 180,
 182,
 184,
 186,
 188,
 190,
 192,
 194,
 196,
 198,
 200,
 202,
 204,
 206,
 208,
 210,
 212,
 214,
 216,
 218,
 220,
 222,
 224,
 226,
 228,
 230,
 232,
 234,
 236,
 238,
 240,
 242,
 244,
 246,
 248,
 250,
 252,
 254,
 256,
 258,
 260,
 262,
 264,
 266,
 268,
 270,
 272,
 274,
 276,
 278,
 280,
 282,
 284,
 286,
 288,
 290,
 292,
 294,
 296,
 298,
 300,
 302,
 304,
 306,
 308,
 310,
 312,
 314,
 316,
 318,
 320,
 322,
 324,
 326,
 328,
 330,
 332,
 334,
 336,
 338,
 340,
 342,
 344,
 346,
 348,
 350,

In [174]:
list(itertools.islice(long_list, 0, None, 2))

[0,
 2,
 4,
 6,
 8,
 10,
 12,
 14,
 16,
 18,
 20,
 22,
 24,
 26,
 28,
 30,
 32,
 34,
 36,
 38,
 40,
 42,
 44,
 46,
 48,
 50,
 52,
 54,
 56,
 58,
 60,
 62,
 64,
 66,
 68,
 70,
 72,
 74,
 76,
 78,
 80,
 82,
 84,
 86,
 88,
 90,
 92,
 94,
 96,
 98,
 100,
 102,
 104,
 106,
 108,
 110,
 112,
 114,
 116,
 118,
 120,
 122,
 124,
 126,
 128,
 130,
 132,
 134,
 136,
 138,
 140,
 142,
 144,
 146,
 148,
 150,
 152,
 154,
 156,
 158,
 160,
 162,
 164,
 166,
 168,
 170,
 172,
 174,
 176,
 178,
 180,
 182,
 184,
 186,
 188,
 190,
 192,
 194,
 196,
 198,
 200,
 202,
 204,
 206,
 208,
 210,
 212,
 214,
 216,
 218,
 220,
 222,
 224,
 226,
 228,
 230,
 232,
 234,
 236,
 238,
 240,
 242,
 244,
 246,
 248,
 250,
 252,
 254,
 256,
 258,
 260,
 262,
 264,
 266,
 268,
 270,
 272,
 274,
 276,
 278,
 280,
 282,
 284,
 286,
 288,
 290,
 292,
 294,
 296,
 298,
 300,
 302,
 304,
 306,
 308,
 310,
 312,
 314,
 316,
 318,
 320,
 322,
 324,
 326,
 328,
 330,
 332,
 334,
 336,
 338,
 340,
 342,
 344,
 346,
 348,
 350,

## Permutations

In [175]:
# Order matters in permutations: (1, 2) and (2, 1) are different results
list(itertools.permutations([1,2,3], r=2))

[(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]

In [176]:
list(itertools.permutations([1,2,3], r=3))

[(1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 3, 1), (3, 1, 2), (3, 2, 1)]

## product

In [177]:
list(itertools.product([1,2,3], [4,5]))

[(1, 4), (1, 5), (2, 4), (2, 5), (3, 4), (3, 5)]

In [178]:
# The repeat argument takes the product of the iterable with itself
list(itertools.product([1,2,3], repeat=2))

[(1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3)]

## Zip Longest

In [179]:
# Note zip stops at the shortest input, so we lose 'c'
list(zip([1,2], ['a', 'b', 'c']))

[(1, 'a'), (2, 'b')]

In [180]:
list(itertools.zip_longest([1,2], ['a', 'b', 'c']))

[(1, 'a'), (2, 'b'), (None, 'c')]

In [181]:
# Challenge 4

# How many times does each digit appear in the multiples of 3 less than 1000?

three_times_table = (str(number) for number in itertools.islice(range(1000), 3, None, 3))
Counter(itertools.chain.from_iterable(three_times_table))

Counter({'3': 102,
         '6': 102,
         '9': 102,
         '1': 99,
         '2': 99,
         '5': 99,
         '8': 99,
         '4': 99,
         '7': 99,
         '0': 63})

## functools

The functools module is for higher-order functions: functions that act on or return other functions. In general, any callable object can be treated as a function for the purposes of this module.


In [182]:
import functools

### lru_cache, cache

`lru_cache` (Least Recently Used cache) is a decorator that wraps a function with a memoizing callable that saves up to the `maxsize` most recent calls.

In [183]:
def factorial(n):
    """Return n! computed recursively; any falsy ``n`` (e.g. 0) yields 1."""
    if not n:
        return 1
    return n * factorial(n - 1)

The decorator will create a new function with the same name and give it a cache

In [184]:
@functools.lru_cache(maxsize=None)
def fast_factorial(n):
    """Return n! recursively, memoizing every intermediate result.

    The recursive step calls ``fast_factorial`` itself (not the uncached
    ``factorial``), so each sub-result from 0 up to ``n`` lands in the cache
    and later calls with a smaller or slightly larger ``n`` reuse them.
    ``maxsize=None`` makes the cache unbounded.
    """
    return n * fast_factorial(n-1) if n else 1


In [185]:
%timeit factorial(200)

36.8 µs ± 1.25 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [186]:
%timeit fast_factorial(200)
#much faster nanoseconds

77.1 ns ± 1.12 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


### total ordering

In [195]:
class Student:
    """A student record: name, age, list of test scores and a vacation flag."""

    def __init__(
        self,
        name: str,
        age: int,
        test_scores: List[int],
        on_vacation: bool = False,
    ):
        self.name, self.age = name, age
        self.test_scores = test_scores
        self.on_vacation = on_vacation

    def __repr__(self):
        # Only the name is included in the representation.
        return f'Student(name={self.name})'

    @property
    def mean_test_score(self):
        """Arithmetic mean of ``test_scores``; an empty list raises ZeroDivisionError."""
        scores = self.test_scores
        return sum(scores) / len(scores)

In [196]:
john = Student(**student_data[0])
newton = Student(**student_data[2])

john, newton

(Student(name=John Smith), Student(name=Isaac Newton))

In [198]:
# this code will error if uncommented

#john < newton

In [199]:
class Student:
    """Student ordered by mean test score, with every comparison written by hand.

    All five rich comparison methods compare ``mean_test_score``. Because
    ``__eq__`` is defined without ``__hash__``, Python sets ``__hash__`` to
    ``None`` and instances are not hashable.
    """

    def __init__(self, name:str, age:int,test_scores:List[int], on_vacation: bool=False):
        self.name = name
        self.age = age
        self.test_scores = test_scores
        self.on_vacation = on_vacation

    def __repr__(self):
        return f'Student(name={self.name})'

    @property
    def mean_test_score(self):
        """Arithmetic mean of ``test_scores``; an empty list raises ZeroDivisionError."""
        return sum(x for x in self.test_scores) / len(self.test_scores)

    # Every comparison operator has to be spelled out one by one.
    #
    # ``other`` is annotated with the string 'Student' because the class name is
    # not bound yet while the class body is being executed; an unquoted
    # annotation would be looked up at definition time.
    #
    # Objects without a ``mean_test_score`` get ``NotImplemented`` so Python can
    # try the reflected operation and fall back to its default behaviour
    # (``==`` is False, ordering raises TypeError) instead of AttributeError.

    def __lt__(self, other: 'Student'):
        if not hasattr(other, 'mean_test_score'):
            return NotImplemented
        return self.mean_test_score < other.mean_test_score

    def __le__(self, other: 'Student'):
        if not hasattr(other, 'mean_test_score'):
            return NotImplemented
        return self.mean_test_score <= other.mean_test_score

    def __gt__(self, other: 'Student'):
        if not hasattr(other, 'mean_test_score'):
            return NotImplemented
        return self.mean_test_score > other.mean_test_score

    def __ge__(self, other: 'Student'):
        if not hasattr(other, 'mean_test_score'):
            return NotImplemented
        return self.mean_test_score >= other.mean_test_score

    def __eq__(self, other: 'Student'):
        if not hasattr(other, 'mean_test_score'):
            return NotImplemented
        return self.mean_test_score == other.mean_test_score

In [208]:
john = Student(**student_data[0])
newton = Student(**student_data[2])
john, newton

(Student(name=John Smith), Student(name=Isaac Newton))

In [201]:
john > newton

False

In [202]:
#Same as
john.__gt__(newton)

False

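## What the `total_ordering` class decorator does
You only need to define `__eq__` plus one of `__lt__`, `__le__`, `__gt__` or `__ge__`; the decorator fills in the remaining comparison methods.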

In [203]:
@functools.total_ordering
class Student:
    """Student ordered by mean test score, using ``functools.total_ordering``.

    Only ``__lt__`` and ``__eq__`` are written here; the decorator derives
    ``__le__``, ``__gt__`` and ``__ge__`` from them. Because ``__eq__`` is
    defined without ``__hash__``, Python sets ``__hash__`` to ``None`` and
    instances are not hashable.
    """

    def __init__(self, name:str, age:int,test_scores:List[int], on_vacation: bool=False):
        self.name = name
        self.age = age
        self.test_scores = test_scores
        self.on_vacation = on_vacation

    def __repr__(self):
        return f'Student(name={self.name})'

    @property
    def mean_test_score(self):
        """Arithmetic mean of ``test_scores``; an empty list raises ZeroDivisionError."""
        return sum(x for x in self.test_scores) / len(self.test_scores)

    # Only these two special methods are needed; total_ordering supplies the rest.
    #
    # ``other`` is annotated with the string 'Student' because the class name is
    # not bound yet while the class body is being executed.
    #
    # Returning ``NotImplemented`` for objects without a ``mean_test_score``
    # lets Python (and the derived methods) fall back to the default behaviour
    # instead of raising AttributeError.

    def __lt__(self, other: 'Student'):
        if not hasattr(other, 'mean_test_score'):
            return NotImplemented
        return self.mean_test_score < other.mean_test_score

    def __eq__(self, other: 'Student'):
        if not hasattr(other, 'mean_test_score'):
            return NotImplemented
        return self.mean_test_score == other.mean_test_score

In [204]:
# Because Greater than is the same as not less than and not equal.

In [209]:
#Gives you information
john.__gt__??

## partial

In [210]:
def is_pass(student: Student, pass_mark = 60):
    """Report whether a student's mean test score is strictly above ``pass_mark``.

    Prints a one-line summary with the student's name and returns the boolean
    result. A mean exactly equal to ``pass_mark`` is reported as 'below'.
    """
    passed = student.mean_test_score > pass_mark
    verdict = 'above' if passed else 'below'
    print(f'{student.name} has test score {verdict} {pass_mark}')
    return passed

In [211]:
is_pass(john)

John Smith has test score below 60


False

In [212]:
is_pass(newton)

Isaac Newton has test score above 60


True

In [215]:
def is_top_set(student: Student):
    """Run ``is_pass`` for ``student`` with the pass mark fixed at 30."""
    return is_pass(student, pass_mark=30)

In [217]:
is_top_set(newton)

Isaac Newton has test score above 30


True

In [218]:
is_top_set = functools.partial(is_pass, pass_mark=80)

In [219]:
is_top_set

functools.partial(<function is_pass at 0x000000002A5028B0>, pass_mark=80)

In [220]:
# another way to create is_top_set
#don't need the explicit definition of the function

In [227]:
is_top_set = functools.partial(is_pass, pass_mark=80)

In [230]:
is_top_set(newton)

Isaac Newton has test score above 80


True

In [231]:
from statistics import median
min([(1,2), (5,1), (2,3)])

(1, 2)

In [232]:
# The key function makes min compare the tuples by the element at index 1 (their second item)
min([(1,2), (5,1), (2,3)], key=lambda item: item[1])

(5, 1)

In [234]:
def _median_test_score(student):
    """Median of a student's test scores, used as the ranking key."""
    return median(student.test_scores)


# ``min`` with the key pre-bound: picks the student whose median score is lowest.
min_student = functools.partial(min, key=_median_test_score)

In [235]:
min_student(students)

Student(name='Jane Doe', age=10, on_vacation=False, test_scores=[4, 73, 75, 4, 50, 83, 8, 23, 42, 23])

In [237]:
# Challenge 5

# Create a function called student_pairs to find all combinations of 2 students

# combinations with r pre-bound to 2: every unordered pair, each exactly once.
student_pairs = functools.partial(itertools.combinations, r=2)

for pair in student_pairs(students):
    print(tuple(student.name for student in pair))

('John Smith', 'Jane Doe')
('John Smith', 'Isaac Newton')
('Jane Doe', 'Isaac Newton')


### Playing with f strings

Debugging purposes

In [240]:
str_value = "other"
num_value = 123

print(f'the value is {str_value}')

the value is other


In [243]:
# Add `=` after the expression to print the expression text together with its value
print(f'the value is {str_value=}')

the value is str_value='other'


In [244]:
print(f'{num_value % 2 = }')

num_value % 2 = 1


In [255]:
def conversion():
    """Print the ``repr`` of a sample string via the ``!r`` f-string conversion."""
    sample = "other"
    # ``!r`` formats the value with repr(), so the quotes are part of the output.
    rendered = f'{sample!r}'
    print(rendered)

In [256]:
conversion()

'other'


'other'
