## Introducing the Path class

In [1]:
from pathlib import Path

In [2]:
cwd_path = Path('.')
cwd_path

WindowsPath('.')

In [3]:
cwd_path.absolute()

WindowsPath('C:/Users/sanka/Desktop/python_tutorials/python_standard_library')

In [4]:
Path.cwd()

WindowsPath('C:/Users/sanka/Desktop/python_tutorials/python_standard_library')

## Building paths

In [5]:
student_folder = cwd_path.joinpath('data').joinpath('student-data')
student_folder

WindowsPath('data/student-data')

In [6]:
# same uisng / operator
student_folder = cwd_path / 'data' / 'student-data'

# same using string
student_folder = Path('./data/student-data')
student_folder

WindowsPath('data/student-data')

In [7]:
student_data_path = student_folder / 'data.json'

In [8]:
student_data_path

WindowsPath('data/student-data/data.json')

## File parts and parents

In [9]:
student_data_path.name

'data.json'

In [10]:
student_data_path.stem

'data'

In [11]:
student_data_path.suffix

'.json'

In [12]:
student_data_path.parts

('data', 'student-data', 'data.json')

In [13]:
print(student_data_path)

data\student-data\data.json


In [14]:
student_data_path.absolute()

WindowsPath('C:/Users/sanka/Desktop/python_tutorials/python_standard_library/data/student-data/data.json')

In [15]:
print(student_data_path.absolute())

C:\Users\sanka\Desktop\python_tutorials\python_standard_library\data\student-data\data.json


In [16]:
student_data_path.parent

WindowsPath('data/student-data')

In [17]:
# As the parent returns a Path instance
# we can call parent on that too
student_data_path.parent.parent

WindowsPath('data')

In [18]:
student_data_path.parent.parent.parent

WindowsPath('.')

In [19]:
student_data_path.parent.parent.parent.parent

WindowsPath('.')

## Changing the name, stem or file extension

In [20]:
student_data_path

WindowsPath('data/student-data/data.json')

In [21]:
# replace .json with .py
student_data_path.with_suffix('.py')

WindowsPath('data/student-data/data.py')

In [22]:
# replace data.py with student_data.txt
student_data_path.with_name('student_data.txt')

WindowsPath('data/student-data/student_data.txt')

In [23]:
# we can chain calls
student_data_path.with_name('numbers').with_suffix('.xlsx')

WindowsPath('data/student-data/numbers.xlsx')

## Interacting with the files, reading, writing, and renaming

In [24]:
student_data_path.exists()

True

In [25]:
# is_file checks if this path exists and is a file
# is_dir checks if this path exists and is a directory
student_data_path.is_file(), student_data_path.is_dir()

(True, False)

In [26]:
student_data_folder = student_data_path.parent
student_data_folder

WindowsPath('data/student-data')

In [27]:
student_data_folder.exists()

True

In [28]:
# this will throw an error because student_data_folder doesn't exist
student_data_folder.mkdir()

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'data\\student-data'

In [33]:
# parents=True will make parents if it doesn't exists
# exists_ok = True will not fail if the folder already exists
student_data_folder.mkdir(parents=True, exist_ok=True)

In [34]:
student_data_folder.exists()

True

In [35]:
student_data_path.exists()

True

In [36]:
student_data = [
    {
        "name": "John Smith",
        "age": 38,
        "on_vacation": False,
        "test_scores": [33, 55, 66, 88, 96]
    },
    {
        "name": "Ashley Doe",
        "age": 38,
        "on_vacation": False,
        "test_scores": [98, 14, 25, 35, 69]
    },
    {
        "name": "Bull Smith",
        "age": 86,
        "on_vacation": True,
        "test_scores": [96, 75, 25, 36, 65]
    }
]

In [37]:
import json # to convert dict to json string
student_data_path.write_text(json.dumps(student_data, indent=4))

619

In [38]:
print(student_data_path.read_text())

[
    {
        "name": "John Smith",
        "age": 38,
        "on_vacation": false,
        "test_scores": [
            33,
            55,
            66,
            88,
            96
        ]
    },
    {
        "name": "Ashley Doe",
        "age": 38,
        "on_vacation": false,
        "test_scores": [
            98,
            14,
            25,
            35,
            69
        ]
    },
    {
        "name": "Bull Smith",
        "age": 86,
        "on_vacation": true,
        "test_scores": [
            96,
            75,
            25,
            36,
            65
        ]
    }
]


## Renaming files

In [39]:
moved_file_location = student_data_path.parent.parent / 'new_location.txt'
moved_file_location

WindowsPath('data/new_location.txt')

In [40]:
moved_file_location.exists()

False

In [41]:
# NO WARNING: if overwritng. So becareful
if not moved_file_location.exists():
    student_data_path.rename(moved_file_location)

In [42]:
moved_file_location.exists()

True

## Deleting Dirs and Files

In [43]:
# unlink - deletes a file. The missing_ok ensures no error if the file doesn't exist
if student_data_path.exists():
    student_data_path.unlink()
if moved_file_location.exists():
    moved_file_location.unlink()

In [44]:
# Remove an empty directory
if student_data_folder.is_dir():
    student_data_folder.rmdir()
if student_data_folder.parent.is_dir():
    student_data_folder.parent.rmdir()

OSError: [WinError 145] The directory is not empty: 'data\\student-data'

In [45]:
# Recreate the deleted because we still need them in this lesson
student_data_folder.mkdir(parents=True, exist_ok=True)
student_data_path.write_text(json.dumps(student_data, indent=4))

619

## iterating on a dir

In [46]:
list(cwd_path.iterdir())

[WindowsPath('.ipynb_checkpoints'),
 WindowsPath('data'),
 WindowsPath('example.txt'),
 WindowsPath('file1.txt'),
 WindowsPath('file2.txt'),
 WindowsPath('standard_library.ipynb')]

In [47]:
# we can do pattern matching using glob
txt_path = [path for path in cwd_path.glob('**/*.txt')]

In [48]:
for i in txt_path:
    print(i)

example.txt
file1.txt
file2.txt
data\student-data\file3.txt
data\student-data\file4.txt


## Chalenge 1
#### Code a function to replace the file endings of all .txt files to .md within the cwd

In [49]:
def replace_all_txt_with_md():
    '''Replace .txt suffix with .md in cwd'''
    for file_path in Path.cwd().glob('**/*.md'):
        print(file_path)
        file_path.rename(file_path.with_suffix(".md"))
        print(file_path)

In [50]:
# test the solution by
# 1. writing .txt file
# 2. running the function
# 3. check if the file ending changed
Path('example.txt').write_text('#Example')
replace_all_txt_with_md()

## datetime

In [51]:
import datetime

### date

In [52]:
twenty_fourth_april = datetime.date(year=2022, month=4, day=24)
twenty_fourth_april

datetime.date(2022, 4, 24)

In [53]:
today = datetime.date.today()
today

datetime.date(2023, 8, 11)

In [54]:
today.day, today.month, today.year

(11, 8, 2023)

### time

In [55]:
four_thirty = datetime.time(hour=16, minute=30, second=0, microsecond=20)
four_thirty

datetime.time(16, 30, 0, 20)

### datetime

In [56]:
order_at = datetime.datetime(
    year=2022,
    month=9,
    day=16,
    hour=2,
    minute=30,
    second=12,
    microsecond=123,
    tzinfo=None
)
order_at

datetime.datetime(2022, 9, 16, 2, 30, 12, 123)

In [57]:
order_at.date()

datetime.date(2022, 9, 16)

In [58]:
order_at.time()

datetime.time(2, 30, 12, 123)

In [59]:
now = datetime.datetime.now()
now

datetime.datetime(2023, 8, 11, 15, 18, 47, 59425)

### key methods

In [60]:
date_combined = datetime.datetime.combine(date=today, time=four_thirty)
date_combined

datetime.datetime(2023, 8, 11, 16, 30, 0, 20)

In [61]:
str(datetime.datetime.combine(date=today, time=four_thirty))

'2023-08-11 16:30:00.000020'

### isoformat

In [62]:
# returns a string in a standardised form ISO 8601 format
today.isoformat()

'2023-08-11'

In [63]:
now.isoformat()

'2023-08-11T15:18:47.059425'

In [64]:
date_combined.isoformat()

'2023-08-11T16:30:00.000020'

In [65]:
datetime.date.fromisoformat('2022-04-02')

datetime.date(2022, 4, 2)

### more formats

In [66]:
# differents formats with strftime
for format_str in [
    '%a', '%A', '%w', '%d', '%b', '%B',
    '%m', '%y', '%Y', '%H', '%I', '%p',
    '%M', '%S', '%f', '%z', '%j', '%U',
    '%W', '%c', '%x', '%X', '%%',]:
    print(f'now with format {format_str} is {now.strftime(format_str)}')

now with format %a is Fri
now with format %A is Friday
now with format %w is 5
now with format %d is 11
now with format %b is Aug
now with format %B is August
now with format %m is 08
now with format %y is 23
now with format %Y is 2023
now with format %H is 15
now with format %I is 03
now with format %p is PM
now with format %M is 18
now with format %S is 47
now with format %f is 059425
now with format %z is 
now with format %j is 223
now with format %U is 32
now with format %W is 32
now with format %c is Fri Aug 11 15:18:47 2023
now with format %x is 08/11/23
now with format %X is 15:18:47
now with format %% is %


In [67]:
# if you can specify the format you can convert from str to datetime
datetime.datetime.strptime('Sunday-24-April----17:23:54  2022', '%A-%d-%B----%X  %Y')

datetime.datetime(2022, 4, 24, 17, 23, 54)

### replace

In [68]:
today.replace(year=today.year-1, month=8) # last year using replace

datetime.date(2022, 8, 11)

In [69]:
now

datetime.datetime(2023, 8, 11, 15, 18, 47, 59425)

In [70]:
now.replace(year= 2022, month=2)

datetime.datetime(2022, 2, 11, 15, 18, 47, 59425)

### timedelta

In [71]:
delta = datetime.timedelta(
    days=50,
    seconds=27,
    microseconds=10,
    milliseconds=29000,
    minutes=5,
    hours=8,
    weeks=2
)

In [72]:
delta # only days, seconds, and microseconds remain

datetime.timedelta(days=64, seconds=29156, microseconds=10)

In [73]:
datetime.timedelta(days=50, weeks=2)

datetime.timedelta(days=64)

In [74]:
delta.total_seconds()

5558756.00001

In [75]:
year = datetime.timedelta(days=365)
another_year = datetime.timedelta(weeks=40, days=84, hours=23,
                                 minutes=50, seconds=600)
another_year, year

(datetime.timedelta(days=365), datetime.timedelta(days=365))

## Challenge 2
#### Write a function days_until_next_birthday taking a month and a day and returning number of days untill next birthday

In [76]:
def days_untill_next_birthday(month:int, day:int) -> int:
    today = datetime.date.today()
    birthday = today.replace(month=month, day=day)
    if birthday < today:
        birthday = birthday.replace(year=birthday.year + 1)
    return (birthday - today).days

In [77]:
days_untill_next_birthday(month=7, day=1)

325

## collections

<table>
    <thead>
        <tr>
            <th>Name</th>
            <th>Description</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>namedtuple()</td>
            <td>factory function for creating tuple subclasses with named fields</td>
        </tr>
        <tr>
            <td>deque</td>
            <td>list-like container with fast appends and pops on either end</td>
        </tr>
        <tr>
            <td>ChainMap</td>
            <td>dict-like class for creating a single view of multiple mappings</td>
        </tr>
        <tr>
            <td>Counter</td>
            <td>dict subclass for counting hashable objects</td>
        </tr>
        <tr>
            <td>OrderedDict</td>
            <td>dict subclass that remembers the order entries were added</td>
        </tr>
        <tr>
            <td>defaultdict</td>
            <td>dict subclass that calls a factory function to supply missing values</td>
        </tr>
        <tr>
            <td>UserDict</td>
            <td>wrapper around dictionary objects for easier dict subclassing</td>
        </tr>
        <tr>
            <td>UserList</td>
            <td>wrapper around list objects for easier list subclassing</td>
        </tr>
        <tr>
            <td>UserString</td>
            <td>wrapper around string objects fro easier string subclassing</td>
        </tr>
    </tbody>
</table>

In [78]:
from collections import Counter
from collections import deque
from collections import defaultdict
from collections import namedtuple

### Counter
#### A Counter is a dict subclass for counting hashable objects. It's a collection where elements are stored as dictionary key and their and their counts are stored as dictionary values 

In [79]:
# from a string(iterable)
counter = Counter("misissippi")
counter

Counter({'m': 1, 'i': 4, 's': 3, 'p': 2})

In [80]:
# from list
animals = Counter(["cat", "dog", "cat", "gold fish", "dog"])
animals

Counter({'cat': 2, 'dog': 2, 'gold fish': 1})

In [81]:
# from dict
counter = Counter({"cat": 3, "dog": 2, "gold fish": 1})
counter

Counter({'cat': 3, 'dog': 2, 'gold fish': 1})

In [82]:
# missing elements have 0
counter["sharks"]

0

In [83]:
# we can go back to the elements
list(counter.elements())

['cat', 'cat', 'cat', 'dog', 'dog', 'gold fish']

In [84]:
# finding the most common
counter.most_common(2)

[('cat', 3), ('dog', 2)]

In [85]:
another_counter = Counter({"cat": 13, "dog": 22})

In [86]:
counter + another_counter

Counter({'cat': 16, 'dog': 24, 'gold fish': 1})

In [87]:
counter

Counter({'cat': 3, 'dog': 2, 'gold fish': 1})

In [88]:
# you can increment the values like so:
counter["dog"] += 1

In [89]:
counter

Counter({'cat': 3, 'dog': 3, 'gold fish': 1})

### deque
#### Deque support thread-safe, memory efficient appends and pops from either side of the deque with approximatelythe same O(1) performance in either direction

In [90]:
example_deque = deque(range(5))

In [91]:
example_deque

deque([0, 1, 2, 3, 4])

In [92]:
example_deque.append(5)
example_deque.appendleft(-1)
example_deque

deque([-1, 0, 1, 2, 3, 4, 5])

In [93]:
type(example_deque)

collections.deque

In [94]:
example_deque.extend([6, 17, 8])
example_deque.extendleft([-2, -3, -4]) # note the ordering
example_deque

deque([-4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 17, 8])

In [95]:
example_deque.index(3)

7

In [96]:
# This is inplace
# meaning the object is edited directly instead of creating a new reversed deque
example_deque.reverse()

In [97]:
example_deque

deque([8, 17, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4])

### deque with maxlen

In [98]:
limited_deque = deque(range(5), maxlen=5)
limited_deque

deque([0, 1, 2, 3, 4])

In [99]:
# this will pushes off the 0
limited_deque.append(5)
limited_deque

deque([1, 2, 3, 4, 5])

In [100]:
# this will pushes off the the 5 at the other end
limited_deque.appendleft(0)
limited_deque

deque([0, 1, 2, 3, 4])

In [101]:
limited_deque.extend([5, 6, 7])
limited_deque

deque([3, 4, 5, 6, 7])

In [102]:
limited_deque.extendleft([-5, -6, -7])
limited_deque

deque([-7, -6, -5, 3, 4])

In [103]:
# this rotates elements n steps
limited_deque.rotate(2)
limited_deque

deque([3, 4, -7, -6, -5])

In [104]:
# we can access the maxlen
limited_deque.maxlen

5

In [105]:
# we can clear the deque
limited_deque.clear()
limited_deque

deque([])

In [106]:
# note if the iterable is longer than the maxlen, we only get the last n elements
deque([1, 2, 3, 4, 5], maxlen=3)

deque([3, 4, 5])

### defaultdict

In [107]:
sentence = (
    "Imagine we want to take a sentence and store words in lists in"
    " a dictionary keyed on the letter that each word starts with"
)

sentence

'Imagine we want to take a sentence and store words in lists in a dictionary keyed on the letter that each word starts with'

In [108]:
words_by_starting_letter = {}
for word in sentence.split(" "):
    if word[0] not in words_by_starting_letter:
        words_by_starting_letter[word[0]] = [word]
    else: # we know it's a list so append
        words_by_starting_letter[word[0]].append(word)
        
words_by_starting_letter

{'I': ['Imagine'],
 'w': ['we', 'want', 'words', 'word', 'with'],
 't': ['to', 'take', 'the', 'that'],
 'a': ['a', 'and', 'a'],
 's': ['sentence', 'store', 'starts'],
 'i': ['in', 'in'],
 'l': ['lists', 'letter'],
 'd': ['dictionary'],
 'k': ['keyed'],
 'o': ['on'],
 'e': ['each']}

In [109]:
# using defaultdict to the same as above
words_by_starting_letter = defaultdict(list)
for word in sentence.split(" "):
    words_by_starting_letter[word[0]].append(word)
words_by_starting_letter

defaultdict(list,
            {'I': ['Imagine'],
             'w': ['we', 'want', 'words', 'word', 'with'],
             't': ['to', 'take', 'the', 'that'],
             'a': ['a', 'and', 'a'],
             's': ['sentence', 'store', 'starts'],
             'i': ['in', 'in'],
             'l': ['lists', 'letter'],
             'd': ['dictionary'],
             'k': ['keyed'],
             'o': ['on'],
             'e': ['each']})

In [110]:
# An example from the official docs
s = 'mississippi'
letter_counts = defaultdict(int)
for char in s:
    letter_counts[char] += 1
    
letter_counts

defaultdict(int, {'m': 1, 'i': 4, 's': 4, 'p': 2})

### named tuple

#### named tuples assign meaning to each position in a tuple and allow for more readable, self dcoumenting code

In [111]:
student_data

[{'name': 'John Smith',
  'age': 38,
  'on_vacation': False,
  'test_scores': [33, 55, 66, 88, 96]},
 {'name': 'Ashley Doe',
  'age': 38,
  'on_vacation': False,
  'test_scores': [98, 14, 25, 35, 69]},
 {'name': 'Bull Smith',
  'age': 86,
  'on_vacation': True,
  'test_scores': [96, 75, 25, 36, 65]}]

In [112]:
student = ('Alpha Bravo-Delta', 30, True, [[100, 56, 87, 65, 89]])

In [113]:
def display_student(student):
    print(f"Student {student[0]} is {student[1]} years old and has test scores {student[3]}")

In [114]:
display_student(student)

Student Alpha Bravo-Delta is 30 years old and has test scores [[100, 56, 87, 65, 89]]


In [115]:
# student_named_tuple = namedtuple("Student", "name age on_vacation test_scores")

# same declaration as abovd
student_named_tuple = namedtuple("Student", ["name", "age", "on_vacation", "test_scores"]) 

In [116]:
simon = student_named_tuple(
    name = "Charle X-Ray",
    age = 30,
    on_vacation = True,
    test_scores = [100, 56, 87, 65, 89]
)

In [117]:
simon[2]

True

In [118]:
simon.on_vacation

True

In [119]:
# better implementation than above display_student()
def display_student(student):
    print(f"Student {student.name} is {student.age} years old and has test scores {student.test_scores}")

In [120]:
display_student(simon)

Student Charle X-Ray is 30 years old and has test scores [100, 56, 87, 65, 89]


In [121]:
# we can change simon attributes by _replace
older_simon = simon._replace(age=31)
older_simon # new instance

Student(name='Charle X-Ray', age=31, on_vacation=True, test_scores=[100, 56, 87, 65, 89])

In [122]:
simon

Student(name='Charle X-Ray', age=30, on_vacation=True, test_scores=[100, 56, 87, 65, 89])

In [123]:
type(older_simon)

__main__.Student

In [124]:
simon._fields

('name', 'age', 'on_vacation', 'test_scores')

In [125]:
# we can also do the samething using the typing.NamedTuple
from typing import NamedTuple, List
class Student(NamedTuple):
    name: str
    age: int
    on_vacation: bool
    test_scores: List[int]

In [126]:
simon = Student(
    name = "Charle X-Ray",
    age = 30,
    on_vacation = True,
    test_scores = [100, 56, 87, 65, 89]
)

In [127]:
simon

Student(name='Charle X-Ray', age=30, on_vacation=True, test_scores=[100, 56, 87, 65, 89])

In [128]:
student_data

[{'name': 'John Smith',
  'age': 38,
  'on_vacation': False,
  'test_scores': [33, 55, 66, 88, 96]},
 {'name': 'Ashley Doe',
  'age': 38,
  'on_vacation': False,
  'test_scores': [98, 14, 25, 35, 69]},
 {'name': 'Bull Smith',
  'age': 86,
  'on_vacation': True,
  'test_scores': [96, 75, 25, 36, 65]}]

In [129]:
students = [Student._make(student.values()) for student in student_data]

In [130]:
students

[Student(name='John Smith', age=38, on_vacation=False, test_scores=[33, 55, 66, 88, 96]),
 Student(name='Ashley Doe', age=38, on_vacation=False, test_scores=[98, 14, 25, 35, 69]),
 Student(name='Bull Smith', age=86, on_vacation=True, test_scores=[96, 75, 25, 36, 65])]

In [131]:
# CHallenge 3
# Create a namedtuple called Point with an x and y attributes to represent points on a grid
# Create a list with 100 random points x-values in 1,2,3 and y-values in 1, 2, 3
# Find the most common point in the list
from typing import NamedTuple
import random

class Point(NamedTuple):
    x_value: int
    y_value: int
        

# random_points = []
# for i in range(500):
#     random_point = Point(
#     x_value= random.randint(0, 9),
#     y_value= random.randint(0, 9)
#     )
#     random_points.append(random_point)
random_points = [Point(random.randint(0, 9), random.randint(0, 9)) for _ in range(500)]
# random_points

In [132]:
# random_dict = {}
# for i in random_points:
#     if i not in random_dict:
#         random_dict[i] = 1
#     else:
#         random_dict[i] += 1
# random_dict

In [133]:
# random_dict
# random_points

In [134]:
counter = Counter(random_points)
counter

Counter({Point(x_value=3, y_value=1): 4,
         Point(x_value=4, y_value=9): 4,
         Point(x_value=1, y_value=5): 13,
         Point(x_value=0, y_value=5): 5,
         Point(x_value=4, y_value=4): 3,
         Point(x_value=3, y_value=3): 3,
         Point(x_value=5, y_value=6): 9,
         Point(x_value=6, y_value=7): 7,
         Point(x_value=7, y_value=3): 5,
         Point(x_value=7, y_value=9): 6,
         Point(x_value=9, y_value=8): 6,
         Point(x_value=9, y_value=9): 5,
         Point(x_value=6, y_value=4): 2,
         Point(x_value=3, y_value=4): 9,
         Point(x_value=4, y_value=3): 4,
         Point(x_value=7, y_value=4): 3,
         Point(x_value=2, y_value=1): 6,
         Point(x_value=5, y_value=7): 9,
         Point(x_value=2, y_value=4): 7,
         Point(x_value=6, y_value=9): 3,
         Point(x_value=6, y_value=8): 3,
         Point(x_value=2, y_value=8): 5,
         Point(x_value=8, y_value=5): 4,
         Point(x_value=3, y_value=2): 6,
         Point(

In [135]:
# counter, count = Counter(random_dict).most_common(1)[0]
counter, count = Counter(random_points).most_common(1)[0]
print(f"The most common appears {count} times")
counter

The most common appears 13 times


Point(x_value=1, y_value=5)

### itertools

#### This module implements a number of iterator building blocks!

In [136]:
import itertools

## Quick asside -> What is an iterable and what is an iterator and how a for loop works

#### An iterator is an object that implements the iterator protocol. In other words, an iterator is an object that implements the following methods:
#### \__iter__  returns the iterator object itself
#### \__next__ returns the next element
#### It means you can not use use the iterator object again.

#### An itearable is an object that you can iterate over.
#### An object is iterable when it implements the __iter__ method. And its __iter__ method returns a new iterator

In [137]:
# An iterator implements two very special functions
# __iter__ -> must return an iterable
# __next__ -> steps through iterator

In [138]:
student_data

[{'name': 'John Smith',
  'age': 38,
  'on_vacation': False,
  'test_scores': [33, 55, 66, 88, 96]},
 {'name': 'Ashley Doe',
  'age': 38,
  'on_vacation': False,
  'test_scores': [98, 14, 25, 35, 69]},
 {'name': 'Bull Smith',
  'age': 86,
  'on_vacation': True,
  'test_scores': [96, 75, 25, 36, 65]}]

In [139]:
student_data.__iter__

<method-wrapper '__iter__' of list object at 0x0000017BD1A4FC80>

In [140]:
student_data.__iter__()

<list_iterator at 0x17bd1a189a0>

In [141]:
iter(student_data)

<list_iterator at 0x17bd1a18310>

In [142]:
student_data_iterator = iter(student_data)
student_data_iterator

<list_iterator at 0x17bd1a4aca0>

In [143]:
student_data_iterator.__next__

<method-wrapper '__next__' of list_iterator object at 0x0000017BD1A4ACA0>

In [144]:
# call the next iterator
student_data_iterator.__next__()

{'name': 'John Smith',
 'age': 38,
 'on_vacation': False,
 'test_scores': [33, 55, 66, 88, 96]}

In [145]:
# you can also call the next iterator with nex()
next(student_data_iterator)

{'name': 'Ashley Doe',
 'age': 38,
 'on_vacation': False,
 'test_scores': [98, 14, 25, 35, 69]}

In [146]:
# you can also call the next iterator with nex()
next(student_data_iterator)

{'name': 'Bull Smith',
 'age': 86,
 'on_vacation': True,
 'test_scores': [96, 75, 25, 36, 65]}

In [147]:
# will throw error because it has finished iterating
next(student_data_iterator)

StopIteration: 

In [148]:
# summary of for loop
student_data_iterator = iter(student_data)
while True:
    try:
        item = next(student_data_iterator)
        # This is now the same as the for block
        print(item)
    except:
        break

{'name': 'John Smith', 'age': 38, 'on_vacation': False, 'test_scores': [33, 55, 66, 88, 96]}
{'name': 'Ashley Doe', 'age': 38, 'on_vacation': False, 'test_scores': [98, 14, 25, 35, 69]}
{'name': 'Bull Smith', 'age': 86, 'on_vacation': True, 'test_scores': [96, 75, 25, 36, 65]}


## itertools is fun

### Chain

In [149]:
itertools.chain(([1,2,3], [4,5,6], [7,8,9])) # note lazy

<itertools.chain at 0x17bd1a4abb0>

In [150]:
for i in itertools.chain([1,2,3], [4,5,6], [7,8,9]):
    print(i)

1
2
3
4
5
6
7
8
9


### Chain from iterable

In [151]:
# iterable from iterables
for i in itertools.chain.from_iterable([[1,2,3], [4,5,6], [7,8,9]]):
    print(i)

1
2
3
4
5
6
7
8
9


### Count

In [152]:
# similar to range but when you don't how many you want
for n in itertools.count(start=1, step=1):
    print(f"n = {n}")
    if n==5:
        break

n = 1
n = 2
n = 3
n = 4
n = 5


In [153]:
# less code than this
n = 1
while n < 6:
    print(f"n = {n}")
    if n == 5:
        break
    n += 1

n = 1
n = 2
n = 3
n = 4
n = 5


### Combinations

In [154]:
list(itertools.combinations([1,2,3,4], 2))

[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]

In [155]:
list(itertools.combinations_with_replacement([1,2,3,4],2))

[(1, 1),
 (1, 2),
 (1, 3),
 (1, 4),
 (2, 2),
 (2, 3),
 (2, 4),
 (3, 3),
 (3, 4),
 (4, 4)]

### Cycle

In [156]:
# itertools.cycle(['A', 'B', 'C']) -> 'A', 'B', 'C', 'A', 'B', 'C', 'A', ...

In [157]:
import random
def player_move(player):
    if random.randint(1, 4) == 1:
        print(f"Player {player} moved and won!")
        return True
    else:
        print(f"Player {player} moved!")
        return False

In [158]:
for player in itertools.cycle(["A", "B"]):
    if player_move(player):
        break

Player A moved and won!


### Groupby

In [159]:
list(itertools.groupby("MISSISSIPPI"))

[('M', <itertools._grouper at 0x17bd1a53dc0>),
 ('I', <itertools._grouper at 0x17bd1a53220>),
 ('S', <itertools._grouper at 0x17bd1a532e0>),
 ('I', <itertools._grouper at 0x17bd1a538b0>),
 ('S', <itertools._grouper at 0x17bd1a535e0>),
 ('I', <itertools._grouper at 0x17bd1a53850>),
 ('P', <itertools._grouper at 0x17bd1a53ca0>),
 ('I', <itertools._grouper at 0x17bd1a53730>)]

In [160]:
for key, group in itertools.groupby("MISSISSIPPI"):
    print(f"A group of {key} with {list(group)}")

A group of M with ['M']
A group of I with ['I']
A group of S with ['S', 'S']
A group of I with ['I']
A group of S with ['S', 'S']
A group of I with ['I']
A group of P with ['P', 'P']
A group of I with ['I']


### islice

In [161]:
list(itertools.islice([1,2,3,4,5], 1, None, 2))

[2, 4]

In [162]:
long_list = list(range(1_000_000))

In [163]:
import sys

def convert_bytes(size):
    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if size < 1024.0:
            return "%3.1f %s" % (size, x)
        size /= 1024.0
    return size

In [164]:
convert_bytes(sys.getsizeof(long_list))

'7.6 MB'

In [165]:
convert_bytes(sys.getsizeof(long_list[::2]))

'3.8 MB'

In [166]:
# doesn't create a new object in memory
convert_bytes(sys.getsizeof(itertools.islice(long_list, 1, None, 2)))

'72.0 bytes'

### permutations

In [167]:
# order is important. i.e. order matters
list(itertools.permutations([1,2,3],3))

[(1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 3, 1), (3, 1, 2), (3, 2, 1)]

In [168]:
list(itertools.combinations([1,2,3],3))

[(1, 2, 3)]

In [169]:
list(itertools.combinations_with_replacement([1,2,3],3))

[(1, 1, 1),
 (1, 1, 2),
 (1, 1, 3),
 (1, 2, 2),
 (1, 2, 3),
 (1, 3, 3),
 (2, 2, 2),
 (2, 2, 3),
 (2, 3, 3),
 (3, 3, 3)]

### product

In [170]:
list(itertools.product([1,2,3],[4, 5]))

[(1, 4), (1, 5), (2, 4), (2, 5), (3, 4), (3, 5)]

In [171]:
# can also repeat arg which is useful
list(itertools.product([1,2,3],[4, 5], repeat=2))

[(1, 4, 1, 4),
 (1, 4, 1, 5),
 (1, 4, 2, 4),
 (1, 4, 2, 5),
 (1, 4, 3, 4),
 (1, 4, 3, 5),
 (1, 5, 1, 4),
 (1, 5, 1, 5),
 (1, 5, 2, 4),
 (1, 5, 2, 5),
 (1, 5, 3, 4),
 (1, 5, 3, 5),
 (2, 4, 1, 4),
 (2, 4, 1, 5),
 (2, 4, 2, 4),
 (2, 4, 2, 5),
 (2, 4, 3, 4),
 (2, 4, 3, 5),
 (2, 5, 1, 4),
 (2, 5, 1, 5),
 (2, 5, 2, 4),
 (2, 5, 2, 5),
 (2, 5, 3, 4),
 (2, 5, 3, 5),
 (3, 4, 1, 4),
 (3, 4, 1, 5),
 (3, 4, 2, 4),
 (3, 4, 2, 5),
 (3, 4, 3, 4),
 (3, 4, 3, 5),
 (3, 5, 1, 4),
 (3, 5, 1, 5),
 (3, 5, 2, 4),
 (3, 5, 2, 5),
 (3, 5, 3, 4),
 (3, 5, 3, 5)]

### zip longest

In [172]:
list(zip([1, 2], ['a', 'b', 'c']))

[(1, 'a'), (2, 'b')]

In [173]:
list(itertools.zip_longest([1, 2], ['a', 'b', 'c']))

[(1, 'a'), (2, 'b'), (None, 'c')]

In [174]:
# also you might want an error thrown
list(zip([1, 2], ['a', 'b', 'c'], strict=True))

TypeError: zip() takes no keyword arguments

## challenge 4

In [175]:
# how many times does each number appear in the multiples of 3 less than 100
three_times_table = (str(number) for number in itertools.islice(range(100), 3, None, 3))
list(three_times_table)
# Counter(itertools.chain.from_iterable(three_times_table))

['3',
 '6',
 '9',
 '12',
 '15',
 '18',
 '21',
 '24',
 '27',
 '30',
 '33',
 '36',
 '39',
 '42',
 '45',
 '48',
 '51',
 '54',
 '57',
 '60',
 '63',
 '66',
 '69',
 '72',
 '75',
 '78',
 '81',
 '84',
 '87',
 '90',
 '93',
 '96',
 '99']

In [176]:
# how many times does each number appear in the multiples of 3 less than 100
three_times_table = (str(number) for number in itertools.islice(range(100), 3, None, 3))
# list(three_times_table)
Counter(itertools.chain.from_iterable(three_times_table))

Counter({'3': 8,
         '6': 8,
         '9': 8,
         '1': 6,
         '2': 6,
         '5': 6,
         '8': 6,
         '4': 6,
         '7': 6,
         '0': 3})

## functools

#### The functools module is for highe-order functions: functions that act on or return other functions.
#### In general, any callable object can be treated as a function for the purposes of this module

In [177]:
import functools

### lru_cache, cache
#### Last recent cache - is a decorator to wrap a function with a memoizing callable that saves up to the maxsize most recent calls

In [178]:
def factorial(n):
    return n * factorial(n-1) if n else 1

In [179]:
@functools.lru_cache(maxsize=None)
def fast_factorial(n):
    return n * factorial(n-1) if n else 1

In [180]:
%timeit factorial(200)

44.4 µs ± 2.94 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [181]:
%timeit fast_factorial(200)

94.2 ns ± 7.48 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [182]:
# in python 3.9 + - no maxsize
@functools.cache
def fast_factorial_cache(n):
    return n * factorial(n-1) if n else 1
%timeit fast_factorial_cache(200)

89.8 ns ± 6.46 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


## total ordering

In [184]:
class Student:
    def __init__(self, name:str, age:int, test_scores:List[int], on_vacation:bool=False):
        self.name = name
        self.age = age
        self.test_scores = test_scores
        self.on_vaction = on_vacation
        
    def __repr__(self):
        return f"Student(name={self.name})"
    
    @property
    def mean_test_score(self):
        return sum(x for x in self.test_scores) / len(self.test_scores)

In [189]:
student_data

[{'name': 'John Smith',
  'age': 38,
  'on_vacation': False,
  'test_scores': [33, 55, 66, 88, 96]},
 {'name': 'Ashley Doe',
  'age': 38,
  'on_vacation': False,
  'test_scores': [98, 14, 25, 35, 69]},
 {'name': 'Bull Smith',
  'age': 86,
  'on_vacation': True,
  'test_scores': [96, 75, 25, 36, 65]}]

In [193]:
john = Student(**student_data[0])
newton = Student(**student_data[2])

In [194]:
john, newton

(Student(name=John Smith), Student(name=Bull Smith))

In [195]:
# this will throw error
john < newton

TypeError: '<' not supported between instances of 'Student' and 'Student'

In [203]:
class Student:
    def __init__(self, name:str, age:int, test_scores:List[int], on_vacation:bool=False):
        self.name = name
        self.age = age
        self.test_scores = test_scores
        self.on_vaction = on_vacation
        
    def __repr__(self):
        return f"Student(name={self.name})"
    
    @property
    def mean_test_score(self):
        return sum(x for x in self.test_scores) / len(self.test_scores)
    
    # add all these special methods
    def __lt__(self, other:Student):
        return self.mean_test_score < other.mean_test_score
    
    def __le__(self, other:Student):
        return self.mean_test_score <= other.mean_test_score
    
    def __gt__(self, other:Student):
        return self.mean_test_score > other.mean_test_score
    
    def __ge__(self, other:Student):
        return self.mean_test_score >= other.mean_test_score
    
    def __eq__(self, other:Student):
        return self.mean_test_score == other.mean_test_score

In [205]:
john = Student(**student_data[0])
newton = Student(**student_data[2])

In [206]:
john > newton

True

In [207]:
john < newton

False

In [209]:
# sames as
john.__gt__(newton)

True

In [230]:
@functools.total_ordering # this will just make us define only a special methods in which it opposite is obvious
class StudentOrdering:
    def __init__(self, name:str, age:int, test_scores:List[int], on_vacation:bool=False):
        self.name = name
        self.age = age
        self.test_scores = test_scores
        self.on_vaction = on_vacation
        
    def __repr__(self):
        return f"Student(name={self.name})"
    
    @property
    def mean_test_score(self):
        return sum(x for x in self.test_scores) / len(self.test_scores)
    
    # add all these special methods
    def __lt__(self, other:Student):
        return self.mean_test_score < other.mean_test_score
    
#     def __le__(self, other:Student):
#         return self.mean_test_score <= other.mean_test_score
    
#     def __gt__(self, other:Student):
#         return self.mean_test_score > other.mean_test_score
    
#     def __ge__(self, other:Student):
        return self.mean_test_score >= other.mean_test_score
    
    def __eq__(self, other:Student):
        return self.mean_test_score == other.mean_test_score

In [231]:
john_ordering = StudentOrdering(**student_data[0])
newton_ordering = StudentOrdering(**student_data[2])

In [232]:
john_ordering < newton_ordering

False

## partial

In [237]:
def is_pass(student: Student, pass_mark=60):
    passed = student.mean_test_score > pass_mark
    print((f"{student.name} has a test score " 
          f"{'above' if passed else 'below'} {pass_mark} which is {student.mean_test_score}"))

In [238]:
is_pass(john)

John Smith has a test score above 60 which is 67.6


In [239]:
is_pass(newton)

Bull Smith has a test score below 60 which is 59.4


In [243]:
# we can redefine the is_pass condition above
def is_top_set(student: Student):
    return is_pass(student, 30)

In [244]:
is_top_set(newton)

Bull Smith has a test score above 30 which is 59.4


In [246]:
# another way of doing this using functools.partial
is_top_set = functools.partial(is_pass, pass_mark=40)
is_top_set(newton)

Bull Smith has a test score above 40 which is 59.4


In [247]:
# another example
from statistics import median

In [251]:
min([2, 1], [1, 2], [2, 3]) # this will compare x-axis

[1, 2]

In [253]:
min([(1, 2), (2, 1), (2, 3)], key=lambda item: item[1]) # key condition would make it compare y-axis

(2, 1)

In [254]:
min_student = functools.partial(min, key=lambda student: median(student.test_scores))

In [256]:
min_student(students)

Student(name='Ashley Doe', age=38, on_vacation=False, test_scores=[98, 14, 25, 35, 69])

## Challenge 5

In [259]:
# create a function called student_pairs to find all combinations of 2 students
student_pairs = functools.partial(itertools.combinations, r=2)
for student_a, student_b in student_pairs(students):
    print(student_a.name, student_b.name)

John Smith Ashley Doe
John Smith Bull Smith
Ashley Doe Bull Smith


# Fin