# Collection Module

## `Counter`
* Special dictionary used for counting data, measuring frequency.

In [1]:
from collections import Counter

In [2]:
lst = ['dave', 'purvil', 'japan', 'dave', 'japan']

In [3]:
my_freq = Counter(lst)

In [4]:
my_freq

Counter({'dave': 2, 'purvil': 1, 'japan': 2})

* The `most_common()` method returns `(element, count)` pairs sorted by count in descending order; `most_common(n)` returns only the top `n` pairs.

In [5]:
my_freq.most_common()

[('dave', 2), ('japan', 2), ('purvil', 1)]

In [6]:
my_freq.most_common(1)

[('dave', 2)]

### `defaultdict`
* We pass a default factory (e.g. `list`); looking up a key that does not exist creates it with the factory's value instead of raising `KeyError`.
* Otherwise it works exactly like a normal dictionary.

In [7]:
from collections import defaultdict

numbers_by_name = defaultdict(list) # Default is list.

In [8]:
numbers_by_name['purvil']

[]

In [9]:
numbers_by_name['dave'].append(3)

In [10]:
numbers_by_name['dave']

[3]

### `OrderedDict`
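* A normal dictionary does not guarantee key order before Python 3.7; CPython 3.6 already preserves insertion order as an implementation detail, and from Python 3.7 onward it is guaranteed by the language.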

In [11]:
from collections import OrderedDict

* With `OrderedDict`, the `popitem()` method removes and returns items in reverse insertion order (last in, first out).
* Passing `last=False` to `popitem()` removes and returns items in insertion order instead (first in, first out).

### `namedtuple`
* Tuple where each position has name.
* Alternative to dict and pandas dataframe.

In [12]:
from collections import namedtuple

In [13]:
DateDetails = namedtuple('DateDetails', ['date', 'stop', 'riders'])

In [14]:
a = DateDetails('13/06/1955', 'Miramesa', 'Shailesh')

In [15]:
a

DateDetails(date='13/06/1955', stop='Miramesa', riders='Shailesh')

In [16]:
a.riders # We can access element as attribute

'Shailesh'

------------------

### Binary search and maintaining sorted list `bisect`
* `bisect` implements binary search and insertion in sorted list.
* `bisect.bisect` finds the location where an element should be inserted to keep the list sorted (to the right of any existing equal entries).
* `bisect.insort` actually inserts the element into that location.
* bisect module functions do not check whether the list is sorted, as doing so would be expensive.

In [17]:
import bisect

In [18]:
c = [1,2,2,2,3,4,7]

In [19]:
bisect.bisect(c,2)

4

In [20]:
bisect.bisect(c,6)

6

In [21]:
bisect.insort(c, 5)

In [22]:
c

[1, 2, 2, 2, 3, 4, 5, 7]

### `os`

In [23]:
import os

In [24]:
wd = os.getcwd() # get name of current working directory

In [25]:
wd

'C:\\Users\\davep\\MY NOTESSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS\\Python'

In [26]:
os.listdir(wd)

['.git',
 '.ipynb_checkpoints',
 '1 Variable Type Operator.ipynb',
 '10 Files.ipynb',
 '11 Input Output.ipynb',
 '12 Module and Packages.ipynb',
 '13 Exception.ipynb',
 '2 Conditional  statement and Loop.ipynb',
 '3 Function.ipynb',
 '4 List.ipynb',
 '5 Dictionary.ipynb',
 '6 String.ipynb',
 '7 Tuple.ipynb',
 '8 Set.ipynb',
 '9 Class.ipynb',
 'Anaconda.ipynb',
 'beatles-discography.csv',
 'Collection_Module_and_others.ipynb',
 'compressedFile',
 'Date and Time.ipynb',
 'emp.pickle',
 'filename',
 'images',
 'JSON Pickle CSV.ipynb',
 'jsonData.txt',
 'Jupyter notebook.ipynb',
 'myModule.py',
 'myScript.py',
 'myText.txt',
 'Regular_Expressoin.ipynb',
 'Special Python Tools.ipynb',
 'Web_Scrapping.ipynb',
 '__pycache__']

* `itertools` infinite iterators: `count`, `cycle`, `repeat`
* `itertools` finite iterators: `accumulate`, `chain`, `zip_longest`, etc.

### DataClasses
* Kind of mutable version of NamedTuple
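* It is a Python class with some methods (e.g. `__init__`, `__repr__`, `__eq__`) generated automatically for us
* It is mutable by default; pass `frozen=True` to `@dataclass` to make instances immutable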
* For python >= 3.7

In [2]:
import datetime
from dataclasses import dataclass
@dataclass
class StockPrice:
    """A stock symbol together with a date and the closing price on that date."""

    symbol: str
    date: datetime.date
    closing_price: float

    def is_high_tech(self) -> bool:
        """Return True when `symbol` is one of the listed high-tech tickers."""
        high_tech_symbols = ['MSFT', 'GOOG', 'FB', 'AMZN', 'AAPL']
        return self.symbol in high_tech_symbols

In [3]:
price2 = StockPrice('MSFT', datetime.date(2018, 12, 14), 106.03)

In [4]:
price2.symbol

'MSFT'

In [5]:
price2.closing_price

106.03

In [6]:
price2.is_high_tech()

True

In [7]:
price2.closing_price /= 2

In [8]:
price2.closing_price

53.015

### tqdm
* Show progress bar for running computation

In [11]:
import tqdm
import random

* Iterable wrapped in `tqdm.tqdm` will produce a progress bar

In [12]:
for i in tqdm.tqdm(range(100)):
    # do something slow: build (and immediately discard) a list of one million
    # random floats so that each of the 100 iterations takes noticeable time
    # and the progress bar has something to show
    _ = [random.random() for _ in range(1000000)]

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [00:11<00:00,  9.28it/s]


In [None]:
def primes_up_to(n: int) -> list:
    """Return the primes below `n` in ascending order, with a tqdm progress bar.

    Candidates are drawn from ``range(3, n)``, so the upper bound is
    exclusive: `n` itself is never tested. The bar's description is updated
    on every iteration with the number of primes found so far.

    Args:
        n: Exclusive upper bound of the search.

    Returns:
        A list of every prime ``p`` with ``2 <= p < n``; empty when ``n <= 2``.
    """
    # No prime lies in [2, n) when n <= 2. Without this guard the seed value 2
    # would be returned even though it is not below n.
    if n <= 2:
        return []

    primes = [2]

    with tqdm.trange(3, n) as t:
        for i in t:
            # i is prime if no smaller prime divides it. A composite i always
            # has a prime factor p with p * p <= i, so the scan can stop as
            # soon as p * p exceeds i instead of trying every known prime.
            i_is_prime = True
            for p in primes:
                if p * p > i:
                    break
                if i % p == 0:
                    i_is_prime = False
                    break

            if i_is_prime:
                primes.append(i)

            t.set_description(f"{len(primes)} primes")

    return primes

my_primes = primes_up_to(100_000)