### `pathlib` module

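Windows uses `\` as the path separator, while Unix-like operating systems use `/`. This difference makes it hard to write path-handling code that works on both; `pathlib` represents paths as objects so the same code runs on either platform.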

In [1]:
import pathlib
pathlib.Path.cwd()

WindowsPath('c:/miniconda3/notebooks/My Notebooks')

In [2]:
pathlib.Path.home()

WindowsPath('C:/users/mayank')

In [11]:
# Path.cwd() already returns a Path, so glob() can be called on it directly.
# glob() is lazy; list() materialises the matches before slicing the first six.
a = pathlib.Path.cwd().glob('*.ipynb')
list(a)[:6]

[WindowsPath('c:/miniconda3/notebooks/my notebooks/ML - Clustering - K-Means, DBSCAN and GMM.ipynb'),
 WindowsPath('c:/miniconda3/notebooks/my notebooks/ML - Logistic Regression.ipynb'),
 WindowsPath('c:/miniconda3/notebooks/my notebooks/ML - Naive Bayes.ipynb'),
 WindowsPath('c:/miniconda3/notebooks/my notebooks/ML - PCA.ipynb'),
 WindowsPath('c:/miniconda3/notebooks/my notebooks/ML - Regression.ipynb'),
 WindowsPath('c:/miniconda3/notebooks/my notebooks/ML - Support Vector Machines.ipynb')]

In [26]:
pathlib.Path('C:\\miniconda3\\notebooks\\path.txt') # constructing a Path only builds the object; the file is not created if it does not exist

WindowsPath('C:/miniconda3/notebooks/path.txt')

In [28]:
pathlib.Path(r'C:\miniconda3\notebooks\path.txt')

WindowsPath('C:/miniconda3/notebooks/path.txt')

In [20]:
from pathlib import Path, PureWindowsPath
# A raw string avoids doubling every backslash in the Windows path.
path = PureWindowsPath(r"c:\miniconda3\notebooks\path.txt")

In [21]:
path

PureWindowsPath('c:/miniconda3/notebooks/path.txt')

In [22]:
path.name

'path.txt'

In [23]:
path.suffix

'.txt'

In [25]:
path.parent

PureWindowsPath('c:/miniconda3/notebooks')

In [29]:
path.root

'\\'

#### Read the Content of a File as String with `pathlib.Path.read_text()`

In [2]:
# Read the whole file into a single string.
# The encoding is passed explicitly because read_text() otherwise falls back
# to the platform's locale encoding, which differs between Windows and Unix.
# NOTE(review): assumes file.txt is UTF-8 (the content shown is plain ASCII) — confirm.
p = pathlib.Path('file.txt')
p.read_text(encoding='utf-8')

"my name is mayank\nI am an engineer\nI live in Delhi\nI'm married\nI like programming\nmayankk here!\nmayank here!!\na\naaa\nabab\naab\nababbb\nabcbca\nabracadabra\n\n\n"

### -> `os.path` module

In [11]:
import os.path
import os

In [13]:
import os
import os.path

# Directory the kernel is currently running in.
a = os.getcwd()
print("CWD is: ", a)

# Each row pairs a label with the os.path call it describes; `b` ends up
# holding the last result, exactly as if the calls were written one by one.
desktop = 'c:\\Users\\Mayank\\Desktop'
for label, b in [
    ("basename is: ", os.path.basename(desktop)),
    ("dir name: ", os.path.dirname(desktop)),
    ("abs path for cwd (.) is: ", os.path.abspath('.')),
    (" one step above(..): ", os.path.abspath('..')),
    ("join of '/' and 'base': ", os.path.join('/', 'base')),
    ("join of '\\' and 'base': ", os.path.join('\\', 'base')),
]:
    print(label, b)
# The interactive shell echoes the repr of a string, so the last result shows
# as '\\base' there, while print() writes the actual characters: \base


CWD is:  c:\miniconda3\notebooks\my notebooks
basename is:  Desktop
dir name:  c:\Users\Mayank
abs path for cwd (.) is:  c:\miniconda3\notebooks\my notebooks
 one step above(..):  c:\miniconda3\notebooks
join of '/' and 'base':  /base
join of '\' and 'base':  \base


### -> `collections` module

In [5]:
import collections

In [7]:
# Tally how many times each character occurs in the string.
c = collections.Counter('abcababcddefffg')
c

Counter({'a': 3, 'b': 3, 'c': 2, 'd': 2, 'e': 1, 'f': 3, 'g': 1})

In [4]:
from collections import deque
d = deque()
d.extend('abc')  # extend() adds every item of the iterable: one entry per character here
d.append('d')  # append() adds a single item at the right-hand end
d

deque(['a', 'b', 'c', 'd'])

In [17]:
d1 = deque()
d1.extend(range(5))  # items are added on the right, so they keep their original order
d1

deque([0, 1, 2, 3, 4])

In [18]:
d1.append(5)
d1

deque([0, 1, 2, 3, 4, 5])

In [19]:
d2 = deque()
d2.extendleft(range(5))  # each item is pushed onto the left end in turn, so the final order is reversed
d2

deque([4, 3, 2, 1, 0])

In [20]:
d2.appendleft(5)
d2.append(-1)
d2

deque([5, 4, 3, 2, 1, 0, -1])

In [21]:
d1

deque([0, 1, 2, 3, 4, 5])

In [22]:
d1.pop()
d1

deque([0, 1, 2, 3, 4])

In [23]:
d1.popleft()
d1

deque([1, 2, 3, 4])

In [24]:
d

deque(['a', 'b', 'c', 'd'])

#### `deque.rotate()`

In [25]:
d.rotate(2)

In [26]:
d

deque(['c', 'd', 'a', 'b'])

In [27]:
d.rotate(-2)

In [28]:
d


deque(['a', 'b', 'c', 'd'])

#### Constraining the queue size
A `deque` instance can be configured with a maximum length so that it never grows beyond that size. Once the deque is full, each newly added item causes an item at the opposite end to be discarded. This behavior is useful for keeping the last n items of a stream of undetermined length.

In [29]:
# maxlen caps the deque at four items.
d3 = deque(maxlen = 4)
for i in range(6):
    d3.append(i)  # once the deque is full, appending on the right drops the oldest item on the left
d3

deque([2, 3, 4, 5])

### -> `itertools` module

In [1]:
import itertools as it
import operator

**`accumulate()`**

`itertools.accumulate(iterable[,func])`

In [2]:
# Running product: each value emitted is the product of all items seen so far.
data = [1, 2, 3, 4, 5]
result = it.accumulate(data, operator.mul)
print(*result, sep='\n')

1
2
6
24
120


In [3]:
# Running maximum: any two-argument function can serve as the accumulator.
data = [5, 2, 6, 4, 5, 9, 1]
result = it.accumulate(data, max)
print(*result, sep='\n')

5
5
6
6
6
9
9


In [5]:
# Without a function argument, accumulate() adds the items up (running totals).
result = it.accumulate(data)
print(*result, sep='\n')

5
7
13
17
22
31
32


**`combinations()`**

`itertools.combinations(iterable,r)`

In [6]:
# Every unordered pair of distinct items, in the order the items appear in the list.
shape = ['circle', 'triangle', 'squre']
result = it.combinations(shape, 2)
print(*result, sep='\n')

('circle', 'triangle')
('circle', 'squre')
('triangle', 'squre')


**`combinations_with_replacement()`**

In [8]:
# Same as combinations(), except that an item may also be paired with itself.
result = it.combinations_with_replacement(shape, 2)
print(*result, sep='\n')

('circle', 'circle')
('circle', 'triangle')
('circle', 'squre')
('triangle', 'triangle')
('triangle', 'squre')
('squre', 'squre')


**`count()`**

`itertools.count(start=0, step = 1)`


In [9]:
# count() has no end value, so the loop itself must decide when to stop.
result = it.count(10, 3)
i = next(result)
while i <= 25:
    print(i)
    i = next(result)

10
13
16
19
22
25


**`cycle()`**

`itertools.cycle(iterable)` - will cycle or loop through the iterable endlessly

In [12]:
# cycle() repeats `data` forever; the position counter is what ends the loop.
data = [2, 4, 3]
pairs = enumerate(it.cycle(data))
i, j = next(pairs)
while i <= 20:
    print(i, j)
    i, j = next(pairs)
    

0 2
1 4
2 3
3 2
4 4
5 3
6 2
7 4
8 3
9 2
10 4
11 3
12 2
13 4
14 3
15 2
16 4
17 3
18 2
19 4
20 3


**`chain()`**

`itertools.chain(*iterables)`

The `chain()` function takes several iterables as arguments and returns a single iterator that produces the contents of all of the inputs, one after another, as though they came from a single iterable.

In [1]:
# Import only the name that is used: a star import would copy every public
# itertools name into the notebook namespace and hide where names come from.
from itertools import chain

# chain() walks the first iterable, then the second; the nested ['d'] is
# yielded as a single item, it is not flattened.
for i in chain([1, 2, 3], ['a', 'b', 'c', ['d']]):
    print(i, end=' ')


1 2 3 a b c ['d'] 

In [2]:
# Two consecutive ranges chained into one continuous sequence.
print(*chain(range(6), range(6, 8)), sep='\n')

0
1
2
3
4
5
6
7


**`compress()`**

`itertools.compress(data,selectors)`

In [17]:
# compress() keeps the items of `shape` whose matching selector is truthy.
shape = ['a', 'b', 'c', 'd', 'e']
selector = [0, 1, 1, 0, 1]  # booleans such as [False, True, ...] work as well

kept = it.compress(shape, selector)
list(kept)

['b', 'c', 'e']

**`dropwhile()`**

`itertools.dropwhile(predicate, iterable)`

In [27]:
# Leading items below 5 are dropped; from the first 5 onwards everything is
# kept, including the trailing 1 and 2.
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2]
result = it.dropwhile(lambda value: value < 5, data)
[*result]

[5, 6, 7, 8, 9, 1, 2]

Careful! `dropwhile` keeps dropping elements only as long as the predicate is true. As soon as the predicate returns `False`, it returns that element and all the remaining elements without testing them again.

In [28]:
# The predicate is already false for the first item, so nothing is dropped.
data = [4, 5, 6, 1]
result = it.dropwhile(lambda value: value < 3, data)
[*result]

[4, 5, 6, 1]

In the above example, the predicate returned `False` for the very first element in `data`, so the entire `data` is returned.

**`filterfalse()`**

`itertools.filterfalse(predicate, iterable)`

In [29]:
# Unlike dropwhile(), filterfalse() tests every item and keeps only those for
# which the predicate is false, so the trailing 1 and 2 are removed too.
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2]
result = it.filterfalse(lambda value: value < 5, data)
[*result]

[5, 6, 7, 8, 9]

**`islice()`**

`itertools.islice(iterable, stop)` or `itertools.islice(iterable, start, stop[, step])`

In [33]:
list(it.islice(shape,2))

['a', 'b']

**`permutations()`**

`itertools.permutations(iterable, r = None)`

In [34]:
shape

['a', 'b', 'c', 'd', 'e']

In [36]:
# Ordered pairs: unlike combinations(), ('a', 'b') and ('b', 'a') both appear.
result = it.permutations(shape, 2)
print(*result, sep='\n')

('a', 'b')
('a', 'c')
('a', 'd')
('a', 'e')
('b', 'a')
('b', 'c')
('b', 'd')
('b', 'e')
('c', 'a')
('c', 'b')
('c', 'd')
('c', 'e')
('d', 'a')
('d', 'b')
('d', 'c')
('d', 'e')
('e', 'a')
('e', 'b')
('e', 'c')
('e', 'd')


**`product()`**

`itertools.product(*iterables, repeat=1)` - Cartesian product of the input iterables


In [37]:
# Every number is paired with every letter; the right-most input varies fastest.
num_data = [1, 2, 3]
alpha_data = ['a', 'b', 'c']

[*it.product(num_data, alpha_data)]

[(1, 'a'),
 (1, 'b'),
 (1, 'c'),
 (2, 'a'),
 (2, 'b'),
 (2, 'c'),
 (3, 'a'),
 (3, 'b'),
 (3, 'c')]

**`repeat()`**

`itertools.repeat(object[,times])`

In [39]:
list(it.repeat('spam', 3)) # produces an endless stream if the 'times' argument is omitted

['spam', 'spam', 'spam']

**`starmap()`**

`itertools.starmap(function, iterable)`

In [42]:
# starmap() unpacks each tuple into the function's arguments: mul(1, 2), mul(3, 4), ...
data = [(1, 2), (3, 4), (5, 6)]
[*it.starmap(operator.mul, data)]

[2, 12, 30]

**`takewhile()`**

`itertools.takewhile(predicate, iterable)`

In [47]:
# Items are taken until the first one that fails the test (5); the later 1 and
# 2 are never reached.
data = [1, 2, 3, 4, 5, 1, 2]
[*it.takewhile(lambda value: value < 5, data)]

[1, 2, 3, 4]

`takewhile()` is the opposite of `dropwhile()`. It returns elements as long as the predicate is true; as soon as the predicate is false, it stops and does not evaluate the rest of the iterable.

**`tee()`**

`itertools.tee(iterable,n =2)`

Returns `n` independent iterators from a single iterable.

In [48]:
data

[1, 2, 3, 4, 5, 1, 2]

In [51]:
# Each iterator returned by tee() yields the full sequence on its own.
first, second = it.tee(data, 2)
print(list(first))
print(list(second))

[1, 2, 3, 4, 5, 1, 2]
[1, 2, 3, 4, 5, 1, 2]


**`zip_longest()`**

`itertools.zip_longest(*iterables, fillvalue = None)`

In [53]:
# The shorter input is padded with the fill value (None by default).
a = [1, 2, 3]
b = ['a', 'b']

[*it.zip_longest(a, b)]

[(1, 'a'), (2, 'b'), (3, None)]

**`groupby()`**

`itertools.groupby(iterable, key=None)`

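**TODO:** add more examples. Note that `groupby()` only groups *consecutive* items that share the same key, so the input usually needs to be sorted by that key first.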

In [55]:
# Consecutive dicts with the same 'a' value fall into one group.
data = [{'a': 2, 'b': 3}, {'a': 2, 'b': 4}, {'a': 4, 'b': 6}]

groups = it.groupby(data, key=lambda row: row['a'])
for key, group in groups:
    # Each group is a one-shot iterator, so it is turned into a list for display.
    print(key, list(group), sep='\n')



2
[{'a': 2, 'b': 3}, {'a': 2, 'b': 4}]
4
[{'a': 4, 'b': 6}]


### -> `string` module

In [43]:
import string


In [2]:
string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [4]:
string.capwords('a cde', sep = ' ')

'A Cde'

In [5]:
string.digits

'0123456789'

In [8]:
string.Formatter() # the class that implements str.format()-style formatting; subclass it to customise how format strings are handled

<string.Formatter at 0x344e890>

In [10]:
string.hexdigits

'0123456789abcdefABCDEF'

In [11]:
string.printable

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [12]:
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [14]:
string.Template # class for simple $-based substitution, e.g. string.Template('Hi $name').substitute(name='Bob')

string.Template

In [15]:
string.whitespace

' \t\n\r\x0b\x0c'

### `bisect` module

In [18]:
import bisect

In [22]:
# bisect_right() returns the index at which 4 would be inserted to keep `a`
# sorted. `a` already holds three 4s, so the result is the slot just past the
# right-most one.
a = [1, 2, 4, 4, 4, 5, 8]
bisect.bisect_right(a, 4)

5

In [23]:
a = [1,2,4,4,6]
bisect.bisect_left(a,4)

2

In [26]:
# insort_right() (the same function as bisect.insort) inserts 4 in sorted
# position and mutates the list it is given. Inserting into a copy keeps this
# cell idempotent: re-running it no longer adds one more 4 to `a` each time.
a_with_4 = list(a)
bisect.insort_right(a_with_4, 4)
a_with_4

[1, 2, 4, 4, 4, 6]

### `datetime` module

In [71]:
import datetime as dt

#### `datetime.date`

In [72]:
d = dt.date(2018, 8,7)

In [61]:
d.year, d.month, d.day

(2018, 8, 7)

In [73]:
today = dt.date.today()
print(today)

2018-08-07


In [74]:
today.ctime()

'Tue Aug  7 00:00:00 2018'

In [75]:
# Break the date down into its ordinal number and its calendar fields.
for label, value in (
    ('ordinal:', today.toordinal()),
    ('Year   :', today.year),
    ('Mon    :', today.month),
    ('Day    :', today.day),
):
    print(label, value)

ordinal: 736913
Year   : 2018
Mon    : 8
Day    : 7


In [76]:
tt = today.timetuple()
tt

time.struct_time(tm_year=2018, tm_mon=8, tm_mday=7, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=1, tm_yday=219, tm_isdst=-1)

####  `datetime.datetime`

In [62]:
da = dt.datetime(2018, 8,7)

In [63]:
da.year, da.month, da.day, da.hour, da.minute, da.second

(2018, 8, 7, 0, 0, 0)

In [64]:
dt.MAXYEAR, dt.MINYEAR

(9999, 1)

In [79]:
# now() and today() both report the current local time.
print('Now    :', dt.datetime.now())
print('Today  :', dt.datetime.today())
# datetime.utcnow() is deprecated since Python 3.12. now(timezone.utc) gives
# the same instant as a timezone-aware value, so it prints with a +00:00 offset.
print('UTC Now:', dt.datetime.now(dt.timezone.utc))

Now    : 2018-08-07 16:08:50.114547
Today  : 2018-08-07 16:08:50.114547
UTC Now: 2018-08-07 10:38:50.114547


In [80]:
# Names of the datetime attributes to display, from coarsest to finest.
FIELDS = [
    'year', 'month', 'day',
    'hour', 'minute', 'second',
    'microsecond',
]

# The module is imported as `dt` (import datetime as dt); the bare name
# `datetime` is not defined at this point when the notebook is run top to bottom.
d = dt.datetime.now()
for attr in FIELDS:
    # getattr() looks each field up by name; '{:15}' pads the label to 15 columns.
    print('{:15}: {}'.format(attr, getattr(d, attr)))

year           : 2018
month          : 8
day            : 7
hour           : 16
minute         : 11
second         : 14
microsecond    : 212000


#### `datetime.time`

In [68]:
# A time of day with no date attached: 01:02:03.
t = dt.time(1, 2, 3)
print(t)
# Fields that were not supplied fall back to their defaults (0 microseconds, no tzinfo).
for label, value in (
    ('hour       :', t.hour),
    ('minute     :', t.minute),
    ('second     :', t.second),
    ('microsecond:', t.microsecond),
    ('tzinfo     :', t.tzinfo),
):
    print(label, value)

01:02:03
hour       : 1
minute     : 2
second     : 3
microsecond: 0
tzinfo     : None


In [70]:
import datetime

# min and max bound the representable times of day; the resolution for time
# is limited to whole microseconds.
for label, value in (
    ('Earliest  :', datetime.time.min),
    ('Latest    :', datetime.time.max),
    ('Resolution:', datetime.time.resolution),
):
    print(label, value)

Earliest  : 00:00:00
Latest    : 23:59:59.999999
Resolution: 0:00:00.000001


#### `datetime.timedelta`

In [77]:
# One timedelta per supported keyword, each worth exactly one unit.
for label, span in (
    ('microseconds:', dt.timedelta(microseconds=1)),
    ('milliseconds:', dt.timedelta(milliseconds=1)),
    ('seconds     :', dt.timedelta(seconds=1)),
    ('minutes     :', dt.timedelta(minutes=1)),
    ('hours       :', dt.timedelta(hours=1)),
    ('days        :', dt.timedelta(days=1)),
    ('weeks       :', dt.timedelta(weeks=1)),
):
    print(label, span)

microseconds: 0:00:00.000001
milliseconds: 0:00:00.001000
seconds     : 0:00:01
minutes     : 0:01:00
hours       : 1:00:00
days        : 1 day, 0:00:00
weeks       : 7 days, 0:00:00


In [78]:
import datetime

# Build a one-unit timedelta for each keyword and show its length in seconds.
for unit in ('microseconds', 'milliseconds', 'seconds',
             'minutes', 'hours', 'days', 'weeks'):
    delta = datetime.timedelta(**{unit: 1})
    # total_seconds() returns a float, which is why whole values print as e.g. 60.0
    print('{:15} = {:8} seconds'.format(str(delta), delta.total_seconds()))

0:00:00.000001  =    1e-06 seconds
0:00:00.001000  =    0.001 seconds
0:00:01         =      1.0 seconds
0:01:00         =     60.0 seconds
1:00:00         =   3600.0 seconds
1 day, 0:00:00  =  86400.0 seconds
7 days, 0:00:00 = 604800.0 seconds


In [83]:
import datetime

today = datetime.date.today()
# The offset is called `two_days`, not `dt`: this notebook imports the module
# as `dt` (import datetime as dt), and rebinding that name to a timedelta
# breaks every `dt.` cell that is run afterwards.
two_days = datetime.timedelta(days=2)
# Adding a timedelta to a date gives a new date shifted by that many days.
today + two_days

datetime.date(2018, 8, 9)

#### `datetime.timezone`

In [89]:
# now() returns a naive local time; astimezone() converts it to the given zone.
d = datetime.datetime.now()
utc = datetime.timezone.utc
print(utc, ':', d.astimezone(utc))

UTC : 2018-08-07 11:05:00.974906+00:00


In [90]:
d.astimezone()

datetime.datetime(2018, 8, 7, 16, 35, 0, 974906, tzinfo=datetime.timezone(datetime.timedelta(0, 19800), 'India Standard Time'))