## Exploring Itertools

##### Summarized from https://www.youtube.com/watch?v=Qu3dThVy6KQ
##### Corey Schafer has done a wonderful job in introducing itertools through the video.

##### Prerequisites: Understand iterables and iterators in Python from Corey's video: https://youtu.be/jTYiNjvnHZY

#### Exploring count()

In [1]:
# Import itertools
import itertools

In [2]:
# Build (index, value) pairs - Case 1: count() with its defaults (0, 1, 2, ...)
data = [100, 200, 300, 400, 500, 600, 700, 800]

# count() never ends on its own; zip() stops once `data` is exhausted.
indexed_data = list(zip(itertools.count(start=0, step=1), data))

print(indexed_data)

[(0, 100), (1, 200), (2, 300), (3, 400), (4, 500), (5, 600), (6, 700), (7, 800)]


In [3]:
# Build (index, value) pairs - Case 2: start counting at 5 and advance by 10
data = [100, 200, 300, 400, 500, 600, 700, 800]

# Positional form of count(start, step).
indexed_data = list(zip(itertools.count(5, 10), data))

print(indexed_data)

[(5, 100), (15, 200), (25, 300), (35, 400), (45, 500), (55, 600), (65, 700), (75, 800)]


In [4]:
# Build (index, value) pairs - Case 3: the step may be negative and fractional
data = [100, 200, 300, 400, 500, 600, 700, 800]

# Positional form of count(start, step); the first value stays the int 5.
indexed_data = list(zip(itertools.count(5, -2.5), data))

print(indexed_data)

[(5, 100), (2.5, 200), (0.0, 300), (-2.5, 400), (-5.0, 500), (-7.5, 600), (-10.0, 700), (-12.5, 800)]


#### Exploring zip_longest()

In [5]:
# Eight sample values shared by the zip() and zip_longest() cells below.
data = [100, 200, 300, 400, 500, 600, 700, 800]

In [6]:
# Built-in zip(): stops at the shortest input, so range(10) is cut to len(data).
data_2 = [*zip(range(10), data)]
print(data_2)

[(0, 100), (1, 200), (2, 300), (3, 400), (4, 500), (5, 600), (6, 700), (7, 800)]


In [7]:
# itertools.zip_longest(): keeps going until the longest input ends and pads
# the shorter one with the fill value (None, spelled out here).
data_3 = list(itertools.zip_longest(range(10), data, fillvalue=None))
print(data_3)

[(0, 100), (1, 200), (2, 300), (3, 400), (4, 500), (5, 600), (6, 700), (7, 800), (8, None), (9, None)]


In [8]:
# zip_longest() runs until the longest iterable is exhausted, filling the missing values with None

#### Exploring cycle()

In [9]:
# cycle() repeats the given values endlessly: 1, 2, 3, 1, 2, 3, ...
counter = itertools.cycle([1, 2, 3])

In [10]:
# Pull exactly ten values off the endless cycle and print each one.
for value in itertools.islice(counter, 10):
    print(value)

1
2
3
1
2
3
1
2
3
1


#### Exploring repeat()

In [11]:
# repeat() without `times` yields the same value forever.
counter_repeat = itertools.repeat(2)

# Take only the first ten values; the iterator itself never runs out.
for value in itertools.islice(counter_repeat, 10):
    print(value)

2
2
2
2
2
2
2
2
2
2


In [12]:
# With `times`, repeat() yields the value that many times and is then exhausted.
counter_repeat = itertools.repeat(2, times = 5)

# The loop asks for 10 values but only 5 are available, so the sixth next()
# call raises StopIteration (see the output below).
for i in range(10):
    print(next(counter_repeat))

2
2
2
2
2


StopIteration: 

In [13]:
# Use case: feed map() a constant second argument.
# repeat(2) supplies the exponent for every base; map() stops when range(10) ends.
squares = list(
    map(pow, range(10), itertools.repeat(2))
)
print(squares)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


#### Exploring starmap()

In [14]:
# Use case: starmap() unpacks each (base, exponent) tuple into pow(base, exponent).
squares_starmap = list(
    itertools.starmap(pow, [(base, 2) for base in range(5)])
)
print(squares_starmap)

[0, 1, 4, 9, 16]


In [15]:
# It takes a list of tuples instead of two separate lists

#### Exploring combinations()

In [16]:
# Sample names used by the combinatoric examples that follow.
names = ['Tomy', 'Bose', 'Cherian', 'Ashwin']

In [17]:
# All 2-name selections where order is irrelevant and no name is reused.
result = itertools.combinations(names, 2)
for pair in result:
    print(pair)

('Tomy', 'Bose')
('Tomy', 'Cherian')
('Tomy', 'Ashwin')
('Bose', 'Cherian')
('Bose', 'Ashwin')
('Cherian', 'Ashwin')


#### Exploring permutations()

In [18]:
# All ordered 2-name arrangements: ('Tomy', 'Bose') and ('Bose', 'Tomy') both appear.
result2 = itertools.permutations(names, 2)
for arrangement in result2:
    print(arrangement)

('Tomy', 'Bose')
('Tomy', 'Cherian')
('Tomy', 'Ashwin')
('Bose', 'Tomy')
('Bose', 'Cherian')
('Bose', 'Ashwin')
('Cherian', 'Tomy')
('Cherian', 'Bose')
('Cherian', 'Ashwin')
('Ashwin', 'Tomy')
('Ashwin', 'Bose')
('Ashwin', 'Cherian')


#### Exploring product()

In [19]:
# product() yields the Cartesian product of its inputs; with repeat=n it gives permutations with replacement

In [20]:
# Every ordered pair drawn from `names`, including a name paired with itself.
result_product = itertools.product(names, repeat=2)
for pair in result_product:
    print(pair)

('Tomy', 'Tomy')
('Tomy', 'Bose')
('Tomy', 'Cherian')
('Tomy', 'Ashwin')
('Bose', 'Tomy')
('Bose', 'Bose')
('Bose', 'Cherian')
('Bose', 'Ashwin')
('Cherian', 'Tomy')
('Cherian', 'Bose')
('Cherian', 'Cherian')
('Cherian', 'Ashwin')
('Ashwin', 'Tomy')
('Ashwin', 'Bose')
('Ashwin', 'Cherian')
('Ashwin', 'Ashwin')


#### Exploring combinations_with_replacement()

In [21]:
# Unordered 2-name selections in which the same name may be picked twice.
result_comb_rep = itertools.combinations_with_replacement(names, 2)
for pair in result_comb_rep:
    print(pair)

('Tomy', 'Tomy')
('Tomy', 'Bose')
('Tomy', 'Cherian')
('Tomy', 'Ashwin')
('Bose', 'Bose')
('Bose', 'Cherian')
('Bose', 'Ashwin')
('Cherian', 'Cherian')
('Cherian', 'Ashwin')
('Ashwin', 'Ashwin')


#### Exploring chain()

In [22]:
# Three independent lists to be joined in the chain() example.
numbers = list(range(1, 6))
letters = list('abcde')
names = ['Tomy', 'Bose', 'Cherian', 'Ashwin']

In [23]:
# Eager approach: build one new list that holds every element of all three lists.
# The whole result lives in memory at once, which becomes costly (or impossible)
# when the inputs are very large.
combined = [*numbers, *letters, *names]

In [24]:
# Lazy approach: chain walks the inputs one after another without copying them.
combined_new = itertools.chain.from_iterable([numbers, letters, names])
for item in combined_new:
    print(item)

1
2
3
4
5
a
b
c
d
e
Tomy
Bose
Cherian
Ashwin


#### Exploring islice()

In [25]:
# This function helps us perform slicing on an iterator

In [26]:
# islice(iterable, start, stop): yields positions 1 through 4 (stop is exclusive).
result_islice = itertools.islice(range(10), 1, 5)
for value in result_islice:
    print(value)

1
2
3
4


In [27]:
# islice(iterable, start, stop, step): the fourth argument is the step,
# so only every second position between 1 and 4 is yielded.
result_islice_2 = itertools.islice(range(10), 1, 5, 2)
for value in result_islice_2:
    print(value)

1
3


In [28]:
# Usecase:
# When you want to read a few lines from a huge file, this function helps you do that without loading the entire contents
# of that file into memory.
# Note: The file object that is returned, when you open a file, is an iterator.

# Sample Code:
# with open('test.log','r') as f:
#     header = itertools.islice(f,3) # Returns the first 3 lines
    
#     for line in header:
#         print(line)
    

#### Exploring compress()

In [29]:
# This function helps us select/shortlist values from an iterable that have a 'True' value associated with them in the selector

In [30]:
# Values to pick from, and one flag per value saying whether to keep it.
letters = list('abcde')
selectors = [
    True,   # keep 'a'
    True,   # keep 'b'
    False,  # skip 'c'
    True,   # keep 'd'
    False,  # skip 'e'
]

In [31]:
# Keep only the letters whose matching selector is truthy.
result = itertools.compress(letters, selectors)
for letter in result:
    print(letter)

a
b
d


In [32]:
# filter() uses a function for shortlisting, whereas compress() uses an iterable of selectors

#### Exploring filterfalse()

In [33]:
# Filter example:
def LessThan2(n):
    """Return True when ``n`` is smaller than 2, otherwise False.

    Used as the predicate for the filter(), filterfalse(), dropwhile() and
    takewhile() examples.
    """
    # The comparison already yields the boolean; no if/else is needed.
    return n < 2

numbers = [1, 2, 3, 4, 5]
# filter() keeps the values for which the predicate returns a truthy result.
result_filter = filter(LessThan2, numbers)
for kept in result_filter:
    print(kept)

1


In [34]:
# filterfalse() example: the mirror image of filter().
# It keeps the values for which the predicate returns a falsy result.
result_filter_false = itertools.filterfalse(LessThan2, numbers)
for kept in result_filter_false:
    print(kept)

2
3
4
5


#### Exploring dropwhile()

In [35]:
# Drops values from the iterable while the predicate returns True, i.e. until it encounters the first 'False'. Returns all the values from that point on.

In [36]:
numbers_new = [0, 1, 1, 2, 3, 4, 5, 0, 1, 2, 1, 1, 3]
# The leading 0, 1, 1 are skipped; once 2 fails the predicate, everything after
# it is yielded, including later values that are smaller than 2.
result_dropwhile = itertools.dropwhile(LessThan2, numbers_new)
for value in result_dropwhile:
    print(value, end=" ")

2 3 4 5 0 1 2 1 1 3 

#### Exploring takewhile()

In [37]:
# Takes values from the iterable until the predicate returns its first 'False'. Returns only the values seen before that point.

In [38]:
numbers_new2 = [0, 1, 0, 0, 1, 3, 4, 5, 0, 1, 2, 1, 1, 3]
# Yields the leading run 0, 1, 0, 0, 1 and stops for good at 3, the first
# value that fails the predicate.
result_takewhile = itertools.takewhile(LessThan2, numbers_new2)
for value in result_takewhile:
    print(value, end=" ")

0 1 0 0 1 

#### Exploring accumulate()

In [39]:
# It takes an iterable and returns the accumulated sum of the elements that it sees. It uses addition by default.

In [40]:
# Show the `numbers` list (from the filter() example) that accumulate() consumes next.
print(numbers)

[1, 2, 3, 4, 5]


In [41]:
# With no function given, accumulate() yields running sums.
result_accumulate = itertools.accumulate(numbers)
for running_total in result_accumulate:
    print(running_total, end=" ")

1 3 6 10 15 

In [42]:
# Example with multiplication: passing operator.mul yields running products.
import operator

result_accumulate_mul = itertools.accumulate(numbers, operator.mul)
for running_product in result_accumulate_mul:
    print(running_product, end=" ")

1 2 6 24 120 

#### Exploring groupby()

In [43]:
# It goes through an iterable and groups consecutive values based on a key.
# It yields tuples: the first element is the key and the second is an iterator over the values associated with that key.

In [44]:
# Sample records for the groupby() example, listed so that people from the
# same state sit next to each other.
people = [
    {'name': 'John Doe', 'city': 'Gotham', 'state': 'NY'},
    {'name': 'Jane Doe', 'city': 'Kings Landing', 'state': 'NY'},
    {'name': 'Corey Schafer', 'city': 'Boulder', 'state': 'CO'},
    {'name': 'Al Einstein', 'city': 'Denver', 'state': 'CO'},
    {'name': 'John Henry', 'city': 'Hinton', 'state': 'WV'},
    {'name': 'Randy Moss', 'city': 'Rand', 'state': 'WV'},
    {'name': 'Nicole K', 'city': 'Asheville', 'state': 'NC'},
    {'name': 'Jim Doe', 'city': 'Charlotte', 'state': 'NC'},
    {'name': 'Jane Taylor', 'city': 'Faketown', 'state': 'NC'},
]

In [45]:
def get_state(person):
    """Key function for groupby(): return the 'state' entry of a person record."""
    state = person['state']
    return state

In [46]:
# Group consecutive people that share the same state.
person_group = itertools.groupby(people, key=get_state)

In [47]:
# Print each state followed by its members, with a blank line between groups.
for state, members in person_group:
    print(state)
    for member in members:
        print(member)
    print()

NY
{'name': 'John Doe', 'city': 'Gotham', 'state': 'NY'}
{'name': 'Jane Doe', 'city': 'Kings Landing', 'state': 'NY'}

CO
{'name': 'Corey Schafer', 'city': 'Boulder', 'state': 'CO'}
{'name': 'Al Einstein', 'city': 'Denver', 'state': 'CO'}

WV
{'name': 'John Henry', 'city': 'Hinton', 'state': 'WV'}
{'name': 'Randy Moss', 'city': 'Rand', 'state': 'WV'}

NC
{'name': 'Nicole K', 'city': 'Asheville', 'state': 'NC'}
{'name': 'Jim Doe', 'city': 'Charlotte', 'state': 'NC'}
{'name': 'Jane Taylor', 'city': 'Faketown', 'state': 'NC'}



In [48]:
# Rebuild the grouping (the previous iterator is used up) and count each group.
person_group = itertools.groupby(people, key=get_state)
for state, members in person_group:
    print(state, sum(1 for _ in members))

NY 2
CO 2
WV 2
NC 3


In [49]:
# Drawback: groupby() only groups consecutive elements, so the input must already be sorted by the same key.
# In effect, the elements have to be grouped together before they are passed to groupby().

#### Exploring tee()

In [50]:
# Used for replicating iterators

In [51]:
# Split one iterator into two independent copies (2 is tee()'s default count).
person_group = itertools.groupby(people, key=get_state)
copy1, copy2 = itertools.tee(person_group, 2)

In [52]:
# Note: After replicating an iterator, you are only supposed to use the copies and not the original one.
# Otherwise the original gets advanced without the copies being informed, so the copies will miss those items.

#### The End