# Iterators introduction

### Iterating with a for loop
* We can iterate over a list using a for loop
* We can iterate over a string using a for loop
* We can iterate over a range object using a for loop

In [1]:
# A list is an iterable, so a for loop can walk it element by element.
employees = ["Nick", "Lore", "Hugo"]

# Print one employee name per line, in list order.
for employee in employees:
    print(employee)

Nick
Lore
Hugo


In [2]:
# A string is iterable too: the loop yields one character at a time.
for letter in "DataCamp":
    print(letter)

D
a
t
a
C
a
m
p


In [3]:
# range(4) yields the integers 0, 1, 2 and 3, one per loop pass.
for i in range(4):
    print(i)

0
1
2
3


### Iterators vs. Iterables
* Iterable 
    * Examples: lists, strings, dictionaries, file connections
    * An object with an associated `__iter__()` method (the method the built-in iter() calls)
    * Applying iter() to an iterable creates an Iterator
* Iterator 
    * Produces next value with next()



In [4]:
# Iterating over iterables: next()
word = 'Da'
it = iter(word)  # iter() turns the iterable string into an iterator
print(next(it))
print(next(it))

# 'Da' has only two characters, so a third next() raises StopIteration.
# Catch and display the exception so the demonstration does not abort
# a "Restart & Run All" of the notebook.
try:
    print(next(it))
except StopIteration as exc:
    print(type(exc).__name__ + ':', exc)

D
a


StopIteration: 

In [6]:
# Unpacking an iterator with * consumes every remaining element at once
word = "Data"
it = iter(word)
print(*it)
# The iterator is exhausted now, so this second unpacking prints only an
# empty line; call iter(word) again to get a fresh iterator.
print(*it)

D a t a



In [7]:
# A dictionary's .items() view yields (key, value) pairs
pythonistas = {
    "hugo": "bowne-anderson",
    "francis": "castro",
}

# Unpack each pair straight into two loop variables
for key, value in pythonistas.items():
    print(key, value)

hugo bowne-anderson
francis castro


In [8]:
# Iterating over file connections
# The context manager closes the file even if one of the reads fails.
with open('file.txt') as file:
    it = iter(file)

    # Each next() yields one line including its trailing newline, which is
    # why print() shows a blank line after each one.
    print(next(it))
    print(next(it))

This is the first line.

This is the second line.


In [10]:
# Exercise: stepping through a list by hand with iter() and next()

flash1 = [
    'jay garrick',
    'barry allen',
    'wally west',
    'bart allen',
]
flash2 = iter(flash1)

# The list itself is untouched by creating an iterator over it
print(flash1)

# One next() call per element
print(next(flash2))
print(next(flash2))
print(next(flash2))
print(next(flash2))
# A fifth next(flash2) would raise StopIteration: the list has only four items.

['jay garrick', 'barry allen', 'wally west', 'bart allen']
jay garrick
barry allen
wally west
bart allen


In [11]:
# Exercise: the same list consumed by a for loop and by an explicit iterator

# Create a list of strings: flash
flash = [
    'jay garrick',
    'barry allen',
    'wally west',
    'bart allen',
]

# The for loop requests the items one after another on our behalf
for person in flash:
    print(person)

# Create an iterator for flash: superspeed
superspeed = iter(flash)

# Pull the four items out manually, one next() call each
print(next(superspeed))
print(next(superspeed))
print(next(superspeed))
print(next(superspeed))

jay garrick
barry allen
wally west
bart allen
jay garrick
barry allen
wally west
bart allen


In [13]:
# Exercise: iterators over range objects, small and astronomically large

# Create an iterator for range(3): small_value
small_value = iter(range(3))

# Drain small_value with three explicit next() calls
print("Small Value")
print(next(small_value))
print(next(small_value))
print(next(small_value))

# The for loop produces the same three values
print("Loop")
for num in range(3):
    print(num)

# Create an iterator for range(10 ** 100): googol
# Values are produced on demand, so only the ones requested are computed.
googol = iter(range(10 ** 100))

# Print the first 5 values from googol
print("First 5 values of googol")
print(next(googol))
print(next(googol))
print(next(googol))
print(next(googol))
print(next(googol))

Small Value
0
1
2
Loop
0
1
2
First 5 values of googol
0
1
2
3
4


In [14]:
# Exercise: passing a range object to functions that accept iterables

# Create a range object covering 10 through 20: values
values = range(10, 21)

# Printing a range shows its definition, not its elements
print(values)

# list() consumes the iterable and materialises the integers: values_list
values_list = list(values)
print(values_list)

# sum() accepts the range directly as well: values_sum
values_sum = sum(values)
print(values_sum)

range(10, 21)
[10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
165


# Playing with Iterators

In [15]:
# enumerate() pairs every element with its position
avengers = ["hawkeye", "iron man", "thor", "quicksilver"]

e = enumerate(avengers)

# e is a lazy enumerate object; list() turns it into (index, value) tuples
print(type(e))
e_list = list(e)
print(e_list)

<class 'enumerate'>
[(0, 'hawkeye'), (1, 'iron man'), (2, 'thor'), (3, 'quicksilver')]


In [18]:
# Unpacking the (index, value) tuples from enumerate() inside the loop header
avengers = ["hawkeye", "iron man", "thor", "quicksilver"]

# Default numbering begins at 0
print("Start 0:")
for index, value in enumerate(avengers, start=0):
    print(index, value)

# The start argument shifts the first index
print("Start 10:")
for index, value in enumerate(avengers, start=10):
    print(index, value)

Start 0:
0 hawkeye
1 iron man
2 thor
3 quicksilver
Start 10:
10 hawkeye
11 iron man
12 thor
13 quicksilver


In [19]:
# zip() stitches several iterables together element by element
avengers = ["hawkeye", "iron man", "thor", "quicksilver"]
names = ["barton", "stark", "odinson", "maximoff"]

z = zip(avengers, names)

# list() consumes the zip object and yields one tuple per position
z_list = list(z)
print(z_list)

[('hawkeye', 'barton'), ('iron man', 'stark'), ('thor', 'odinson'), ('quicksilver', 'maximoff')]


In [20]:
# Unpacking each zipped pair directly in the loop header
avengers = ["hawkeye", "iron man", "thor", "quicksilver"]
names = ["barton", "stark", "odinson", "maximoff"]

# z1 receives the element from avengers, z2 the matching one from names
for z1, z2 in zip(avengers, names):
    print(z1, z2)

hawkeye barton
iron man stark
thor odinson
quicksilver maximoff


In [21]:
# The * operator unpacks every tuple of a zip object into print()
avengers = ["hawkeye", "iron man", "thor", "quicksilver"]
names = ["barton", "stark", "odinson", "maximoff"]

z = zip(avengers, names)
print(*z)  # z is exhausted after this call

('hawkeye', 'barton') ('iron man', 'stark') ('thor', 'odinson') ('quicksilver', 'maximoff')


In [24]:
# Exercise: enumerate() on a list of mutants
# Create a list of strings: mutants
mutants = [
    'charles xavier',
    'bobby drake',
    'kurt wagner',
    'max eisenhardt',
    'kitty pryde',
]

# Materialise the (index, name) pairs produced by enumerate(): mutant_list
mutant_list = list(enumerate(mutants))

# Print the list of tuples
print(':Mutant List:')
print(mutant_list)

# Unpack and print the tuple pairs, numbered from 0
print(':Tuple pairs:')
for index1, value1 in enumerate(mutants):
    print(index1, value1)

# Same loop, but numbering starts at 1
print(':Start at 1:')
for index2, value2 in enumerate(mutants, 1):
    print(index2, value2)

:Mutant List:
[(0, 'charles xavier'), (1, 'bobby drake'), (2, 'kurt wagner'), (3, 'max eisenhardt'), (4, 'kitty pryde')]
:Tuple pairs:
0 charles xavier
1 bobby drake
2 kurt wagner
3 max eisenhardt
4 kitty pryde
:Start at 1:
1 charles xavier
2 bobby drake
3 kurt wagner
4 max eisenhardt
5 kitty pryde


In [31]:
# Exercise: zipping three lists together

mutants = ['charles xavier', 'bobby drake', 'kurt wagner', 'max eisenhardt', 'kitty pryde']
aliases = ['prof x', 'iceman', 'nightcrawler', 'magneto', 'shadowcat']
powers = ['telepathy', 'thermokinesis', 'teleportation', 'magnetokinesis', 'intangibility']

# Create a list of (mutant, alias, power) tuples: mutant_data
mutant_data = list(zip(mutants, aliases, powers))

# Print the list of tuples
print(':List of tuples:')
print(mutant_data)

# Create a zip object using the three lists: mutant_zip
mutant_zip = zip(mutants, aliases, powers)

# Printing the zip object itself shows only its repr, not its contents
print(":Zip object:")
print(mutant_zip)

# Iterate over a fresh zip of the three lists and unpack each tuple;
# mutant_zip itself is left unconsumed.
print(":Unpack and print:")
for value1, value2, value3 in zip(mutants, aliases, powers):
    print('{} is {} and uses {}'.format(value1, value2, value3))

:List of tuples:
[('charles xavier', 'prof x', 'telepathy'), ('bobby drake', 'iceman', 'thermokinesis'), ('kurt wagner', 'nightcrawler', 'teleportation'), ('max eisenhardt', 'magneto', 'magnetokinesis'), ('kitty pryde', 'shadowcat', 'intangibility')]
:Zip object:
<zip object at 0x70bd92d8>
:Unpack and print:
charles xavier is prof x and uses telepathy
bobby drake is iceman and uses thermokinesis
kurt wagner is nightcrawler and uses teleportation
max eisenhardt is magneto and uses magnetokinesis
kitty pryde is shadowcat and uses intangibility


In [36]:
# Exercise: "unzipping" with zip(*...)

mutants = (
    'charles xavier',
    'bobby drake',
    'kurt wagner',
    'max eisenhardt',
    'kitty pryde',
)
aliases = (
    'prof x',
    'iceman',
    'nightcrawler',
    'magneto',
    'shadowcat',
)
powers = (
    'telepathy',
    'thermokinesis',
    'teleportation',
    'magnetokinesis',
    'intangibility',
)

# Create a zip object from mutants and powers: z1
z1 = zip(mutants, powers)

# Print the tuples in z1 by unpacking with * (this exhausts z1)
print(*z1)

# z1 is spent, so build it again before using it a second time
z1 = zip(mutants, powers)

# 'Unzip' the tuples in z1 by unpacking with * and zip(): result1, result2
result1, result2 = zip(*z1)

# Check if unpacked tuples are equivalent to original tuples
print(result1 == mutants)
print(result2 == powers)
# Printing both side by side shows they hold the same values
print(result2)
print(powers)

('charles xavier', 'telepathy') ('bobby drake', 'thermokinesis') ('kurt wagner', 'teleportation') ('max eisenhardt', 'magnetokinesis') ('kitty pryde', 'intangibility')
True
True
('telepathy', 'thermokinesis', 'teleportation', 'magnetokinesis', 'intangibility')
('telepathy', 'thermokinesis', 'teleportation', 'magnetokinesis', 'intangibility')


# Using iterators for big data

### Loading data in chunks
* There can be too much data to hold in memory
* Solution: load data in chunks!
* Pandas function: read_csv()
    * Specify the number of rows per chunk with the `chunksize` argument
    

In [38]:
# Summing a column chunk by chunk: collect one partial sum per chunk
import pandas as pd

# With chunksize set, read_csv is iterated here to get one DataFrame
# of up to 1000 rows at a time.
result = [sum(chunk['x']) for chunk in pd.read_csv('data.csv', chunksize=1000)]

# Combine the per-chunk sums into the grand total
total = sum(result)
print(total)
# 4252532

FileNotFoundError: File b'data.csv' does not exist

In [None]:
# iterating over data 2
# The alias must be pd because the loop below calls pd.read_csv; without it
# the cell only runs when some other cell has already defined pd.
import pandas as pd

# Keep a running total instead of storing every per-chunk sum
total = 0
for chunk in pd.read_csv('data.csv', chunksize=1000):
    total += sum(chunk['x'])

print(total)
# 4252532

In [39]:
# Exercise

import pandas as pd

# Tally of how many times each 'lang' value has been seen: counts_dict
counts_dict = {}

# Read the file ten rows at a time
for chunk in pd.read_csv('tweets.csv', chunksize=10):

    # Count every entry of the 'lang' column in this chunk;
    # .get() supplies 0 the first time a value is encountered.
    for entry in chunk['lang']:
        counts_dict[entry] = counts_dict.get(entry, 0) + 1

# Print the populated dictionary
print(counts_dict)

{'en': 97, 'et': 1, 'und': 2}


In [41]:
# Exercise

import pandas as pd

# Define count_entries()
def count_entries(csv_file, c_size, colname):
    """Tally how often each distinct value occurs in one column of a CSV file.

    The file is read through pd.read_csv one chunk at a time, and the
    counts are accumulated across all chunks.

    Args:
        csv_file: CSV file to read, passed straight to pd.read_csv.
        c_size: Rows per chunk, passed as the chunksize argument.
        colname: Name of the column whose values are counted.

    Returns:
        A dict mapping each value seen in the column to its number of
        occurrences.
    """
    counts_dict = {}

    reader = pd.read_csv(csv_file, chunksize=c_size)
    for chunk in reader:
        for entry in chunk[colname]:
            # .get() supplies 0 the first time a value is encountered
            counts_dict[entry] = counts_dict.get(entry, 0) + 1

    return counts_dict

# Count the 'lang' values of tweets.csv, ten rows per chunk: result_counts
result_counts = count_entries(csv_file='tweets.csv', c_size=10, colname='lang')

# Show the value -> count mapping
print(result_counts)

{'en': 97, 'et': 1, 'und': 2}
