# List Comprehensions

## Loops with lists

In [4]:
# crie uma lista que tenha os quadrados do dobro de todos os numeros entre 1 e 10000
new_list = []

for value in range(1, 10001):
    new_value = (value*2)**2
    new_list.append(new_value)
    
print(old_list[:5])

[4, 16, 36, 64, 100]


In [5]:
new_list = [(value*2)**2 for value in range(1, 10001)] # Leve 2 Pague 1!

In [7]:
new_list[:5]

[4, 16, 36, 64, 100]

## Extra: tqdm

External package to visualize a loading bar in a `for loop`

In [13]:
import sys
sys.executable

'/usr/local/opt/python@3.7/bin/python3.7'

In [15]:
! /usr/local/opt/python@3.7/bin/python3.7 -m pip install tqdm



In [2]:
!pip3 install tqdm



In [3]:
# !pip install tqdm
# !pip3 install tqdm --user (windows users)

In [6]:
import time

In [4]:
from tqdm.auto import tqdm

In [18]:
new_list = []

for item in tqdm(range(10000), desc='Loading bar'):
    x = item * 2
    y = x**2
    new_list.append(y)
    time.sleep(0.001)

print(new_list[:100])

Loading bar:   0%|          | 0/10000 [00:00<?, ?it/s]

[0, 4, 16, 36, 64, 100, 144, 196, 256, 324, 400, 484, 576, 676, 784, 900, 1024, 1156, 1296, 1444, 1600, 1764, 1936, 2116, 2304, 2500, 2704, 2916, 3136, 3364, 3600, 3844, 4096, 4356, 4624, 4900, 5184, 5476, 5776, 6084, 6400, 6724, 7056, 7396, 7744, 8100, 8464, 8836, 9216, 9604, 10000, 10404, 10816, 11236, 11664, 12100, 12544, 12996, 13456, 13924, 14400, 14884, 15376, 15876, 16384, 16900, 17424, 17956, 18496, 19044, 19600, 20164, 20736, 21316, 21904, 22500, 23104, 23716, 24336, 24964, 25600, 26244, 26896, 27556, 28224, 28900, 29584, 30276, 30976, 31684, 32400, 33124, 33856, 34596, 35344, 36100, 36864, 37636, 38416, 39204]


## Comprehensions

Comprehensions are used for the same objective as the above. To create a new list. However, it has a different (more compact) syntax.

In [20]:
new_list = []

for item in range(10):
    new_list.append(item)

print(new_list)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [22]:
# Your code here!
new_list = [item for item in range(10)]
new_list

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

## Operations inside comprehensions

In [30]:
new_list = []

for item in range(10):
    new_list.append(item * 2)

In [31]:
# Your code here!
new_list = [item * 2 for item in range(10)]
print(new_list)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]


### `if` conditions in comprehensions

In [37]:
new_list = []

for item in range(10):
    if item % 2 == 0:
        new_list.append(item)
        
print(new_list)

[0, 2, 4, 6, 8]


In [39]:
# Your code here!
[item for item in range(10) if item % 2 == 0]

[0, 2, 4, 6, 8]

In [41]:
new_list = []

for item in range(10):
    if item % 2 == 0:
        new_list.append(item ** 2)
        
print(new_list)

[0, 4, 16, 36, 64]


In [42]:
# Your code here!
[item ** 2 for item in range(10) if item % 2 == 0]

[0, 4, 16, 36, 64]

In [44]:
list_to_run = [1, 5, 6, 8, 100, 1000]

squared_items = []
for i in list_to_run:
    squared_items.append(i ** 2)
    
squared_items

[1, 25, 36, 64, 10000, 1000000]

In [47]:
# Your code here!
[number ** 2 for number in squared_items]

[1, 625, 1296, 4096, 100000000, 1000000000000]

## Two `for` loops

In [49]:
list_of_lists = [[1,2,3,4], [5,6,7,8], [1,3,6,8]]

In [60]:
type(list_of_lists[0])

list

In [63]:
len(list_of_lists[0])

4

In [77]:
%time
list_of_lists = [[6], [1,2,3,4], [5,6,7,8], [1,3,6,8]] 
my_results = []

for item in list_of_lists:
    for another_item in item:
        if another_item > 5:    
            my_results.append(another_item)

print(my_results)

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 7.87 µs
[6, 6, 7, 8, 6, 8]


In [82]:
%%timeit
[another_item for item in list_of_lists for another_item in item if another_item > 5]

1.74 µs ± 122 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [78]:
# Your code here!
%time
[another_item for item in list_of_lists if len(item) > 1 for another_item in item if another_item > 5]

CPU times: user 5 µs, sys: 1 µs, total: 6 µs
Wall time: 12.2 µs


[6, 7, 8, 6, 8]

## If-Else syntax in comprehensions

In [91]:
lst = []
for item in range(10):
    if item <= 5:
        lst.append(0)
    else:
        lst.append(1)

lst

[0, 0, 0, 0, 0, 0, 1, 1, 1, 1]

In [87]:
# Your code here!
[(0 if item < 5 else 1) for item in range(10)]

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

# Applications


In [100]:
ls ..

[34mM01-L02-string_operations[m[m/      [34mM01-L03-regular-expressions[m[m/
[34mM01-L03-list_comprehension[m[m/     [34mM01-L04-functional-programming[m[m/


In [101]:
pwd

'/Users/maria.nubie/ironhack-cv-review/material/classes/M01-L03-list_comprehension'

In [117]:
mkdir data

mkdir: data: File exists


In [118]:
# usuários mac
!touch data/blablabla.cmd data/blablabla1.cmd data/blablabla2.cmd

In [122]:
ls data

FitBit.csv               bike_sharing_hourly.csv  blablabla2.cmd
asoidjfoi.cmd            bla.txt                  titanic.csv
asriowier.xlsx           blablabla.cmd            titanic_processed.csv
bike_sharing_daily.csv   blablabla1.cmd


In [137]:
os.listdir()

['list-comprehension-gabarito.ipynb',
 'data.zip',
 'list-comprehension.ipynb',
 '.ipynb_checkpoints',
 'data']

In [124]:

my_files = os.listdir('data/')

In [126]:
len(my_files)

11

In [129]:
my_files[0]

'asoidjfoi.cmd'

In [130]:
my_files[1]

'bike_sharing_hourly.csv'

In [131]:
my_files[2]

'asriowier.xlsx'

## Using for loop

In [134]:
datasets = []

for file in os.listdir('data/'):
    if file.endswith('.csv'):
        datasets.append('data/' + file)

## Using List Comprehensions

In [189]:
import os

In [190]:
os

<module 'os' from '/usr/local/Cellar/python@3.7/3.7.10_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/os.py'>

In [160]:
ls data

In [159]:
ls data

In [158]:
!cat data/FitBit.csv

cat: data/FitBit.csv: No such file or directory


In [164]:
!ls data

In [161]:
!unzip data.zip

Archive:  data.zip


In [151]:
rm -r data

In [171]:
cp data/sample_file_0.csv data/sample_file_1.csv

In [173]:
mkdir data/blabla.exe

In [174]:
ls data

[34mblabla.exe[m[m/        sample_file_0.csv  sample_file_1.csv


In [176]:
import pandas as pd

datasets = []

for file in os.listdir('data/'):
    if file.endswith('.csv'):
        datasets.append(pd.read_csv('data/' + file))

In [178]:
type(datasets)

list

In [180]:
datasets[0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.734751,0.195362,0.734309,0.598184,0.763433,0.263434,0.868066,0.058092,0.753502,0.587513,0.311608,0.178356,0.182922,0.147631,0.391188,0.816049,0.749068,0.29326,0.937828,0.880858
1,0.772607,0.445391,0.249642,0.787922,0.598583,0.827238,0.624126,0.601524,0.688753,0.33887,0.081595,0.471474,0.267443,0.453351,0.800716,0.045749,0.683793,0.389789,0.016787,0.503695
2,0.226428,0.268764,0.694262,0.622335,0.063843,0.122683,0.815625,0.584542,0.032594,0.589775,0.76435,0.650973,0.565705,0.691784,0.265223,0.739031,0.560394,0.334802,0.517694,0.64611
3,0.362748,0.49543,0.113876,0.594149,0.612522,0.625204,0.86405,0.260279,0.528873,0.168043,0.715929,0.677014,0.175735,0.63237,0.926715,0.085675,0.120525,0.141746,0.771144,0.48966
4,0.033415,0.340433,0.464971,0.363737,0.025815,0.434129,0.415163,0.89221,0.381701,0.415264,0.790801,0.69693,0.819751,0.944029,0.869965,0.041723,0.81914,0.676051,0.109349,0.872947


In [None]:
os.listdir()

In [182]:
# Your code here!
my_dataframes = [pd.read_csv('data/'+file) for file in os.listdir('data/') if file.endswith('.csv')]

In [186]:
pd.concat(my_dataframes)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.734751,0.195362,0.734309,0.598184,0.763433,0.263434,0.868066,0.058092,0.753502,0.587513,0.311608,0.178356,0.182922,0.147631,0.391188,0.816049,0.749068,0.29326,0.937828,0.880858
1,0.772607,0.445391,0.249642,0.787922,0.598583,0.827238,0.624126,0.601524,0.688753,0.33887,0.081595,0.471474,0.267443,0.453351,0.800716,0.045749,0.683793,0.389789,0.016787,0.503695
2,0.226428,0.268764,0.694262,0.622335,0.063843,0.122683,0.815625,0.584542,0.032594,0.589775,0.76435,0.650973,0.565705,0.691784,0.265223,0.739031,0.560394,0.334802,0.517694,0.64611
3,0.362748,0.49543,0.113876,0.594149,0.612522,0.625204,0.86405,0.260279,0.528873,0.168043,0.715929,0.677014,0.175735,0.63237,0.926715,0.085675,0.120525,0.141746,0.771144,0.48966
4,0.033415,0.340433,0.464971,0.363737,0.025815,0.434129,0.415163,0.89221,0.381701,0.415264,0.790801,0.69693,0.819751,0.944029,0.869965,0.041723,0.81914,0.676051,0.109349,0.872947
0,0.734751,0.195362,0.734309,0.598184,0.763433,0.263434,0.868066,0.058092,0.753502,0.587513,0.311608,0.178356,0.182922,0.147631,0.391188,0.816049,0.749068,0.29326,0.937828,0.880858
1,0.772607,0.445391,0.249642,0.787922,0.598583,0.827238,0.624126,0.601524,0.688753,0.33887,0.081595,0.471474,0.267443,0.453351,0.800716,0.045749,0.683793,0.389789,0.016787,0.503695
2,0.226428,0.268764,0.694262,0.622335,0.063843,0.122683,0.815625,0.584542,0.032594,0.589775,0.76435,0.650973,0.565705,0.691784,0.265223,0.739031,0.560394,0.334802,0.517694,0.64611
3,0.362748,0.49543,0.113876,0.594149,0.612522,0.625204,0.86405,0.260279,0.528873,0.168043,0.715929,0.677014,0.175735,0.63237,0.926715,0.085675,0.120525,0.141746,0.771144,0.48966
4,0.033415,0.340433,0.464971,0.363737,0.025815,0.434129,0.415163,0.89221,0.381701,0.415264,0.790801,0.69693,0.819751,0.944029,0.869965,0.041723,0.81914,0.676051,0.109349,0.872947
