# List, Set and Dict Comprehensions

In [1]:
strings = ['a','as','bat','car','dove','python']

[x.upper() for x in strings if len(x)>2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [2]:
unique_length = {len(x) for x in strings}

In [3]:
unique_length

{1, 2, 3, 4, 6}

In [4]:
# express using the map function

set(map(len, strings))

{1, 2, 3, 4, 6}

In [5]:
loc_mapping = {val : index for index, val in enumerate(strings)}

In [6]:
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

In [7]:
# Nested list comprehension

# take the example from just how we seperated the genres and the countries in the kaggle data set

all_data = [['John','Haaarry','Emilaaay','Michael'],['Maria','Juan','Javier','Natallia']]

result = [name for names in all_data for name in names if name.count('a')>2]
result

# the for part of the list comprehension are arranged according to their order of nesting, and any filter is put at the end as before 

['Haaarry', 'Emilaaay', 'Natallia']

In [8]:
some_tuple = ((1,2,3),(4,5,6),(7,8,9))
flatten  = [x for tup in some_tuple for x in tup if x%2==0]

In [9]:
flatten

[2, 4, 6, 8]

In [10]:
[[x for x in tup] for tup in some_tuple]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# Functions

In [11]:
def my_function(x,y,z = 1.5):
    if z>1:
        return z * (x+y)
    else:
        return z/(x+y)

In [12]:
my_function(1,2)

4.5

In [13]:
result = my_function(1,2)
result

4.5

In [14]:
# there is no issue with having multiple return statements 

# if python reaches the end of the function without return statement it returns None

def function_without_return(x):
    print(x)

In [15]:
function_without_return("hello")

hello


In [16]:
result = function_without_return("hello")
result

hello


In [17]:
print(result)

None


In [18]:
# each function can have positional arguments and keyword arguments. 
# in my_function, x and y are positional arguments and z is a keyword argument

my_function(5,6, z = 0.7)

0.06363636363636363

In [19]:
my_function(3.14,7,3.5)

35.49

In [20]:
my_function(10,20)

45.0

# NameSpaces, Scope and Local function

In [21]:
def func():
    a = []
    for i in range(5):
        a.append(i)
    print(a)

# a cannot be accessed outside the function

In [22]:
func()

[0, 1, 2, 3, 4]


In [23]:
a = []
def func():
    for i in range(5):
        a.append(i)
# each function call will modify the list

In [24]:
func()

In [25]:
a

[0, 1, 2, 3, 4]

In [26]:
func()

In [27]:
a

[0, 1, 2, 3, 4, 0, 1, 2, 3, 4]

In [28]:
# Returning multiple values

def f():
    a = 5
    b = 6
    c = 7
    return a,b,c
a,b,c = f()

In [29]:
a

5

In [30]:
b

6

In [31]:
c

7

In [32]:
platforms = [
    '  GitHub  ', 'gitHub', 'GitHub!',
    'GITHUB', 'github.com', 'GitHub??',
    'Stack Overflow', 'StackOverflow', 'Stack overflow!', 'stack OVERFLOW',
    'Kaggle', 'kaggle ', 'Kaggle!', 'KAGGLE ',
    'Jupyter', ' Jupyter Notebook ', 'jupyter-notebook', 'Jupyter!',
    'google colab', 'GoogleColab!', 'Colab ', 'colab', 'Colab??',
    'VS Code', 'vsCode', 'V S Code!', 'vscode', 'Visual Studio Code ',
    'PyCharm', 'pycharm', 'Pycharm!!', ' PYCHARM ', 'py charm'
]

In [33]:
import re
def clean_strings(platforms):
    result = []
    for value in platforms:
        value = value.strip()
        value = re.sub('[!#?]','',value)
        value = value.title()
        result.append(value)
    return result
clean_strings(platforms)

['Github',
 'Github',
 'Github',
 'Github',
 'Github.Com',
 'Github',
 'Stack Overflow',
 'Stackoverflow',
 'Stack Overflow',
 'Stack Overflow',
 'Kaggle',
 'Kaggle',
 'Kaggle',
 'Kaggle',
 'Jupyter',
 'Jupyter Notebook',
 'Jupyter-Notebook',
 'Jupyter',
 'Google Colab',
 'Googlecolab',
 'Colab',
 'Colab',
 'Colab',
 'Vs Code',
 'Vscode',
 'V S Code',
 'Vscode',
 'Visual Studio Code',
 'Pycharm',
 'Pycharm',
 'Pycharm',
 'Pycharm',
 'Py Charm']

In [34]:
# an alternative and more structured approach to this is 
# more understandable
# more readable

def remove_punctuation(value):
    return re.sub('[!#?]','',value)

clean_ops = [str.strip,remove_punctuation,str.title]

def cleanest_strings(platforms,ops):
    result = []
    for value in platforms:
        for func in ops:
            value = func(value)
        result.append(value)
    return result
cleanest_strings(platforms,clean_ops)

['Github',
 'Github',
 'Github',
 'Github',
 'Github.Com',
 'Github',
 'Stack Overflow',
 'Stackoverflow',
 'Stack Overflow',
 'Stack Overflow',
 'Kaggle',
 'Kaggle',
 'Kaggle',
 'Kaggle',
 'Jupyter',
 'Jupyter Notebook',
 'Jupyter-Notebook',
 'Jupyter',
 'Google Colab',
 'Googlecolab',
 'Colab',
 'Colab',
 'Colab',
 'Vs Code',
 'Vscode',
 'V S Code',
 'Vscode',
 'Visual Studio Code',
 'Pycharm',
 'Pycharm',
 'Pycharm',
 'Pycharm',
 'Py Charm']

In [35]:
def apply_to_list(some_list, f):
    return [f(x) for x in some_list]
ints = [1,2,3,4,5]
apply_to_list(ints, lambda x : x**2)

[1, 4, 9, 16, 25]

In [36]:
apply_to_list(ints, lambda x : x**3)
# you see the benefit i could have passed x**2 in the apply_to_list function but i purposely passed a custom operator and now 
# i can get done whatever i want to by using the lambda function

[1, 8, 27, 64, 125]

In [37]:
def add_numbers(x,y):
    return x+y

add_five = lambda y : add_numbers(5,y)

print(add_five(5))
print(add_numbers(5,5))
# the second argument in add_numbers is said to be curried.
# all we have to do is define a new function that calls an existing function

10
10


In [38]:
some_dict = {'a': 1, 'b': 2, 'c': 3}
for key in some_dict:
    print(key)

# we had to use the for loop for iterating through the dictionary

a
b
c


In [39]:
# but we can use iter to avoid using for loop 

dict_iterator = iter(some_dict)

In [40]:
dict_iterator

<dict_keyiterator at 0x29673638900>

In [41]:
list(dict_iterator)

['a', 'b', 'c']

In [42]:
# A generator is a convenient way, similar to writing a normal function, to construct a new iterable object.
# A normal func executes and returns a single result at a time, generators return a sequence of multiple results lazily, pausing after each one until the next is requested

# use yeild instead of return

def squares(n = 10):
    print('Generating squares from 1 to {0}'.format(n**2))
    for i in range(1,n+1):
        yield i ** 2
        

In [43]:
gen =  squares()

In [44]:
gen
# no code is immediately executed when you call a generator

<generator object squares at 0x000002967356FAE0>

In [45]:
for x in gen:
    print(x,end = ' ')

Generating squares from 1 to 100
1 4 9 16 25 36 49 64 81 100 

In [46]:
# Generator expressions

# another way to make a generator is by using a generator expression. This is a generator analogue to list, dict, and set comprehensions.

gen = (x**2 for x in range(100))
gen

<generator object <genexpr> at 0x0000029670FC9220>

In [47]:
for x in gen:
    print(x)

0
1
4
9
16
25
36
49
64
81
100
121
144
169
196
225
256
289
324
361
400
441
484
529
576
625
676
729
784
841
900
961
1024
1089
1156
1225
1296
1369
1444
1521
1600
1681
1764
1849
1936
2025
2116
2209
2304
2401
2500
2601
2704
2809
2916
3025
3136
3249
3364
3481
3600
3721
3844
3969
4096
4225
4356
4489
4624
4761
4900
5041
5184
5329
5476
5625
5776
5929
6084
6241
6400
6561
6724
6889
7056
7225
7396
7569
7744
7921
8100
8281
8464
8649
8836
9025
9216
9409
9604
9801


In [48]:
# generator expression can be used instead of a list comprehension as a function argument in some cases:

sum(x**2 for x in range(100))

328350

In [49]:
dict((i,i**2) for i in range(5))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

# Files and Operating System

In [50]:
path = 'data/segismundo.txt'
f = open(path)

In [51]:
for line in f:
    pass

In [52]:
lines = [x.rstrip() for x in open(path)]

In [53]:
lines

['SueÃ±a el rico en su riqueza,',
 'que mÃ¡s cuidados le ofrece;',
 '',
 'sueÃ±a el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sueÃ±a el que a medrar empieza,',
 'sueÃ±a el que afana y pretende,',
 'sueÃ±a el que agravia y ofende,',
 '',
 'y en el mundo, en conclusiÃ³n,',
 'todos sueÃ±an lo que son,',
 'aunque ninguno lo entiende.',
 '']

In [54]:
f.close() # recommended to close the file

In [55]:
# or you can use the with function which automatically closes the file when exiting the  with block

with open(path) as f:
    lines = [x.rstrip() for x in f]

In [56]:
lines

['SueÃ±a el rico en su riqueza,',
 'que mÃ¡s cuidados le ofrece;',
 '',
 'sueÃ±a el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sueÃ±a el que a medrar empieza,',
 'sueÃ±a el que afana y pretende,',
 'sueÃ±a el que agravia y ofende,',
 '',
 'y en el mundo, en conclusiÃ³n,',
 'todos sueÃ±an lo que son,',
 'aunque ninguno lo entiende.',
 '']

In [57]:
f = open(path)

In [58]:
f.read(10)

'SueÃ±a el '

In [59]:
f2= open(path,'rb') # binary mode

In [60]:
f2.read(10)

# the read mode advances the file handels position by the number of bytes read.

b'Sue\xc3\xb1a el '

In [61]:
# tell gives you the current position

f.tell()

10

In [62]:
f2.tell()

10

In [63]:
f.seek(3)
# seek changes the file position to the indicated byte in this file

3

In [64]:
f.read(1)

'Ã'

In [65]:
f.close()

In [66]:
f2.close()

# Conclusion