In [2]:
def section(text):
    print('\033[1m{}\033[0m'.format(text))

---
### Fonctions
- Opération _atomique_ utilisant des entrées et des sorties (optional)
- Simplifie la lisibilité du code en réutilisant des morceaux
- Certaines sont _built-in_ d'autres _user-defined_ et d'autres _imported_
- Les fonctions ont un large spectre de robustesse (_warning_, _error_, ```_return None```)
- Toujours vérifier si une fonction existe avant d'essayer de le faire soit même

---

In [4]:
section('built-in functions')

## Built-in functions (Integer, Float)
a = -10
b = 12.5

print(float(a), int(b), min(a,b), max(a,b), abs(a))

## Built-in functions (String)
text = 'All we have to ___ {0} what to do with the time that {0} given to {1}'.format('is', 'us')
text = text.replace('___', 'decide')
text = text.split(' ')
text = '-'.join(text)
print(text)

## Built-in function (Other) + iterator
var = range(10)
print(var, list(var)) # What is the difference?
##
var_1 = ('1896', '1900', '1904')
var_2 = ('Athen', 'Paris', 'St-Louis')
var = zip(var_1, var_2)
print(var, list(var)) # What is the difference?

[1mbuilt-in function[0m
-10.0 12 -10 12.5 10
All-we-have-to-decide-is-what-to-do-with-the-time-that-is-given-to-us
range(0, 10) [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
<zip object at 0x7f1549b022c0> [('1896', 'Athen'), ('1900', 'Paris'), ('1904', 'St-Louis')]


In [5]:
section('Imports (Part 1)')

import os
# from os.path import join as pjoin

## Path manipulation
path = "/home/User/"
filename = os.path.join(path, 'Desktop', 'file.txt')
print(filename)

basename = os.path.basename(filename)
basename, ext = os.path.splitext(basename)
print(basename, ext)

# Some are more complex than others
print('\nWALK')
for root, dirs, files in os.walk("data/", topdown=False):
    for name in files:
        if os.path.splitext(name)[1] != '.png':
            print(os.path.join(root, name))
        
print('\nGLOB')
from glob import glob
for i, filename in enumerate(glob('data/*.png')):
    print(i, filename)

[1mImports (Part 1)[0m
/home/User/Desktop/file.txt
file .txt

WALK
data/bids/func/sub-X_ses-Y_task-rest_run-02_bold.nii.gz
data/bids/func/sub-X_ses-Y_task-rest_run-01_bold.nii.gz
data/bids/func/sub-X_ses-Y_task-rest_run-02_bold.json
data/bids/func/sub-X_ses-Y_task-rest_run-01_bold.json
data/bids/dwi/sub-X_ses-Y_run-01_dwi.json
data/bids/dwi/sub-X_ses-Y_run-01_dwi_mask.nii.gz
data/bids/dwi/sub-X_ses-Y_run-02_dwi.bvec
data/bids/dwi/sub-X_ses-Y_run-01_dwi.bval
data/bids/dwi/sub-X_ses-Y_run-01_dwi.nii.gz
data/bids/dwi/sub-X_ses-Y_run-02_dwi_mask.nii.gz
data/bids/dwi/sub-X_ses-Y_run-02_dwi.json
data/bids/dwi/sub-X_ses-Y_run-02_dwi.bval
data/bids/dwi/sub-X_ses-Y_run-01_dwi.bvec
data/bids/dwi/sub-X_ses-Y_run-02_dwi.nii.gz
data/bids/swi/sub-X_ses-Y_minIP.nii.gz
data/bids/swi/sub-X_ses-Y_swi.json
data/bids/swi/sub-X_ses-Y_swi.nii.gz
data/bids/swi/sub-X_ses-Y_minIP.json
data/bids/anat/sub-X_ses-Y_T2w.json
data/bids/anat/sub-X_ses-Y_T1w.nii.gz
data/bids/anat/sub-X_ses-Y_T2star.nii.gz
data/bids/

In [99]:
section('Imports (Part 2)')

import random
import math
import itertools

## Random number generator (RNG): What is it?
# random.seed(0) 
print(random.randint(0,100), random.randint(0,100))
tmp_list = [0, 1, 2, 3, 4]
random.shuffle(tmp_list) # In-place
print(tmp_list)

print(math.ceil(1.2), math.floor(1.2))
print(math.pow(4, 2), math.sqrt(16))

str_list = [['a', 'b', 'c', 'd'], ['1', '2', '3', '4']]
var = itertools.chain(*str_list)
print(var, list(var))  # What is the difference?

var = itertools.combinations(str_list[0], r=2)
print(list(var))

var = itertools.product(str_list[0], str_list[0])
print(list(var))

70 61
[1, 4, 0, 2, 3]
2 1
16.0 4.0
<itertools.chain object at 0x7f1ffa41b050> ['a', 'b', 'c', 'd', '1', '2', '3', '4']
[('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'), ('c', 'd')]
[('a', 'a'), ('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'a'), ('b', 'b'), ('b', 'c'), ('b', 'd'), ('c', 'a'), ('c', 'b'), ('c', 'c'), ('c', 'd'), ('d', 'a'), ('d', 'b'), ('d', 'c'), ('d', 'd')]


---

### Bonnes pratiques
- Ordonner ses _imports_ en ordre alphabétique
- Éviter les noms _built-in_
- Séparer les librairies _built-in_ et externe
- Ne pas utiliser ```from numpy import *```
- Si une opération vous apparait très simple et courante, vérifier si une librairie existe

Par exemple:
```from numpy.linalg import norm```
- A function is a small chunk of code that performs a specific tasks, an algorithm. (norm)
- A module is basically a bunch of related code saved in a file with the extension ```.py``` (linalg)
- a Python packages are basically a directory of a collection of modules. (linalg vs numpy, voir GitHub)
- while a package is a collection of modules, a library is a collection of packages (numpy)

https://learnpython.com/blog/python-modules-packages-libraries-frameworks/

---

In [7]:
section('User-defined functions')

## Scope
def my_func():
    x = 10
    print('Value inside function #1: {}'.format(x))
def your_func(x):
    print('Value inside function #2: {}'.format(x))

x = 20
my_func()
your_func(x)
print('Value outside function: {}'.format(x))


## Return 
def greet_1(name):
    """
    This function greets to the person passed in as a parameter
    name: str
    return: None
    """
    print('Hello, {}. Good morning!'.format(name))

def greet_2(name):
    """
    This function greets to the person passed in as a parameter
    name: str
    return: str
    """
    return 'Hello, {}. Good morning!'.format(name)

print(greet_1('Francois')) # What is happening here?
print(greet_2('Maxime'))


[1mUser-defined functions[0m
Value inside function #1: 10
Value inside function #2: 20
Value outside function: 20
Hello, Francois. Good morning!
None
Hello, Maxime. Good morning!


In [6]:
section('User-defined functions (Optimisation)')

from time import time

def is_prime_1(x):
    is_prime = True
    for i in range(2, x-1):
        if x % i == 0:
            is_prime = False
    return is_prime

def is_prime_2(x):
    is_prime = True
    for i in range(3, int(x**0.5) + 1, 2):
        if x % i == 0:
            is_prime = False
    return is_prime

def is_prime_3(x):
    if x == 2 or x == 3:
        return True
    if x % 2 == 0 or x < 2:
        return False

    for i in range(3, int(x**0.5) + 1, 2):
        if x % i == 0:
            return False
    return True

# Try with 65345941, why does #3 slow down ?
timer = time()
print(is_prime_1(65345940), 'Computed in: {}ms'.format((time()-timer)*1000))

timer = time()
print(is_prime_2(65345940), 'Computed in: {}ms'.format((time()-timer)*1000))

timer = time()
print(is_prime_3(65345940), 'Computed in: {}ms'.format((time()-timer)*1000))

[1mUser-defined functions (Optimisation)[0m
False Computed in: 2761.936664581299ms
False Computed in: 0.2269744873046875ms
False Computed in: 0.030040740966796875ms


In [9]:
section('User-defined vs built-in functions')

import random
from time import time

## Data preparation
ages = [1, 12, 15, 18, 21, 25, 30, 36, 45, 54, 65, 75, 85, 90]
names = ['Dave', 'Mike', 'Steve', 'Kevin', 'Roger', 'Blanche',
         'Rose', 'Violette', 'Ginette', 'Sarah', 'Julie', 'Arthur',
         'Lucie', 'Marie']
wealth = [10000, -10000, 20000, -25000, 35000, -50000, 0, 0, 1000,
          -2500, 500000, 1900050, -156547, -6236987]

print(len(ages), len(names), len(wealth))
random.seed(0)
random.shuffle(ages)
random.shuffle(names)
random.shuffle(wealth)
attributes = zip(ages, names, wealth)
## To Try ! Why does it disappears?
# print(list(attributes))
# print(list(attributes))

def sort_att(lst, att=0, reverse=False):
    lst = list(lst)
    
    ordered_lst = []
    while len(ordered_lst) < len(lst):
        min_val = 9999999999
        min_pos = 0
        for pos, val in enumerate(lst):
            if val[att] < min_val and val not in ordered_lst:
                min_pos = pos
                min_val = val[att]
        ordered_lst.append(lst[min_pos])
        
    return ordered_lst[::-1] if reverse else ordered_lst

## To Try: Optional parameters
timer = time()
print(sort_att(attributes), '{}s\n'.format(time()-timer))

from operator import itemgetter
attributes = zip(ages, names, wealth)
timer = time()
print(sorted(attributes, key=itemgetter(0), reverse=True), '{}s'.format(time()-timer))

[1mUser-defined vs built-in functions[0m
14 14 14
[(1, 'Lucie', -2500), (12, 'Kevin', -10000), (15, 'Dave', 35000), (18, 'Violette', 500000), (21, 'Mike', -25000), (25, 'Rose', 10000), (30, 'Steve', 0), (36, 'Julie', 0), (45, 'Arthur', 1000), (54, 'Sarah', 1900050), (65, 'Marie', -156547), (75, 'Blanche', 20000), (85, 'Roger', -6236987), (90, 'Ginette', -50000)] 7.43865966796875e-05s

[(90, 'Ginette', -50000), (85, 'Roger', -6236987), (75, 'Blanche', 20000), (65, 'Marie', -156547), (54, 'Sarah', 1900050), (45, 'Arthur', 1000), (36, 'Julie', 0), (30, 'Steve', 0), (25, 'Rose', 10000), (21, 'Mike', -25000), (18, 'Violette', 500000), (15, 'Dave', 35000), (12, 'Kevin', -10000), (1, 'Lucie', -2500)] 4.9114227294921875e-05s


In [10]:
section('Read file from disk')

import os

def parse(filename):
    with open(filename) as f:
        lines = f.readlines()

    words = []
    caracter_to_remove = ["'", '...', '>', '<', '~', ',', '.', ':', ';', '\n', '!', '?',
                         '(', ')', '-', '[', ']', '{', '}', '#', '$', '%', '&', '*', '+',
                         '/', '=', '|', '_']
    for line in lines:
        if line == '\n':
            continue

        for car_to_rem in caracter_to_remove:
            line = line.replace(car_to_rem, ' ')
        words.extend(line.lower().split())

    return [word for word in words if not word.isdigit()]

word_list = parse(os.path.join('data', 'sherlock_holmes.txt'))
print('Number of words: {}'.format(len(word_list)))
print('Number of unique words: {}'.format(len(set(word_list))))
random_list = random.sample(word_list, 100)

# Find 2 SIMPLE way to make that operation faster
# ---- START ----
def find_in_list(val, lst):
    in_list = False
    for elem in lst:
        if val == elem:
            in_list = True
    return in_list
timer = time()
for pick in random_list:
    _ = find_in_list(pick, word_list)
print('Finding 100 words in a list took {} sec.'.format(round(time() - timer, 6)))
# ---- END ----


[1mRead file from disk[0m
Number of words: 1085775
Number of unique words: 28145
Finding 100 words in a list took 2.424075 sec.


---

### Bonnes pratiques
- Lors d'opération de I/O toujours vérifier si les fichiers existent avant de commencer
- Vérifier si le fichier est vide ou non
- Toujours utiliser un _with statement_
- Lors d'itération sur des données inconnues, faire attention au premier et dernier éléments
- Éviter la conversion en liste si possible (iterator)
- Si une entrée est fourni par l'utilisateur, toujours vous assurez de la conformité des données (type, longeur, dimension, etc.)
- Ne pas continuer l'exécution si les entrées sont invalides
- Écrire des avertissements et des erreurs faciles à comprendre
- Documenter les fonctions, les scripts et le code en général (comme si un inconnu allait devoir le lire)

---

In [47]:
section('Others patterns in Python')

## List Comprehensions
my_even_list = [i for i in range(10) if i % 2 == 0]
print(my_even_list)

## Unpacking
def get_info(id):
    info_dict = {1902: ('Francois', '1991')}
    return info_dict[id]

name, birthdate = get_info(1902)
print(name, birthdate)

## Merge dictionnaries
dict1 = { 'a': 1, 'b': 2 }
dict2 = { 'b': 3, 'c': 4 }

merged = { **dict1, **dict2 } # Python < 3.9
merged = dict1 | dict2 # Python >= 3.9 only
print(merged)

## Use keys, values and items for dictionnaries
print(merged.values(), merged.keys())
for key, value in merged.items():
    print(key, value)

## Using the Pythonic way
l1 = ['a', 'b', 'c', 'd', 'e']
l2 = ['A', 'B', 'C', 'D', 'E']
# Bad
for i in range(len(l1)):
    print(l1[i], l2[i])
print('_')
# Very Bad
for i in range(len(l1)):
    print(l1[len(l1)-i-1], l2[len(l1)-i-1])
print('_')
# Medium
l1, l2 = l1[::-1], l2[::-1]
for tup in zip(l1, l2):
    print(tup[0], tup[1])
print('_')
# Good
for str1, str2 in zip(reversed(l1), reversed(l2)):
    print(str1, str2)
    
## Using Map (part of  the pythonic way)
def capitalize(s):
    return s.upper()
    
mylist = list(map(capitalize, ['sentence', 'fragment']))
print(mylist)

list_of_ints = list(map(int, "1234567"))
print(list_of_ints)

## Understand the power of Python built-in function
def de_capitalize(s):
    return s.lower()
test = 'My first cat was my favorite cat in my opinion'.split()
print(max(set(map(de_capitalize, test)), key = test.count))

## Understand the power of Python built-in librairies
from collections import Counter
print(Counter("aaaaabbbbcccccddz"))

## Dont name variable if you don't need it
for i, _ in enumerate(test):
    print(i)
    
## Multi-line print
s1 = """Multi line strings can be put
        between triple quotes. It's not ideal
        when formatting your code though"""

print (s1)
# Multi line strings can be put
#         between triple quotes. It's not ideal
#         when formatting your code though
        
s2 = ("You can also concatenate multiple\n"
        "strings this way, but you'll have to\n"
        "explicitly put in the newlines")

print(s2)

## Use one-liner for ternary operator For conditional assignment
text = 'Sucess!' if len(test) == 10 else 'Failed!'
print(text)

## Chaining conditions
x = 20
if 5 <= x < 15:
    print('Case A')
elif 15 <= x < 25:
    print('Case B')
else:
    print('Case C')

[0, 2, 4, 6, 8]
Francois 1991
{'a': 1, 'b': 3, 'c': 4}
dict_values([1, 3, 4]) dict_keys(['a', 'b', 'c'])
a 1
b 3
c 4
a A
b B
c C
d D
e E
_
e E
d D
c C
b B
a A
_
e E
d D
c C
b B
a A
_
a A
b B
c C
d D
e E
['SENTENCE', 'FRAGMENT']
[1, 2, 3, 4, 5, 6, 7]
my
Counter({'a': 5, 'c': 5, 'b': 4, 'd': 2, 'z': 1})
0
1
2
3
4
5
6
7
8
9
Multi line strings can be put
        between triple quotes. It's not ideal
        when formatting your code though
You can also concatenate multiple
strings this way, but you'll have to
explicitly put in the newlines
Sucess!
Case B
