# Python

In [15]:
# Types of variables: type() returns the class of the value passed to it
print(type(5))                              # int
print(type(5.0))                            # float
print(type("5"))                            # str
print(type(["apple", "banana", "cherry"]))  # list (mutable; items can be str, int, list, etc.)
print(type((2, 3)))                         # tuple (immutable)
print(type({"name" : "John", "age" : 36}))  # dict (key -> value mapping)
print(type({4, 5, 6}))                      # set (unique items)
print(type(True))                           # bool

<class 'int'>
<class 'float'>
<class 'str'>
<class 'list'>
<class 'tuple'>
<class 'dict'>
<class 'set'>
<class 'bool'>


In [2]:
# List
l = [8,6,4,2]
print(l)

l.sort() # sort in place, ascending (the list starts in descending order, so reverse() gives the same result here)
print(l)

l.append(8) # add a value at the end of the list
print(l)

print(l.index(2)) # return the index of the first occurrence of a value

l.pop(0) # warning: pop removes the item at the given index in place (and returns it)
print(l)

print(l.count(8)) # return the number of occurrences of a value

[8, 6, 4, 2]
[2, 4, 6, 8]
[2, 4, 6, 8, 8]
0
[4, 6, 8, 8]
2


In [3]:
# List comprehension: build a list by evaluating an expression for each item of an iterable
[x**2 for x in range(10)]

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [4]:
# Dictionary
d = {
    1: 1,
    3: 9,
    2: 4,
    4: 16,
}

print(d[3]) # print the value stored under a specific key
d[5] = 25 # add an entry
print(d)
print(d.keys()) # return all keys
print(d.values()) # same with values

# Sort a dict by value (a new dict is rebuilt from the sorted items)
print({k: v for k, v in sorted(d.items(), key=lambda item: item[1])})

# Sort a dict by key (note: this yields a list of (key, value) tuples, not a dict)
print(sorted(d.items()))

9
{1: 1, 3: 9, 2: 4, 4: 16, 5: 25}
dict_keys([1, 3, 2, 4, 5])
dict_values([1, 9, 4, 16, 25])
{1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
[(1, 1), (2, 4), (3, 9), (4, 16), (5, 25)]


In [5]:
# Sets (unordered collections of unique items)
print({'a', 'b', 'c', 'd'}.union({'b', 'c', 'd', 'e'})) # items present in either set
print({'a', 'b', 'c', 'd'}.intersection({'b', 'c', 'd', 'e'})) # items present in both sets
print({'a', 'b', "c"}.difference({'a', 'c'})) # items of the first set that are not in the second
print({'a', 'c'}.issubset({'a', 'b'})) # True only if every item of the first set is in the second


{'b', 'd', 'e', 'c', 'a'}
{'b', 'c', 'd'}
{'b'}
False


# Functions

In [6]:
# Basic function
def square(x=2): # optionally assign a default value to the input variable
  """Return x raised to the power of 2 (x defaults to 2)."""
  return x ** 2

print(square(9))
print(square()) # no argument given, so the default value (x=2) is used

81
4


In [7]:
# Annotating (and checking) the input and output types
def square(x: float) -> float: # type hints document the expected types; Python does not enforce them at runtime
  """Return x squared; x must be exactly a float."""
  assert type(x) is float # raises AssertionError if x is not a float (asserts are skipped under `python -O`)
  return x ** 2

square(3.0)

9.0

In [8]:
# Using loops
def multiply(x, y):
  """Multiply x by the integer y using repeated addition.

  x is added to an accumulator abs(y) times, then the sign is flipped when y
  is negative, so multiply(3, -2) returns -6 (a plain range(y) loop would
  never run for a negative y and would return 0).

  range() raises TypeError when y is not an integer.
  """
  res = 0
  for _ in range(abs(y)):
    res += x
  return -res if y < 0 else res

multiply(12, 15)

180

In [9]:
# Lambda functions: small anonymous functions written as a single expression
f = lambda x : x**2
f(9)

81

In [10]:
# Handle exceptions
try:
    # dividing by zero raises an exception that the handler below reports
    (lambda x : x/0)(1)
except Exception as e:
    # print() converts e with str() and joins its arguments with a single space
    print("ERROR: ", e)

ERROR:  division by zero


# Strings

In [11]:
# Strings
line = ' the quick brown fox jumped over a lazy dog  '
print(line.find('fox')) # return the starting index of the first occurrence of the word
print(line.strip(" ")) # remove leading and trailing spaces (strip() with no argument removes all whitespace)
print(line.split(" ")) # split on every single space, which yields empty strings; split() with no argument splits on runs of whitespace
#.splitlines() will split on line breaks

# combining types
print(str(75) + "... is the number of cats in my house")

# pretty print text and variables (here: a float rounded to 3 decimals)
pi = 3.141592654
print(f'pi = {pi:.3f}')

17
the quick brown fox jumped over a lazy dog
['', 'the', 'quick', 'brown', 'fox', 'jumped', 'over', 'a', 'lazy', 'dog', '', '']
75... is the number of cats in my house
pi = 3.142


# Regex (Regular Expression)
A visual guide to Regex:
https://amitness.com/regex/

Regex crash course:
https://colauttilab.github.io/PythonCrashCourse/2_regex.html

Common patterns:
https://digitalfortress.tech/tricks/top-15-commonly-used-regex/


### Identifiers
| Character| Description | Example Pattern Code | Example Match |
| --- | --- | --- | --- |
| \d | A digit | file_\d\d | file_25 |
| \w | Alphanumeric | \w-\w\w\w | A-b_1 |
| \s | Whitespace | a\sb\sc | a b c |
| \D | A non digit | \D\D\D | ABC |
| \W | Non-alphanumeric | \W\W\W\W\W | *-+=) |
| \S | Non-whitespace| \S\S\S\S | Yoyo |

### Quantifiers
| Character| Description | Example Pattern Code | Example Match |
| --- | --- | --- | --- |
| + | Occurs one or more times | Version \w-\w+| Version A-b1_1 |
| {3} | Occurs exactly 3 times | \D{3}| abc |
| {2,4} | Occurs 2 to 4 times | \d{2,4} | 123 |
| {3,} | Occurs 3 or more | \w{3,} | anycharacters |
| \* | Occurs zero or more times | A\*B\*C* | AAACC |
| ? | Once or none| plurals? | plural |

In [28]:
import re
text = "To email Guido, try guido@python.org or the older address guido@google.com!"
# find a specific character
carac = re.compile('@')
print(carac.search(text))

# split on whitespace
# (patterns are raw strings so that backslashes such as \s reach the regex
# engine unchanged instead of being read as string escape sequences)
spaces = re.compile(r'\s+')
print(spaces.split(text))

# find a matching pattern ([] defines a set of allowed characters)
email = re.compile(r'\w+@\w+\.[a-z]{3}')
print(email.findall(text))
# email.search(text) will only return the first match

# replace
print(email.sub('--@--.--', text))

# remove punctuation (one liner)
print(''.join(re.findall(r'[^\!.,?]+', text)))
# re.sub(r'\W', '', text) would remove every non-word character (spaces included)


<re.Match object; span=(25, 26), match='@'>
['To', 'email', 'Guido,', 'try', 'guido@python.org', 'or', 'the', 'older', 'address', 'guido@google.com!']
['guido@python.org', 'guido@google.com']
To email Guido, try --@--.-- or the older address --@--.--!
To email Guido try guido@pythonorg or the older address guido@googlecom


# Input / Output files

In [13]:
import os
# list all files in the current directory (ls in bash)
print(os.listdir())
# get the current working directory (pwd in bash)
print(os.getcwd())

['cities.csv', 'data', 'm1a-Python.ipynb', 'm1b-Algorithms.ipynb', 'm2a-Pandas.ipynb', 'm2b-Vizualisation.ipynb']
c:\Users\user\git\work\summary cb-ds-1


In [14]:
# Write a small csv file (the same approach works for a plain txt file)
with open('cities.csv', 'w') as f:
    f.write(
    """city, population
new york, 8244910
los angeles, 3819702""")

# Read the file back
with open('cities.csv', 'r') as f:
    # f.read() would return the whole content as a single str:
    #cities = f.read()

    # here every line becomes the list of its comma-separated fields
    cities_list = [
        row.replace('\n', '').strip().split(',')
        for row in f
    ]

#print(cities)
print(cities_list)

[['city', ' population'], ['new york', ' 8244910'], ['los angeles', ' 3819702']]


# Classes

In [15]:
class BankAccount():
    """Simple account that tracks a cash balance and an invested amount."""

    # define the class, variables and default values
    def __init__(self, initial_deposit=0):
        self.balance = initial_deposit
        self.investment = 0

    # methods are used to modify the state of the instance
    def deposit(self, x):
        """Add x to the balance."""
        self.balance += x

    def withdraw(self, x):
        """Subtract x from the balance."""
        self.balance -= x

    # some methods are reserved to use operators
    def __add__(self, x):
        """Handle `account + x`: add x to the balance in place, return the account.

        Returning self (rather than None) means `account = account + x` and
        `account += x` keep the name bound to the account.
        """
        self.balance += x
        return self

    def __sub__(self, x):
        """Handle `account - x`: subtract x from the balance in place, return the account.

        Returns self for the same reason as __add__.
        """
        self.balance -= x
        return self

# sub-classes can inherit properties and methods
class Rrsp(BankAccount):
    """BankAccount that can move money from its balance into an investment."""

    def invest(self, x):
        """Move x from the balance to the investment."""
        self.balance -= x
        self.investment += x

    def show(self):
        """Print the current investment and balance; returns None."""
        print(f"investment = {self.investment}, balance = {self.balance}")

In [16]:
account = BankAccount() # no initial deposit given, so the balance starts at 0
print(account.balance)
account.deposit(5)
print(account.balance)
account + 10 # calls BankAccount.__add__, which adds 10 to the balance in place
print(account.balance)
account - 7 # calls BankAccount.__sub__, which subtracts 7 from the balance in place
print(account.balance)

0
5
15
8


In [17]:
rrsp = Rrsp()
rrsp + 15 # __add__ is inherited from BankAccount
rrsp.invest(5) # moves 5 from the balance to the investment
rrsp.show()

investment = 5, balance = 10


# Numpy

In [18]:
import numpy as np
#np.__version__

In [19]:
# Get dimensions of an array
a = np.array([[2,3,4], [6,7,8]])
np.shape(a)
#np.shape(a)[0] # return the number of rows (2)
#np.shape(a)[1] # return the number of columns (3)

(2, 3)

In [20]:
# Generate arrays
print(np.zeros(10, dtype=int)) # can also use eye, ones and full
print(np.random.rand(5)) # return 5 random values in [0, 1)
print(np.random.randint(0, 9, 10)) # return 10 random integers from 0 to 8 (the upper bound 9 is excluded)
print(np.arange(1, 10)) # return a range (the stop value is excluded)
print(np.linspace(-1,1,10).reshape(2,5)) # return 10 equally spaced values from -1 to 1 (both included), reshaped to 2 rows x 5 columns

[0 0 0 0 0 0 0 0 0 0]
[0.48243097 0.20126146 0.0809347  0.98036049 0.92561925]
[4 4 4 0 6 6 4 4 3 8]
[1 2 3 4 5 6 7 8 9]
[[-1.         -0.77777778 -0.55555556 -0.33333333 -0.11111111]
 [ 0.11111111  0.33333333  0.55555556  0.77777778  1.        ]]


In [21]:
# Operations on arrays
# (only the last expression of a cell is displayed; a.sort() returns None, so this cell shows no output)
a = np.array((4, 3, 2, 1))

a.sum()               # Sum
a.mean()              # Mean
a.max()               # Max
a.argmax()            # Returns the index of the maximal element
a.cumsum()            # Cumulative sum of the elements of a
a.cumprod()           # Cumulative product of the elements of a
a.var()               # Variance
a.std()               # Standard deviation
a.T                   # Transpose an array (a is 1-D here, so it is returned unchanged)
a.sort()              # Sort an array in place (returns None)

In [22]:
# Indexing
x = np.array([
    [3, 5, 2, 4],
    [9, 6, 8, 0],
    [1, 6, 3, 7]]
)
# index (row, column); negative indices count from the end
print(x[2, -1])

# slices [start : stop : step] also work; here: all rows, column 2
print(x[0:,2])

7
[2 8 3]


In [23]:
# Advanced operations
x = np.arange(1, 6)
print(x)
print(x.cumsum()) # running sum; same as np.add.accumulate(x)
print(x.cumprod()) # running product

[1 2 3 4 5]
[ 1  3  6 10 15]
[  1   2   6  24 120]


In [24]:
# Filtering
x = np.arange(0, 10)
np.where(x > 5) # returns a tuple of index arrays; x[x > 5] returns the matching values instead

(array([6, 7, 8, 9], dtype=int64),)

# Magic commands

%magic

%timeit myFunction() - measure the execution time of a statement (averaged over several runs)

%run myCode.py - run code file

%who - list current variables
