# SD212: Graph mining

# Python basics

This notebook presents some Python basics:
* lists, sets, dictionaries
* numpy arrays
* files (for loading / saving data)

Recall that you can use:
* `tab` for completion
* `?` for inline help (e.g. `sorted?`)

In [1]:
# example: appending ? to a name shows its inline help (IPython syntax, not plain Python)
sorted?

## Import

In [2]:
import numpy as np

## List

In [3]:
names = ['Alice', 'Bernard', 'Carole', 'David']

In [4]:
names[-2:]

['Carole', 'David']

In [5]:
names.append('Elodie')

In [6]:
# += extends the list in place with every element of the right-hand list
names += ['Ferdinand', 'Gabrielle']

In [7]:
names

['Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle']

In [8]:
# list comprehension: keep the names that do not contain a lowercase 'i'
names_sublist = [name for name in names if 'i' not in name]

In [9]:
names_sublist

['Bernard', 'Carole']

## Set

In [10]:
set(names)

{'Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle'}

In [11]:
# <= on sets tests inclusion: is every element of the left set also in the right set?
set(names_sublist) <= set(names)

True

In [12]:
# set difference: the names that are in names but not in names_sublist
set(names) - set(names_sublist)

{'Alice', 'David', 'Elodie', 'Ferdinand', 'Gabrielle'}

In [13]:
# add a duplicate on purpose: the list grows to 8 items while set(names) keeps 7 elements
names += ['Alice']

In [14]:
len(names)

8

In [15]:
len(set(names))

7

In [16]:
a = set(names)

In [17]:
b = {'Hector', 'Irina'}

In [18]:
a & b

set()

In [19]:
a | b

{'Alice',
 'Bernard',
 'Carole',
 'David',
 'Elodie',
 'Ferdinand',
 'Gabrielle',
 'Hector',
 'Irina'}

In [20]:
b.add('Alice')

In [21]:
a & b

{'Alice'}

## Dictionary

In [22]:
len(names)

8

In [23]:
# dict comprehension: map each name to its number of characters; the duplicate
# 'Alice' in names collapses to a single key, so 8 list items give 7 entries
length = {name: len(name) for name in names}

In [24]:
len(length)

7

In [25]:
list(length.keys())

['Alice', 'Bernard', 'Carole', 'David', 'Elodie', 'Ferdinand', 'Gabrielle']

In [26]:
list(length.values())

[5, 7, 6, 5, 6, 9, 9]

In [27]:
# walk the (key, value) pairs of the dictionary, in insertion order
for name, n_chars in length.items():
    print(name, n_chars)

Alice 5
Bernard 7
Carole 6
David 5
Elodie 6
Ferdinand 9
Gabrielle 9


In [28]:
# Iterate over sorted(b) rather than b itself: the iteration order of a set of
# strings changes between kernel sessions (hash randomization), which would make
# the order of the keys later inserted into `length` irreproducible.
length_to_add = {name: len(name) for name in sorted(b)}

In [29]:
length.update(length_to_add)

In [30]:
# walk the (key, value) pairs of the dictionary, in insertion order
for name, n_chars in length.items():
    print(name, n_chars)

Alice 5
Bernard 7
Carole 6
David 5
Elodie 6
Ferdinand 9
Gabrielle 9
Hector 6
Irina 5


## Numpy

In [31]:
vector = np.array([5, 1, 2, 4])

In [32]:
np.zeros_like(vector, dtype=int)

array([0, 0, 0, 0])

In [33]:
vector[-2:]

array([2, 4])

In [34]:
np.sort(vector)

array([1, 2, 4, 5])

In [35]:
np.argsort(vector)

array([1, 2, 3, 0])

In [36]:
# negate the values to get the indices that sort the vector in decreasing order
np.argsort(-vector)

array([0, 3, 2, 1])

In [37]:
# -1 lets numpy infer that dimension: 12 values over 3 rows gives 4 columns
matrix = np.arange(12).reshape(3, -1)

In [26]:
matrix

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [27]:
matrix.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [28]:
# axis=0 sums over the rows: one total per column
matrix.sum(axis = 0)

array([12, 15, 18, 21])

In [29]:
# axis=1 sums over the columns: one total per row
matrix.sum(axis = 1)

array([ 6, 22, 38])

In [30]:
# matrix-vector product: one dot product per row of matrix
matrix @ vector

array([ 17,  65, 113])

In [31]:
matrix[:2]

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [32]:
# first two rows, all columns but the first, selected in a single indexing
# operation (no intermediate view as with matrix[:2][:, 1:])
matrix[:2, 1:]

array([[1, 2, 3],
       [5, 6, 7]])

In [33]:
indices = np.array([0, 1, 1, 2, 0])

In [34]:
vector[indices]

array([5, 1, 1, 2, 5])

In [35]:
v = vector[indices]

In [36]:
np.unique(v)

array([1, 2, 5])

In [37]:
# return_counts=True also returns how many times each unique value occurs
np.unique(v, return_counts=True)

(array([1, 2, 5]), array([2, 1, 2]))

In [38]:
# rebinds vector: from here on it holds the integers -3, ..., 9 (the stop value 10 is excluded)
vector = np.arange(-3, 10)

In [39]:
vector > 0

array([False, False, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True])

In [40]:
np.sum(vector > 0)

9

In [41]:
vector[vector > 0]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [42]:
# positions of the entries satisfying the condition, directly as a flat 1-D
# index array (same result as np.argwhere(...).ravel() for a 1-D input)
index = np.flatnonzero(vector > 0)

In [44]:
index

array([ 4,  5,  6,  7,  8,  9, 10, 11, 12])

In [43]:
vector[index]

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [60]:
# each value of the first array is repeated as many times as the matching entry
# of the second array; 0 is repeated 0 times, so it is dropped
np.repeat(np.arange(5), np.arange(5))

array([1, 2, 2, 3, 3, 3, 4, 4, 4, 4])

In [61]:
# the value i is repeated 2 * i times
np.repeat(np.arange(5), 2 * np.arange(5))

array([1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4])

In [None]:
# reduceat sums the slices delimited by consecutive indices, the last slice running
# to the end of the array: here [0:1], [1:2], [2:3], [3:4] and [4:]
np.add.reduceat(np.arange(10), [0, 1, 2, 3, 4])

In [None]:
# sums of the slices [0:4], [4:6] and [6:] of the array
np.add.reduceat(np.arange(10), [0, 4, 6])

In [None]:
# on a 2-D array reduceat works along axis 0 by default: sums of the row
# groups [0:2], [2:3] and [3:] of the 4 x 3 matrix
np.add.reduceat(np.arange(12).reshape(4, -1), [0, 2, 3])

In [None]:
# axis=1 groups the columns instead: sums of the column groups [0:2] and [2:]
np.add.reduceat(np.arange(12).reshape(4, -1), [0, 2], axis=1)

In [17]:
np.random.choice(6)

3

In [20]:
np.random.choice([1,3,6])

1

In [39]:
# draw 4 values; p gives the probability of each candidate, in the same order
# NOTE(review): no seed is set, so the sampled values change on every run
np.random.choice([1,3,6], p=[0.1, 0.8, 0.1], size=4)

array([3, 6, 3, 3])

In [None]:
# np.save appends the .npy extension: the array is written to vector.npy
np.save('vector', vector)

In [None]:
np.load('vector.npy')

## Files

In [None]:
# write one name per line
with open('names.txt', 'w') as f:
    f.writelines(name + '\n' for name in names)

In [None]:
# read the file back, one name per line
names_load = []
with open('names.txt', 'r') as f:
    for row in f:
        # strip only the trailing newline: row[:-1] would also cut the last
        # character of a final line that does not end with '\n'
        names_load.append(row.rstrip('\n'))

In [None]:
names_load