# Snippets

Some useful pieces of code.

## Argsort

Get the indices of the items in an array sorted in decreasing order (largest to smallest) _according to the value of the items_.

In [1]:
import numpy as np

# Example array whose values we want to rank.
x = np.array([4, 3, 5, 1, 2])
# np.argsort returns the indices in increasing order of value,
# so we flip the result to obtain the decreasing order.
decreasing_indices = np.flip(np.argsort(x), axis=0)

print(x[decreasing_indices])

[5 4 3 2 1]


## Defaultdict

Initialise a dictionary that supplies a default value of a given type for any key that has not been set yet.

In [2]:
from collections import defaultdict

# defaultdict(int) fills in int() == 0 as the value of any key on first access
d = defaultdict(int)

# Neither 'a' nor 'b' exists yet, so each starts from 0 before being incremented
d['a'] = d['a'] + 1
d['b'] = d['b'] + 5

# Both keys now hold the amounts that were added
print(d)
# Reading a key that was never set returns the default 0 (and stores it in the dict)
print(d['c'])

defaultdict(<class 'int'>, {'b': 5, 'a': 1})
0


## Sparse matrices

In [3]:
import numpy as np
from scipy.sparse import csr_matrix

# Matrix dimensions: m rows by n columns
m, n = 10000, 100

# Non-zero entries to store
values = [1, 42, 3.14156926536, 100, 1000]

# Coordinates of those entries: values[i] is placed at (rows[i], columns[i])
rows = [0, 5, 100, 2673, 9999]
columns = [0, 0, 0, 62, 99]

# Build the matrix in CSR format from the (value, (row, column)) triplets;
# every other cell of the m x n matrix is an implicit zero
X = csr_matrix((values, (rows, columns)), shape=(m, n))
print(X)

  (0, 0)	1.0
  (5, 0)	42.0
  (100, 0)	3.14156926536
  (2673, 62)	100.0
  (9999, 99)	1000.0


## Get value with index from RDD

In [None]:
from pyspark import SparkContext

# Reuse the SparkContext that a PySpark shell/kernel already exposes as `sc`,
# or start one when this notebook runs on a plain Python kernel. Without this
# line the cell raises NameError after a kernel restart, because `sc` is not
# defined anywhere else in the notebook.
sc = SparkContext.getOrCreate()

# Create an RDD from a small local list
values = sc.parallelize(['a', 'b', 'c', 'd'])

# Pair each element with its position in the RDD: ('a', 0), ('b', 1), ...
values_with_index = values.zipWithIndex()

# take(4) brings the first four (value, index) pairs back to the driver
print(values_with_index.take(4))

## Spark's sparse vectors 

In [None]:
from pyspark.mllib.linalg import Vectors 

# Dimension of the vector (total number of entries, zeros included)
d = 1000

# Positions of the non-zero entries and the value stored at each of them
indices, values = [0, 10, 100, 999], [1, 3.14, 42, 11]

# Build the sparse vector from its size, indices and values
v = Vectors.sparse(d, indices, values)

print(v)

## Save JSON

See the `utils.py` file for the implementation of the `save_json` and `load_json` helpers used below.

In [1]:
from utils import save_json

# Example payload: a list of small dictionaries
dictionaries = [{'a': 0}, {'b': 1}, {'c': 2}]

# save_json comes from the local utils.py; presumably it serialises the
# object to the given path -- see utils.py for the exact on-disk format
save_json(dictionaries, 'dictionaries.txt')

## Load JSON

In [2]:
from utils import load_json

# Read back 'dictionaries.txt', the file written in the "Save JSON" section
dictionaries = load_json('dictionaries.txt')
# Show the loaded content
print(dictionaries)

[{'a': 0}, {'b': 1}, {'c': 2}]
