## Import Packages

In [1]:
import numpy as np
from itertools import product
from pandas import DataFrame

## Random Function

In [2]:
def fakedata(arg, min=0, max=1, size=None, decimals=6):
    """Generate random integers, random floats, or random picks from a sequence.

    Parameters
    ----------
    arg : 'int', 'float', or a 1D list / 1D tuple (subclasses included)
        'int' draws integers, 'float' draws rounded floats, and a
        list/tuple draws elements from that sequence.
    min : number
        Lower bound of the draw (inclusive). Used only for 'int' and 'float'.
    max : number
        Upper bound of the draw. Used only for 'int' and 'float'.
        For 'int' the bound is exclusive (np.random.randint draws from
        [min, max)). For 'float' the raw sample lies in [min, max), but
        rounding to `decimals` digits can yield `max` itself.
    size : int or tuple of ints, optional
        Output shape. With the default None a single value is returned.
    decimals : int
        Number of decimal digits kept. Used only for 'float'.

    Returns
    -------
    samples : single item or ndarray of random data

    Raises
    ------
    AssertionError
        If `arg` is not 'int', 'float', a list or a tuple, or if
        `min` is not strictly less than `max` for 'int' / 'float'.

    Examples
    --------
    Outputs are random and vary from run to run unless the global seed is set.

    >>> fakedata('float', min=50, max=70, size=2, decimals=1)
    array([55.8, 62.4])
    >>> fakedata('int', min=1000, max=5000)
    3322
    >>> fakedata(['A','B','C'], size = 8)
    array(['A', 'C', 'C', 'B', 'B', 'B', 'C', 'A'], dtype='<U1')

    """

    # Compare against the keywords only when arg really is a string, so a
    # sequence argument is never pushed through an element-wise '=='.
    is_keyword = isinstance(arg, str)
    # isinstance (rather than type(...) ==) also admits list/tuple subclasses,
    # e.g. namedtuples.
    assert (is_keyword and arg in ('int', 'float')) or isinstance(arg, (list, tuple)), \
        "arg must be 'int', 'float', or a 1D list/tuple"

    if is_keyword:
        assert min < max, "min must be strictly less than max"
        if arg == 'int':
            return np.random.randint(min, high=max, size=size)
        # Stretch a [0, 1) sample onto [min, max), then round.
        return np.around(min + (max - min) * np.random.random_sample(size=size), decimals=decimals)

    return np.random.choice(arg, size)

## Lambda function

In [3]:
# Seed the global NumPy RNG so the sample below is reproducible
np.random.seed(0)
a = fakedata('int', min=0, max=9, size=10)
a

array([5, 0, 3, 3, 7, 3, 5, 2, 4, 7])

In [4]:
# Very simple application: a named lambda that forwards its argument to np.sum
mysum = lambda x: np.sum(x)

In [5]:
mysum(a)

39

In [6]:
# Ratio of the average to the standard deviation of (x**2 + 0.01*x + 10).
# Note duplicate computation of (x**2 + 0.01*x + 10)
myfunc = lambda x: np.average(x**2 + 0.01*x + 10)/np.std(x**2 + 0.01*x + 10)

In [7]:
myfunc(a)

1.774482433174956

In [8]:
# Note duplicate computation of (x**2 + 0.01*x + 10) is avoided:
# the outer lambda evaluates the expression once and hands it to the inner
# lambda as y, which reuses y for both np.average and np.std.
(lambda x: (lambda y: np.average(y)/np.std(y))(x**2 + 0.01*x + 10))(a)

1.774482433174956

In [9]:
# Compare two counts and return a (label, share) pair:
#   'A' with c1's share of the total when c1 is larger,
#   'B' with c2's share of the total when c2 is larger,
#   'X' otherwise -- with 0.5 when c1 > 0, and NaN if not.
ppcounts = lambda c1, c2: (
    ('A', c1 / (c1 + c2)) if c1 > c2
    else ('B', c2 / (c1 + c2)) if c2 > c1
    else ('X', 0.5) if c1 > 0
    else ('X', np.nan)
)

In [10]:
ppcounts(1, 2)

('B', 0.6666666666666666)

In [11]:
ppcounts(2, 1)

('A', 0.6666666666666666)

In [12]:
ppcounts(2, 2)

('X', 0.5)

In [13]:
ppcounts(0, 0)

('X', nan)

## np.add.at

#### Potential application: Updating counts during histogram construction

In [14]:
# ainit stays untouched so it can be displayed next to the updated array
ainit = np.zeros(10)
a = ainit.copy()
# Add 1 at each listed position of a, in place
indices = [2, 4, 6, 8]
np.add.at(a, indices, 1)

In [15]:
DataFrame([ainit,a], index=['Input array','Output array'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Input array,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Output array,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0


In [16]:
# ainit stays untouched so it can be displayed next to the updated array
ainit = np.zeros(10)
a = ainit.copy()
# Positions 2 and 4 are listed twice, so each of them receives the increment twice
indices = [2, 4, 6, 8, 2, 4]
np.add.at(a, indices, 10)

In [17]:
DataFrame([ainit,a], index=['Input array','Output array'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Input array,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Output array,0.0,0.0,20.0,0.0,20.0,0.0,10.0,0.0,10.0,0.0


##### Two-dimensional application

In [18]:
np.random.seed(1)
# Two int32 5x5 grids of zeros: ainit2 is kept as-is for display, a2 is updated
ainit2 = np.zeros((5, 5), dtype='int32')
a2 = ainit2.copy()
# 20 random (row, column) pairs, transposed so that row 0 holds the row
# indices and row 1 holds the column indices
indices2 = fakedata('int', min=0, max=5, size=(20, 2)).T
# Add 1 at every (row, column) pair
np.add.at(a2, tuple(indices2), 1)

In [19]:
DataFrame(indices2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,3,0,3,0,4,1,4,4,4,4,4,1,1,1,0,1,0,2,0,1
1,4,1,0,1,4,2,2,3,2,2,1,0,1,1,4,0,3,1,3,1


In [20]:
DataFrame(ainit2)

Unnamed: 0,0,1,2,3,4
0,0,0,0,0,0
1,0,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0


In [21]:
DataFrame(a2)

Unnamed: 0,0,1,2,3,4
0,0,2,0,2,1
1,2,3,1,0,0
2,0,1,0,0,0
3,1,0,0,0,1
4,0,1,3,1,1


## np.clip

#### Potential application: Ensuring that ONLY valid bins are accessed during histogram construction

In [22]:
# Integers -6..5, then limited to the closed range [0, 4]
a = np.arange(-6, 6)
b = a.clip(0, 4)

In [23]:
DataFrame([a,b], index=['Input array','Clipped array'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
Input array,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5
Clipped array,0,0,0,0,0,0,0,1,2,3,4,4


In [24]:
# Integers 5..14, then limited to the closed range [10, 12]
a = np.arange(5, 15)
b = a.clip(10, 12)

In [25]:
DataFrame([a,b], index=['Input array','Clipped array'])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Input array,5,6,7,8,9,10,11,12,13,14
Clipped array,10,10,10,10,10,10,11,12,12,12


## itertools.product

#### Potential application: Setting all the parameters of a Bayesian classifier

In [26]:
list(product(['a', 'b', 'c'],['X', 'Y']))

[('a', 'X'), ('a', 'Y'), ('b', 'X'), ('b', 'Y'), ('c', 'X'), ('c', 'Y')]

In [27]:
# Toy callable: returns its argument plus 1
fun1 = lambda arg: arg+1

In [28]:
# Toy callable: returns its argument plus 2
fun2 = lambda arg: arg+2

In [29]:
# Toy callable: returns its argument plus 3
fun3 = lambda arg: arg+3

In [30]:
# Takes a (function, argument) pair and returns function(argument)
applyfun = lambda funarg: funarg[0](funarg[1])

In [31]:
list(map(applyfun, product([fun1, fun2, fun3],[5, 6, 7, 8])))

[6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11]

In [32]:
# Returns [sum of all entries of arr, 1]; the constant 1 presumably tags the result as coming from func1
func1 = lambda arr: [np.sum(arr), 1]

In [33]:
# Returns [trace of arr, 2]; the constant 2 presumably tags the result as coming from func2
func2 = lambda arr: [np.trace(arr), 2]

In [34]:
# Returns [determinant of arr, 3]; the determinant is a float and may show round-off.
# The constant 3 presumably tags the result as coming from func3
func3 = lambda arr: [np.linalg.det(arr), 3]

In [35]:
# Seed the global NumPy RNG so the sample below is reproducible
np.random.seed(0)
# A stack of two 3x3 integer matrices
arr = fakedata('int', min=0, max=9, size=(2, 3, 3))
arr

array([[[5, 0, 3],
        [3, 7, 3],
        [5, 2, 4]],

       [[7, 6, 8],
        [8, 1, 6],
        [7, 7, 8]]])

In [36]:
list(map(applyfun, product([func1, func2, func3],arr)))

[[32, 1], [58, 1], [16, 2], [16, 2], [23.0, 3], [21.999999999999996, 3]]

## Dictionaries

#### Potential application: Applying Kesler's construction

In [37]:
# Seed the global NumPy RNG so the sample below is reproducible
np.random.seed(0)
a = fakedata('int', min=0, max=9, size=10)
a

array([5, 0, 3, 3, 7, 3, 5, 2, 4, 7])

In [38]:
k = [chr(i) for i in range(65, 65+10)]; k

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

In [39]:
v = a; v

array([5, 0, 3, 3, 7, 3, 5, 2, 4, 7])

In [40]:
mydict = dict(zip(k, v)); mydict

{'A': 5,
 'B': 0,
 'C': 3,
 'D': 3,
 'E': 7,
 'F': 3,
 'G': 5,
 'H': 2,
 'I': 4,
 'J': 7}

In [41]:
somekey = 'D'

In [42]:
mydict[somekey]

3

In [43]:
fewkeys = ['D', 'A', 'J']

In [44]:
[mydict[k] for k in fewkeys] # Works, but looping may be undesirable

[3, 5, 7]

In [45]:
# Does not work: a list is unhashable, so it cannot be used as a dict key.
# The TypeError is caught and printed so the notebook still runs top to bottom.
# See workaround using 'itemgetter' below
try:
    mydict[fewkeys]
except TypeError as err:
    print("TypeError:", err)

TypeError: unhashable type: 'list'

In [46]:
from operator import itemgetter

In [47]:
itemgetter(*fewkeys)(mydict) #This works

(3, 5, 7)

In [48]:
# Get a list of keys
list(mydict)

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

In [49]:
# Create random letters: draw 10 code points from 65 up to (not including) 65+26
# and convert each to its character
np.random.seed(0)
randomletters = [chr(i) for i in fakedata('int', min=65, max=65+26, size=10)]
randomletters

['M', 'P', 'V', 'A', 'D', 'D', 'H', 'J', 'T', 'V']

In [50]:
# Look up each random letter; letters that are not keys yield a 'No key <letter>' message
[mydict.get(c, 'No key '+c) for c in randomletters]

['No key M', 'No key P', 'No key V', 5, 3, 3, 2, 7, 'No key T', 'No key V']

## np.unique

#### Potential application: Building confusion matrices

In [51]:
# Seed the global NumPy RNG so the sample below is reproducible
np.random.seed(0)
a = fakedata('int', min=0, max=9, size=10)
a

array([5, 0, 3, 3, 7, 3, 5, 2, 4, 7])

In [52]:
np.unique(a)

array([0, 2, 3, 4, 5, 7])

In [53]:
np.unique(a, return_counts=True)

(array([0, 2, 3, 4, 5, 7]), array([1, 1, 3, 1, 2, 2], dtype=int64))

## np.ix_

#### Potential application: Getting submatrices within a given matrix

In [54]:
# Seed the global NumPy RNG so the sample below is reproducible
np.random.seed(0)
# 5x4 integer matrix, shown as a table
a = fakedata('int', min=0, max=9, size=(5, 4))
DataFrame(a)

Unnamed: 0,0,1,2,3
0,5,0,3,3
1,7,3,5,2
2,4,7,6,8
3,8,1,6,7
4,7,8,1,5


In [55]:
indexmat = np.ix_([0, 4, 3], [2, 1])

In [56]:
a[indexmat]

array([[3, 0],
       [1, 8],
       [6, 1]])

## np.argmax

#### Potential application: Applying inverse Kesler's construction

In [57]:
# Seed the global NumPy RNG so the sample below is reproducible
np.random.seed(0)
# 5x4 integer matrix, shown as a table
a = fakedata('int', min=0, max=9, size=(5, 4))
DataFrame(a)

Unnamed: 0,0,1,2,3
0,5,0,3,3
1,7,3,5,2
2,4,7,6,8
3,8,1,6,7
4,7,8,1,5


In [58]:
np.argmax(a, axis=0)

array([3, 4, 2, 2], dtype=int64)

In [59]:
DataFrame(np.argmax(a, axis=1))

Unnamed: 0,0
0,0
1,0
2,3
3,0
4,1


## np.c_

#### Potential application: Augmenting a data matrix

In [60]:
# Seed the global NumPy RNG so the sample below is reproducible
np.random.seed(0)
# First 10x2 integer matrix
a = fakedata('int', min=0, max=9, size=(10, 2))
a

array([[5, 0],
       [3, 3],
       [7, 3],
       [5, 2],
       [4, 7],
       [6, 8],
       [8, 1],
       [6, 7],
       [7, 8],
       [1, 5]])

In [61]:
# A different seed, so b is not a copy of the matrix drawn with seed 0
np.random.seed(1)
# Second 10x2 integer matrix
b = fakedata('int', min=0, max=9, size=(10, 2))
b

array([[5, 8],
       [5, 0],
       [0, 1],
       [7, 6],
       [2, 4],
       [5, 2],
       [4, 2],
       [4, 7],
       [7, 1],
       [7, 0]])

In [62]:
np.c_[a, b]

array([[5, 0, 5, 8],
       [3, 3, 5, 0],
       [7, 3, 0, 1],
       [5, 2, 7, 6],
       [4, 7, 2, 4],
       [6, 8, 5, 2],
       [8, 1, 4, 2],
       [6, 7, 4, 7],
       [7, 8, 7, 1],
       [1, 5, 7, 0]])