### df to dict

In [6]:
import numpy as np
import pandas as pd
import pandasql as ps


# Sample customer records; a missing address is stored as NaN.
df = pd.DataFrame(
    data=[
        [1234, 'Customer A', '123 Street', np.nan],
        [1234, 'Customer A', np.nan, '333 Street'],
        [1233, 'Customer B', '444 Street', '333 Street'],
        [1233, 'Customer B', '444 Street', '666 Street'],
    ],
    columns=['ID', 'Customer', 'Billing Address', 'Shipping Address'],
)

df

Unnamed: 0,ID,Customer,Billing Address,Shipping Address
0,1234,Customer A,123 Street,
1,1234,Customer A,,333 Street
2,1233,Customer B,444 Street,333 Street
3,1233,Customer B,444 Street,666 Street


In [9]:
# Method 1: build an {ID: Customer} mapping by pairing the two columns by
# name, so the result does not depend on column order. Later rows overwrite
# earlier ones, so each repeated ID appears once.
dict(zip(df['ID'], df['Customer']))

{1233: 'Customer B', 1234: 'Customer A'}

In [11]:
# Method 2: DataFrame.to_dict() with its default orient returns a nested
# mapping of {column name -> {row index -> value}}.
df.to_dict()

{'Billing Address': {0: '123 Street',
  1: nan,
  2: '444 Street',
  3: '444 Street'},
 'Customer': {0: 'Customer A',
  1: 'Customer A',
  2: 'Customer B',
  3: 'Customer B'},
 'ID': {0: 1234, 1: 1234, 2: 1233, 3: 1233},
 'Shipping Address': {0: nan,
  1: '333 Street',
  2: '333 Street',
  3: '666 Street'}}

### SQL query on pandas DataFrame

In [4]:
import numpy as np
import pandas as pd
import pandasql as ps


# Sample customer records; a missing address is stored as NaN.
df = pd.DataFrame(
    data=[
        [1234, 'Customer A', '123 Street', np.nan],
        [1234, 'Customer A', np.nan, '333 Street'],
        [1233, 'Customer B', '444 Street', '333 Street'],
        [1233, 'Customer B', '444 Street', '666 Street'],
    ],
    columns=['ID', 'Customer', 'Billing Address', 'Shipping Address'],
)

df

Unnamed: 0,ID,Customer,Billing Address,Shipping Address
0,1234,Customer A,123 Street,
1,1234,Customer A,,333 Street
2,1233,Customer B,444 Street,333 Street
3,1233,Customer B,444 Street,666 Street


In [5]:
# Query the DataFrame with SQL via pandasql; locals() is passed as the
# environment in which sqldf looks up the table name `df`.
ps.sqldf('select ID from df', locals())

Unnamed: 0,ID
0,1234
1,1234
2,1233
3,1233


### numpy - pad

- https://docs.scipy.org/doc/numpy-1.12.0/reference/generated/numpy.pad.html
- CNN에서 padding할 때 사용

- Parameters
 - array: padding할 array
 - pad_width: 각 axis마다 (좌측에 몇개, 우측에 몇개) padding할지를 지정한다. 
 - mode: 'constant' 만 써봄. 

- axis 순서는 가장 바깥쪽부터 list에 element를 append하는 것 처럼 순차적으로 padding 한다.

In [22]:
# 1-D sample array used for the padding example below.
ts2 = np.array([1, 1, 1])
ts2

array([1, 1, 1])

In [23]:
# pad_width=(2, 3): two values before and three after; constant_values=(2, 3)
# fills the leading pad with 2 and the trailing pad with 3.
np.pad(ts2, (2, 3), 'constant', constant_values=(2, 3))

array([2, 2, 1, 1, 1, 3, 3, 3])

> 아래 코드에서는 0번째 axis의 좌측에 6을 1개 우측에 7을 2개, 1번째 axis의 좌측에 8을 4개 우측에 9를 5개 padding 하고 있다.

In [20]:
# 2-D sample array (2 rows x 4 columns) for the multi-axis padding example.
ts1 = np.array([
    [1, 1, 1, 1],
    [2, 2, 2, 2],
])
ts1

array([[1, 1, 1, 1],
       [2, 2, 2, 2]])

In [21]:
# Axis 0 gets one row of 6 before and two rows of 7 after; axis 1 gets four
# columns of 8 before and five columns of 9 after.
np.pad(
    ts1,
    pad_width=((1, 2), (4, 5)),
    mode='constant',
    constant_values=((6, 7), (8, 9)),
)

array([[8, 8, 8, 8, 6, 6, 6, 6, 9, 9, 9, 9, 9],
       [8, 8, 8, 8, 1, 1, 1, 1, 9, 9, 9, 9, 9],
       [8, 8, 8, 8, 2, 2, 2, 2, 9, 9, 9, 9, 9],
       [8, 8, 8, 8, 7, 7, 7, 7, 9, 9, 9, 9, 9],
       [8, 8, 8, 8, 7, 7, 7, 7, 9, 9, 9, 9, 9]])

### numpy -  Matrix multiplication and product with other
- multiplication(원소별 곱, element-wise)과 product(행렬 곱, matrix product)는 다르다.
- multiplication은 곱셈 연산자(`*`)나 np.multiply를 사용해야 한다.
- product는 np.dot을 사용한다.

In [44]:
# Operands for comparing element-wise multiplication with the matrix product.
X = np.array([[1, 1, 1],
              [2, 2, 2]])            # (2, 3)
Y = np.array([[0.1, 0.1, 0.1],
              [0.01, 0.01, 0.01]])   # (2, 3)
Z = np.array([[0.1, 0.1],
              [0.2, 0.2],
              [0.3, 0.3]])           # (3, 2)
print('X.shape:{}, Y.shape:{}, Z.shape:{}'.format(X.shape, Y.shape, Z.shape))

X.shape:(2, 3), Y.shape:(2, 3), Z.shape:(3, 2)


In [45]:
# The * operator multiplies element by element; both operands are (2, 3).
X*Y

array([[ 0.1 ,  0.1 ,  0.1 ],
       [ 0.02,  0.02,  0.02]])

In [46]:
# np.multiply is the function form of the element-wise * operator.
np.multiply(X, Y)

array([[ 0.1 ,  0.1 ,  0.1 ],
       [ 0.02,  0.02,  0.02]])

In [47]:
# Element-wise multiplication needs broadcast-compatible shapes; (2, 3) and
# (3, 2) are not, so NumPy raises ValueError (shape mismatch). The error is
# caught and printed so that "Restart & Run All" does not stop at this cell.
try:
    np.multiply(X, Z)
except ValueError as err:
    print('ValueError: {}'.format(err))

ValueError: operands could not be broadcast together with shapes (2,3) (3,2) 

In [48]:
# Matrix product: (2, 3) dot (3, 2) -> (2, 2).
np.dot(X, Z)

array([[ 0.6,  0.6],
       [ 1.2,  1.2]])

In [49]:
# The matrix product needs the inner dimensions to agree; (2, 3) dot (2, 3)
# has 3 != 2, so NumPy raises ValueError (shape mismatch). The error is
# caught and printed so that "Restart & Run All" does not stop at this cell.
try:
    np.dot(X, Y)
except ValueError as err:
    print('ValueError: {}'.format(err))

ValueError: shapes (2,3) and (2,3) not aligned: 3 (dim 1) != 2 (dim 0)

### numpy - Matrix sum itself

In [50]:
# Input matrix for the np.sum examples in this section.
X = np.array([[1, 1, 1], [2, 2, 2]])  # (2, 3)

In [56]:
# Sum along axis 0 (down the rows); keepdims=True retains the reduced axis
# with length 1, so the result stays two-dimensional.
s1 = np.sum(X, axis=0, keepdims=True)  # (1, 3)
print(s1, s1.shape)

[[3 3 3]] (1, 3)


In [58]:
# Same sum along axis 0, but keepdims=False drops the reduced axis, giving a
# one-dimensional result.
s2 = np.sum(X, axis=0, keepdims=False)  # (3, )
print(s2, s2.shape)

[3 3 3] (3,)


In [65]:
# When no axis is specified, np.sum adds up all of the elements.
np.sum(X)

9

### numpy - Matrix sum with other

In [61]:
# Operands for the element-wise addition examples.
X = np.array([[1, 1, 1], [2, 2, 2]])  # (2, 3)
Y = np.array([[0.1, 0.1, 0.1], [0.01, 0.01, 0.01]])  # (2, 3)
K = np.array([1])  # (1,) - single-element array for the broadcasting example

In [62]:
# Operands of the same shape are added element by element.
X + Y

array([[ 1.1 ,  1.1 ,  1.1 ],
       [ 2.01,  2.01,  2.01]])

In [63]:
# K has shape (1,), so its single value is broadcast to every element of X.
X + K

array([[2, 2, 2],
       [3, 3, 3]])

### numpy - zeros
- https://docs.scipy.org/doc/numpy-1.13.0/reference/generated/numpy.zeros.html

- Parameters
 - shape : 결과의 shape
 - dtype : 결과 element의 data-type
 - order : 메모리 저장 순서. 'C'(row-major, 기본값) 또는 'F'(column-major)

In [2]:
# An integer shape gives a 1-D array; the elements default to floats (0.).
np.zeros(5)

array([ 0.,  0.,  0.,  0.,  0.])

In [3]:
# A tuple shape gives a multi-dimensional array, here 2 rows x 3 columns.
np.zeros((2, 3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

.

### numpy - max, mean

In [22]:
# 3-D array of ones used for the multi-axis reduction below.
X = np.ones((2, 3, 4))
X.shape

(2, 3, 4)

In [29]:
# Reduce over axes 1 and 2 at once, leaving one maximum per entry of axis 0.
Y = np.max(X, axis=(1, 2))
Y

array([ 1.,  1.])

In [30]:
# Only axis 0 (length 2) remains after reducing over axes 1 and 2.
Y.shape

(2,)

### numpy.tile(A, reps)
- https://docs.scipy.org/doc/numpy/reference/generated/numpy.tile.html
- A를 reps만큼 반복한다.
 - A: array_like
 - reps : array_like (필수) - 각 axis별 반복 횟수

In [9]:
import numpy as np

# 1-D input of length 3.
a = np.array([0, 1, 2])
print('a:', a, a.shape)

# reps has more entries than a has dimensions, so a is treated as shape
# (1, 1, 1, 3) and each axis is repeated: (1*1, 1*2, 1*3, 3*4) = (1, 2, 3, 12).
np.tile(a, reps=(1, 2, 3, 4)).shape

a: [0 1 2] (3,)


(1, 2, 3, 12)