# NumPy/SciPy

## 모양 변형

In [1]:
import numpy as np

In [3]:
np.lookfor('shape')

Search results for 'shape'
--------------------------
numpy.shape
    Return the shape of an array.
numpy.reshape
    Gives a new shape to an array without changing its data.
numpy.flip
    Reverse the order of elements in an array along the given axis.
numpy.full
    Return a new array of given shape and type, filled with `fill_value`.
numpy.isin
    Calculates `element in test_elements`, broadcasting over `element` only.
numpy.ones
    Return a new array of given shape and type, filled with ones.
numpy.empty
    Return a new array of given shape and type, without initializing entries.
numpy.zeros
    Return a new array of given shape and type, filled with zeros.
numpy.resize
    Return a new array with the specified shape.
numpy.ndarray
    ndarray(shape, dtype=float, buffer=None, offset=0,
numpy.ndindex
    An N-dimensional iterator object to index arrays.
numpy.squeeze
    Remove single-dimensional entries from the shape of an array.
numpy.ma.shape
    Return the shape of an array.

In [4]:
np.lookfor('* array')

Search results for '* array'
----------------------------
numpy.exp
    Calculate the exponential of all elements in the input array.
numpy.cbrt
    Return the cube-root of an array, element-wise.
numpy.exp2
    Calculate `2**p` for all `p` in the input array.
numpy.fmax
    Element-wise maximum of array elements.
numpy.fmin
    Element-wise minimum of array elements.
numpy.modf
    Return the fractional and integral parts of an array, element-wise.
numpy.rint
    Round elements of the array to the nearest integer.
numpy.sqrt
    Return the non-negative square-root of an array, element-wise.
numpy.expm1
    Calculate ``exp(x) - 1`` for all elements in the array.
numpy.isnan
    Test element-wise for NaN and return result as a boolean array.
numpy.isnat
    Test element-wise for NaT (not a time) and return result as a boolean array.
numpy.log10
    Return the base 10 logarithm of the input array, element-wise.
numpy.log1p
    Return the natural logarithm of one plus the input array, ele

- lookfor 와 info 는 중요한 명령어이다.

In [5]:
a = np.array(100)

In [6]:
# 이건 0 차
a 

array(100)

In [7]:
a = np.array([100])

In [9]:
# 이건 1차
a

array([100])

In [12]:
a = np.array([[1,2,3],[4,5,6]])

In [13]:
# 이건 2차
a

array([[1, 2, 3],
       [4, 5, 6]])

- axis = 0 은 row
- axis = 1 은 column

In [38]:
a = np.arange(27)

In [39]:
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26])

In [40]:
# 1차. 왜 ,(콤마)로 표현되어 있을까? shape 은 튜플이고, 원소가 하나인 튜플은 (27,) 처럼 콤마를 붙여 표기하기 때문이다.
a.shape

(27,)

In [41]:
# reshape 주의해야 할 점: in-place 연산이 아님! 새 배열을 반환할 뿐, 자기 자신(a)의 shape 은 안 바뀜.
a.reshape(3,9)

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26]])

In [21]:
# 그래서 안바뀜. 
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26])

In [22]:
# 자기 자신을 변화시키기 위해서는 재할당이 필요하다.
# 파이썬으로 따지면 얕은 복사와 비슷해서, reshape 결과(b)는 원본(a)과 데이터를 공유한다. 그래서 b 를 수정하면 a 도 함께 바뀐다.
b = a.reshape(3,9)

In [23]:
b[0][0] = 100

In [24]:
a

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26])

In [25]:
# copy()를 사용해 막을 수 있다.
b = a.reshape(3,9).copy()

In [26]:
b[0][0] = 50

In [27]:
a

array([100,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26])

In [28]:
b = a.reshape(-1,9)

In [29]:
b

array([[100,   1,   2,   3,   4,   5,   6,   7,   8],
       [  9,  10,  11,  12,  13,  14,  15,  16,  17],
       [ 18,  19,  20,  21,  22,  23,  24,  25,  26]])

In [32]:
b = a.reshape(3,-100)

In [33]:
b

array([[100,   1,   2,   3,   4,   5,   6,   7,   8],
       [  9,  10,  11,  12,  13,  14,  15,  16,  17],
       [ 18,  19,  20,  21,  22,  23,  24,  25,  26]])

In [42]:
a.resize((3,9))

In [43]:
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26]])

- reshape : 새 배열을 반환하므로 결과를 쓰려면 재할당이 필요 (원소 개수를 맞춰야 함)
- resize : 자기 자신을 바꿈 (개수를 안 맞춰도 됨)

In [61]:
a = np.arange(27)

In [53]:
a.resize((3,5))

In [54]:
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26]])

In [58]:
# 크기를 작게 주면 잘리고, 크게 주면 0으로 채워진다.
a.resize((3,5))
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

----------------------

### 배열 합치고, 분할하기

- split() : list 반환
- hsplit() : split(axis=1)
- vsplit() : split(axis=0)

In [62]:
a = a.reshape(3,9)

In [63]:
a

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
       [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23, 24, 25, 26]])

In [64]:
np.split(a,1)

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
        [ 9, 10, 11, 12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23, 24, 25, 26]])]

In [65]:
np.split(a,(1,2))

[array([[0, 1, 2, 3, 4, 5, 6, 7, 8]]),
 array([[ 9, 10, 11, 12, 13, 14, 15, 16, 17]]),
 array([[18, 19, 20, 21, 22, 23, 24, 25, 26]])]

In [66]:
np.split(a,(2,2))

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
        [ 9, 10, 11, 12, 13, 14, 15, 16, 17]]),
 array([], shape=(0, 9), dtype=int32),
 array([[18, 19, 20, 21, 22, 23, 24, 25, 26]])]

In [67]:
np.split(a,(2,5))

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
        [ 9, 10, 11, 12, 13, 14, 15, 16, 17]]),
 array([[18, 19, 20, 21, 22, 23, 24, 25, 26]]),
 array([], shape=(0, 9), dtype=int32)]

In [68]:
# -(마이너스) 값은 파이썬 슬라이싱처럼 뒤에서부터 센다. (행이 3개이므로 -2 는 인덱스 1 과 같다.)
np.split(a,(-2,5))

[array([[0, 1, 2, 3, 4, 5, 6, 7, 8]]),
 array([[ 9, 10, 11, 12, 13, 14, 15, 16, 17],
        [18, 19, 20, 21, 22, 23, 24, 25, 26]]),
 array([], shape=(0, 9), dtype=int32)]

In [69]:
np.split(a,(5,5), axis=1)

[array([[ 0,  1,  2,  3,  4],
        [ 9, 10, 11, 12, 13],
        [18, 19, 20, 21, 22]]),
 array([], shape=(3, 0), dtype=int32),
 array([[ 5,  6,  7,  8],
        [14, 15, 16, 17],
        [23, 24, 25, 26]])]

In [70]:
np.hsplit(a,(5,))

[array([[ 0,  1,  2,  3,  4],
        [ 9, 10, 11, 12, 13],
        [18, 19, 20, 21, 22]]), array([[ 5,  6,  7,  8],
        [14, 15, 16, 17],
        [23, 24, 25, 26]])]

In [71]:
np.vsplit(a,(2,))

[array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8],
        [ 9, 10, 11, 12, 13, 14, 15, 16, 17]]),
 array([[18, 19, 20, 21, 22, 23, 24, 25, 26]])]

In [72]:
b = np.arange(5)

In [73]:
c = np.arange(5)

In [74]:
np.stack((b,c), 1)

array([[0, 0],
       [1, 1],
       [2, 2],
       [3, 3],
       [4, 4]])

In [75]:
np.hstack((b,c))

array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])

In [79]:
c = np.arange(5,10)

In [80]:
np.vstack((b,c))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [81]:
# r_ : row
np.r_[b,c]

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [82]:
# c_ : column
np.c_[b,c]

array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])

In [83]:
# s_ 와 같은 기능 : slice 객체를 만들어줌
[1,2,3,4,5,][slice(1,5)]

[2, 3, 4, 5]

In [84]:
np.arange(10)[np.s_[1]]

1

In [85]:
np.column_stack([b,c])

array([[0, 5],
       [1, 6],
       [2, 7],
       [3, 8],
       [4, 9]])

In [86]:
np.row_stack([b,c])

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

In [96]:
a = np.arange(6)

In [88]:
a

array([0, 1, 2, 3, 4, 5])

In [90]:
# a 를 2차원으로 바꾸기
a.reshape(6,1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5]])

In [91]:
a = np.array([[0,1,2,3,4,5]])

In [92]:
# 이렇게 변화시키고 싶을 때는 어떻게 해야 할까?
a

array([[0, 1, 2, 3, 4, 5]])

In [97]:
a[np.newaxis]

array([[0, 1, 2, 3, 4, 5]])

In [93]:
np.newaxis is None

True

In [98]:
a = np.arange(6)
a = a.reshape(2,3)
a

array([[0, 1, 2],
       [3, 4, 5]])

In [99]:
# 3차원
a[np.newaxis]

array([[[0, 1, 2],
        [3, 4, 5]]])

In [100]:
# 차원 증가
a[:,np.newaxis]

array([[[0, 1, 2]],

       [[3, 4, 5]]])

In [101]:
a[:,:,np.newaxis]

array([[[0],
        [1],
        [2]],

       [[3],
        [4],
        [5]]])

In [102]:
# shape 을 자유자재로 바꾸기 위해 np.newaxis 를 쓴다.
a = np.arange(6)
a[:,np.newaxis]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5]])

In [119]:
# np.expand_dims() : axis 로 지정한 위치에 길이 1인 축을 추가한다. 여기서는 한 번에 축 하나만 추가한다 (NumPy 1.18 이상에서는 axis 에 튜플도 줄 수 있다).
a = np.arange(6)
np.expand_dims(a,1)

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5]])

In [108]:
a = np.arange(27).reshape(3,3,3)
a

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [117]:
np.swapaxes(a, 0, 2)

array([[[ 0,  9, 18],
        [ 3, 12, 21],
        [ 6, 15, 24]],

       [[ 1, 10, 19],
        [ 4, 13, 22],
        [ 7, 16, 25]],

       [[ 2, 11, 20],
        [ 5, 14, 23],
        [ 8, 17, 26]]])

In [107]:
np.moveaxis(a, 0, 2)

array([[[ 0,  9, 18],
        [ 1, 10, 19],
        [ 2, 11, 20]],

       [[ 3, 12, 21],
        [ 4, 13, 22],
        [ 5, 14, 23]],

       [[ 6, 15, 24],
        [ 7, 16, 25],
        [ 8, 17, 26]]])

-----------------------------

In [120]:
# 구조화 배열(structured array): 필드마다 이름과 dtype 을 지정한 데이터
x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)],
             dtype=[('name', 'U10'), ('age', 'i4'), ('weight', 'f4')])
x

array([('Rex', 9, 81.), ('Fido', 3, 27.)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f4')])

In [121]:
# DB 처럼 사용할 수 있다.
x[0]

('Rex', 9, 81.)

In [122]:
from collections import namedtuple

In [123]:
Address = namedtuple('Address',('name', 'age', 'weight'))

In [125]:
a1 = Address('Rex', 27, 81.0)

In [126]:
a1

Address(name='Rex', age=27, weight=81.0)

In [131]:
a1[0]

'Rex'

In [127]:
dir(a1)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__module__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__slots__',
 '__str__',
 '__subclasshook__',
 '_asdict',
 '_fields',
 '_fields_defaults',
 '_make',
 '_replace',
 'age',
 'count',
 'index',
 'name',
 'weight']

In [133]:
x['name']

array(['Rex', 'Fido'], dtype='<U10')

In [136]:
'name' in dir(x)

False

---------------------------------

In [138]:
# 연습 데이터 불러오기
import seaborn as sns

In [140]:
tips = sns.load_dataset('tips')

In [141]:
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.00,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2


In [143]:
# key 방식으로 데이터 뽑기
tips['day']

0       Sun
1       Sun
2       Sun
3       Sun
4       Sun
5       Sun
6       Sun
7       Sun
8       Sun
9       Sun
10      Sun
11      Sun
12      Sun
13      Sun
14      Sun
15      Sun
16      Sun
17      Sun
18      Sun
19      Sat
20      Sat
21      Sat
22      Sat
23      Sat
24      Sat
25      Sat
26      Sat
27      Sat
28      Sat
29      Sat
       ... 
214     Sat
215     Sat
216     Sat
217     Sat
218     Sat
219     Sat
220     Fri
221     Fri
222     Fri
223     Fri
224     Fri
225     Fri
226     Fri
227     Sat
228     Sat
229     Sat
230     Sat
231     Sat
232     Sat
233     Sat
234     Sat
235     Sat
236     Sat
237     Sat
238     Sat
239     Sat
240     Sat
241     Sat
242     Sat
243    Thur
Name: day, Length: 244, dtype: category
Categories (4, object): [Thur, Fri, Sat, Sun]

In [144]:
# attribute 방식!
tips.smoker

0       No
1       No
2       No
3       No
4       No
5       No
6       No
7       No
8       No
9       No
10      No
11      No
12      No
13      No
14      No
15      No
16      No
17      No
18      No
19      No
20      No
21      No
22      No
23      No
24      No
25      No
26      No
27      No
28      No
29      No
      ... 
214    Yes
215    Yes
216    Yes
217    Yes
218    Yes
219    Yes
220    Yes
221    Yes
222    Yes
223     No
224    Yes
225    Yes
226    Yes
227     No
228     No
229    Yes
230    Yes
231    Yes
232     No
233     No
234    Yes
235     No
236    Yes
237    Yes
238     No
239     No
240    Yes
241    Yes
242     No
243     No
Name: smoker, Length: 244, dtype: category
Categories (2, object): [Yes, No]

In [145]:
# attribute 방식으로는 'size' 컬럼이 아니라 DataFrame 의 size 속성(전체 원소 수 = 244행 x 7열 = 1708)이 나온다.
# 그래서 tips['size'] 처럼 key 방식으로 불러와야 한다.
tips.size

1708

In [151]:
# 이름을 숫자로 넣을 수가 없을까...? 이름에 무조건 문자열만 들어갈 수 있다.
x = np.array([('Rex', 9, 81.0), ('Fido', 3, 27.0)],
             dtype=[(0, 'U10'), ('age', 'i4'), ('weight', 'f4')])

TypeError: data type not understood

In [152]:
tips.iloc[0]

total_bill     16.99
tip             1.01
sex           Female
smoker            No
day              Sun
time          Dinner
size               2
Name: 0, dtype: object

In [153]:
tips.loc[0]

total_bill     16.99
tip             1.01
sex           Female
smoker            No
day              Sun
time          Dinner
size               2
Name: 0, dtype: object

## pandas

- pandas : 직사각형 표 형태의 data 타입
- 저장방식, 사용방식이 쉽다

    가로줄(맨 윗줄) : column 이름
    세로줄(맨 왼쪽 줄) : index

In [154]:
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.00,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2


In [155]:
# fancy indexing 으로 동시에 두 개의 데이터 추출가능
tips[['day', 'time']]

Unnamed: 0,day,time
0,Sun,Dinner
1,Sun,Dinner
2,Sun,Dinner
3,Sun,Dinner
4,Sun,Dinner
5,Sun,Dinner
6,Sun,Dinner
7,Sun,Dinner
8,Sun,Dinner
9,Sun,Dinner


In [156]:
tips.tip +5

0       6.01
1       6.66
2       8.50
3       8.31
4       8.61
5       9.71
6       7.00
7       8.12
8       6.96
9       8.23
10      6.71
11     10.00
12      6.57
13      8.00
14      8.02
15      8.92
16      6.67
17      8.71
18      8.50
19      8.35
20      9.08
21      7.75
22      7.23
23     12.58
24      8.18
25      7.34
26      7.00
27      7.00
28      9.30
29      8.00
       ...  
214    11.50
215     6.10
216     8.00
217     6.50
218     6.44
219     8.09
220     7.20
221     8.48
222     6.92
223     8.00
224     6.58
225     7.50
226     7.00
227     8.00
228     7.72
229     7.88
230     7.00
231     8.00
232     8.39
233     6.47
234     8.00
235     6.25
236     6.00
237     6.17
238     9.67
239    10.92
240     7.00
241     7.00
242     6.75
243     8.00
Name: tip, Length: 244, dtype: float64

In [157]:
tips.tip.sum()

731.5799999999999

In [162]:
# index(label) 기반: 끝 값(6)도 결과에 포함된다.
tips.loc[3:6]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.0,Male,No,Sun,Dinner,2


In [163]:
# 파이썬식 정수 위치 기반: 끝 값(6)은 결과에 포함되지 않는다.
tips.iloc[3:6]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4


In [167]:
tips.loc[3:6,['day', 'smoker']]

Unnamed: 0,day,smoker
3,Sun,No
4,Sun,No
5,Sun,No
6,Sun,No


In [169]:
tips.loc[3:6,['smoker','tip']]

Unnamed: 0,smoker,tip
3,No,3.31
4,No,3.61
5,No,4.71
6,No,2.0


In [170]:
tips.iloc[3:6, :-2]

Unnamed: 0,total_bill,tip,sex,smoker,day
3,23.68,3.31,Male,No,Sun
4,24.59,3.61,Female,No,Sun
5,25.29,4.71,Male,No,Sun


In [171]:
tips.iloc[3:6, ::-1]

Unnamed: 0,size,time,day,smoker,sex,tip,total_bill
3,2,Dinner,Sun,No,Male,3.31,23.68
4,4,Dinner,Sun,No,Female,3.61,24.59
5,4,Dinner,Sun,No,Male,4.71,25.29


In [172]:
tips[['tip', 'sex']]

Unnamed: 0,tip,sex
0,1.01,Female
1,1.66,Male
2,3.50,Male
3,3.31,Male
4,3.61,Female
5,4.71,Male
6,2.00,Male
7,3.12,Male
8,1.96,Male
9,3.23,Male


In [175]:
%time tips.iat[3,5]

Wall time: 0 ns


'Dinner'

In [176]:
%time tips.at[3,'time']

Wall time: 998 µs


'Dinner'