In [1]:
import numpy as np

x = np.array([1,2,3])
print(x)

[1 2 3]


In [3]:
# 크기가 (1,3)인 2차원
np.array([[1,2,3]])

array([[1, 2, 3]])

In [8]:
# 2-D column vector of shape (3, 1): three rows, one column
np.array([[1],[2],[3]])

array([[1],
       [2],
       [3]])

In [9]:
from scipy import linalg
s = np.array([2,3,1])

In [10]:
# 단위 벡터

s / linalg.norm(s)

array([0.53452248, 0.80178373, 0.26726124])

#### Norm

In [11]:
x = np.array([1,-3])
x / linalg.norm(x)

array([ 0.31622777, -0.9486833 ])

##### Norm2

In [12]:
x / (x**2).sum()**0.5

array([ 0.31622777, -0.9486833 ])

##### 영벡터 (zeros)

In [13]:
np.zeros(5)     # 실수(float)로 반환

array([0., 0., 0., 0., 0.])

In [14]:
np.zeros(5).shape

(5,)

In [15]:
s = (2,2)
np.zeros(s)

array([[0., 0.],
       [0., 0.]])

##### 선형 독립, 선형 종속
###### Rank

In [16]:
# matrix_rank()

x= np.array([[2,7],[5,1]])
np.linalg.matrix_rank(x)

2

In [17]:
x_2 = np.array([[2,4],[3,6]])
np.linalg.matrix_rank(x_2)

1

In [18]:
x_3 = np.array([[1,2],[3,5]])
np.linalg.matrix_rank(x_3)

2

In [21]:
# Vector arithmetic on plain Python lists (inner and cross products are shown in later cells)

x = [2,3]
y = [3,1]

# Element-wise addition (subtraction follows in the next cell), pairing components with zip()

z = [i + j for i, j in zip(x,y)]
print(z)

[5, 4]


In [24]:
z_2 = [i-j for i, j in zip(x,y)]
print(z_2)

[-1, 2]


In [26]:
# Scalar multiplication: scale every component of x by the constant c

x = [3,4]
c = 8

z = [c * i for i in x]
print(z)

[24, 32]


In [27]:
# 내적(inner product)

u = np.array([6,6])
v = np.array([12,0])

np.dot(u,v)

72

In [22]:
# zip 함수

list(zip([1,2,3],[4,5,6]))

[(1, 4), (2, 5), (3, 6)]

In [23]:
list(zip([1,2,3],[4,5,6],[7,8,9]))

[(1, 4, 7), (2, 5, 8), (3, 6, 9)]

#### 벡터의 내적 원리

In [5]:
import numpy as np

# np.dot on N-D operands sums over the LAST axis of the first array and the
# SECOND-TO-LAST axis of the second array, so those two lengths must match.
# A is fixed at shape (1, 2, 3); B1..B6 hold the same six values (0..5) laid
# out in every ordering of the axis lengths 1, 2, 3.
A = np.arange(6).reshape(1, 2, 3)
B1 = np.arange(6).reshape(1, 2, 3)
B2 = np.arange(6).reshape(1, 3, 2)
B3 = np.arange(6).reshape(2, 1, 3)
B4 = np.arange(6).reshape(2, 3, 1)
B5 = np.arange(6).reshape(3, 1, 2)
B6 = np.arange(6).reshape(3, 2, 1)

In [7]:
# Expected failure: A's last axis has length 3 but B1's second-to-last axis
# has length 2, so np.dot cannot contract them. The error is caught and its
# message printed so that "Restart & Run All" continues past this cell.
try:
    np.dot(A, B1)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (1,2,3) and (1,2,3) not aligned: 3 (dim 2) != 2 (dim 1)

In [8]:
np.dot(A, B2)

array([[[[10, 13]],

        [[28, 40]]]])

In [11]:
# Expected failure: A's last axis has length 3 but B3's second-to-last axis
# has length 1, so np.dot cannot contract them. The error is caught and its
# message printed so that "Restart & Run All" continues past this cell.
try:
    np.dot(A, B3)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (1,2,3) and (2,1,3) not aligned: 3 (dim 2) != 1 (dim 1)

In [12]:
np.dot(A, B4)

array([[[[ 5],
         [14]],

        [[14],
         [50]]]])

In [13]:
# Expected failure: A's last axis has length 3 but B5's second-to-last axis
# has length 1, so np.dot cannot contract them. The error is caught and its
# message printed so that "Restart & Run All" continues past this cell.
try:
    np.dot(A, B5)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (1,2,3) and (3,1,2) not aligned: 3 (dim 2) != 1 (dim 1)

In [14]:
# Expected failure: A's last axis has length 3 but B6's second-to-last axis
# has length 2, so np.dot cannot contract them. The error is caught and its
# message printed so that "Restart & Run All" continues past this cell.
try:
    np.dot(A, B6)
except ValueError as err:
    print("ValueError:", err)

ValueError: shapes (1,2,3) and (3,2,1) not aligned: 3 (dim 2) != 2 (dim 1)

In [16]:
import numpy as np

u = np.array([3,-6])
v = np.array([-7,9])

uv = np.dot(u,v)
print(uv)

-75


#### 벡터의 외적(cross() 함수 적용)

In [17]:
a = (1,3,5)
b = (2,4,6)

def cross(a, b):
    """Return the cross product a x b of two 3-component sequences as a list.

    Only indices 0, 1 and 2 of ``a`` and ``b`` are read. Component k of the
    result is a[i]*b[j] - a[j]*b[i], where (i, j) runs through the cyclic
    index pairs (1, 2), (2, 0), (0, 1).
    """
    cyclic_pairs = ((1, 2), (2, 0), (0, 1))
    return [a[i] * b[j] - a[j] * b[i] for i, j in cyclic_pairs]
print(cross(a,b))

[-2, 4, -2]


In [18]:
# numpy 이용 벡터 외적 구하기
import numpy as np

print(np.cross(a,b))

[-2  4 -2]


#### 직교벡터

In [19]:
import numpy as np

# 1-D array of shape (2,) -- a single pair of brackets creates one axis, not a (1, 2) matrix
a = np.array([1,2])
print(a)

[1 2]


In [21]:
np.linalg.norm(a)    # length (L2 norm) of a, returned as a scalar -- this is not a unit vector

2.23606797749979

In [22]:
# L1 norm

np.linalg.norm(a, ord=1)

3.0

In [24]:
# L2 norm  ==> default

np.linalg.norm(a, ord=2)

2.23606797749979

#### 유클리디언 거리 구하기

In [26]:
from scipy.spatial import distance

p1 = (1,2,3)
p2 = (4,5,6)

# p1, p2 사이 유클리디언 거리

distance.euclidean(p1,p2)

5.196152422706632

#### 맨해튼 거리 구하기

In [27]:
from math import *

p1 = (1,2,3)
p2 = (4,5,6)

# p1, p2 사이 맨해튼 거리

def manhattan_dist(x,y):
    """Return the Manhattan (L1) distance between two equal-length sequences.

    The distance is the sum of ``abs(a - b)`` over corresponding components
    of ``x`` and ``y``. Two empty sequences have distance 0.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` do not have the same number of components. A plain
        ``zip`` would silently drop the extra components and return a
        distance for truncated vectors.
    """
    # Local import keeps this cell self-contained.
    from itertools import zip_longest

    missing = object()  # sentinel that cannot collide with a real component
    total = 0
    for a, b in zip_longest(x, y, fillvalue=missing):
        if a is missing or b is missing:
            raise ValueError("manhattan_dist: x and y must have the same length")
        total = total + abs(a - b)
    return total

In [28]:
manhattan_dist(p1,p2)

9

#### 코사인 유사도 - 문서 유사도

In [29]:
from numpy import dot
from numpy.linalg import norm
import numpy as np

def cos_sim(A, B):
    """Return the cosine similarity of vectors A and B.

    Computed as ``dot(A, B) / (norm(A) * norm(B))``.

    Parameters
    ----------
    A, B : array-like
        1-D vectors of equal length (for example, term-count vectors of two
        documents).

    Returns
    -------
    float
        The cosine of the angle between A and B. If either vector has zero
        norm the angle is undefined and 0.0 is returned, instead of the
        ``nan`` (plus RuntimeWarning) that 0/0 would produce.
    """
    denom = norm(A) * norm(B)
    if denom == 0:
        # A zero vector (e.g. an empty document) has no direction; treat it
        # as sharing nothing with the other vector.
        return 0.0
    return dot(A, B) / denom

In [30]:
doc1 = np.array([1,1,1,1,0])
doc2 = np.array([1,0,1,0,1])
doc3 = np.array([2,1,1,1,1])

In [31]:
print(cos_sim(doc1, doc2))
print(cos_sim(doc1, doc3))   # 문서1과 문서3의 유사도가 가장 높음
print(cos_sim(doc2, doc3))

0.5773502691896258
0.8838834764831843
0.8164965809277259
