# Naive Bayesian classification

In [2]:
import numpy as np

x_train = np.array([[10, 50], [20, 30], [25, 30], [20, 60], [15, 70], [40, 40], [30, 45], [20, 45], [40, 30], [7, 35]])
y_train = np.array([-1, 1, 1, -1, -1, 1, 1, -1, 1, -1])

# calculate math expectation (_1 == -1)
mw1, ml1 = np.mean(x_train[y_train == 1], axis=0)
mw_1, ml_1 = np.mean(x_train[y_train == -1], axis=0)

# variances by selections
sw1, sl1 = np.var(x_train[y_train == 1], axis=0, ddof=1)
sw_1, sl_1 = np.var(x_train[y_train == -1], axis=0, ddof=1)

print('MathExp (length, width) for class "1": ', ml1, mw1)
print('MathExp (length, width) for class "-1": ', ml_1, mw_1)

print('Variance by selection (length, width) for class "1": ', sw1, sl1)
print('Variance by selection (length, width) for class "-1": ', sw_1, sl_1)

MathExp (length, width) for class "1":  35.0 31.0
MathExp (length, width) for class "-1":  52.0 14.4
Variance by selection (length, width) for class "1":  80.0 50.0
Variance by selection (length, width) for class "-1":  34.3 182.5


## Test

In [None]:
x = [10, 40] # width, length

# np.log = ln
a_1 = lambda x: -np.log(sw_1 * sl_1) - (x[1] - ml_1) ** 2 / (2 * sl_1) - (x[0] - mw_1) ** 2 / (2 * sw_1)
a1 = lambda x: -np.log(sw1 * sl1) - (x[1] - ml1) ** 2 / (2 * sl1) - (x[0] - mw1) ** 2 / (2 * sw1)

y = np.argmax([a_1(x), a1(x)]) # take 0 or 1 - indexes in y_train (by first input in y_train-array (=[-1, 1]))
print(f'x with width == {x[0]} and length == {x[1]} we classify as {y_train[y]} (1: ladybug, -1: caterpillar)')
print(y)


x with width == 10 and length == 40 we classify as -1 (1: ladybug, -1: caterpillar)
0


## Test by train selection

In [11]:
pr = []
for x in x_train:
    pr.append(y_train[np.argmax([a_1(x), a1(x)])])
pr = np.array(pr)
Q = np.mean(pr != y_train) # share of errors
print(Q)

0.0


# Task 1

In [None]:
import numpy as np

data_x = [(7.2, 2.5), (6.4, 2.2), (6.3, 1.5), (7.7, 2.2), (6.2, 1.8), (5.7, 1.3), (7.1, 2.1), (5.8, 2.4), (5.2, 1.4), (5.9, 1.5), (7.0, 1.4), (6.8, 2.1), (7.2, 1.6), (6.7, 2.4), (6.0, 1.5), (5.1, 1.1), (6.6, 1.3), (6.1, 1.4), (6.7, 2.1), (6.4, 1.8), (5.6, 1.3), (6.9, 2.3), (6.4, 1.9), (6.9, 2.3), (6.5, 2.2), (6.0, 1.5), (5.6, 1.1), (5.6, 1.5), (6.0, 1.0), (6.0, 1.8), (6.7, 2.5), (7.7, 2.3), (5.5, 1.1), (5.8, 1.0), (6.9, 2.1), (6.6, 1.4), (6.3, 1.6), (6.1, 1.4), (5.0, 1.0), (7.7, 2.0), (4.9, 1.7), (7.2, 1.8), (6.8, 1.4), (6.1, 1.2), (5.8, 1.9), (6.3, 2.5), (5.7, 2.0), (6.5, 1.8), (7.6, 2.1), (6.3, 1.5), (6.7, 1.4), (6.4, 2.3), (6.2, 2.3), (6.3, 1.9), (5.5, 1.3), (7.9, 2.0), (6.7, 1.8), (6.4, 1.3), (6.5, 2.0), (6.5, 1.5), (6.9, 1.5), (5.6, 1.3), (5.8, 1.2), (6.7, 2.3), (6.0, 1.6), (5.7, 1.2), (5.7, 1.0), (5.5, 1.0), (6.1, 1.4), (6.3, 1.8), (5.7, 1.3), (6.1, 1.3), (5.5, 1.3), (6.3, 1.3), (5.9, 1.8), (7.7, 2.3), (6.5, 2.0), (5.6, 2.0), (6.7, 1.7), (5.7, 1.3), (5.5, 1.2), (5.0, 1.0), (5.8, 1.9), (6.2, 1.3), (6.2, 1.5), (6.3, 2.4), (6.4, 1.5), (7.4, 1.9), (6.8, 2.3), (5.6, 1.3), (5.8, 1.2), (7.3, 1.8), (6.7, 1.5), (6.3, 1.8), (6.0, 1.6), (6.4, 2.1), (6.1, 1.8), (5.9, 1.8), (5.4, 1.5), (4.9, 1.0)]
data_y = [1, 1, 1, 1, 1, -1, 1, 1, -1, -1, -1, 1, 1, 1, -1, -1, -1, 1, 1, 1, -1, 1, 1, 1, 1, 1, -1, -1, -1, 1, 1, 1, -1, -1, 1, -1, -1, -1, -1, 1, 1, 1, -1, -1, 1, 1, 1, 1, 1, -1, -1, 1, 1, 1, -1, 1, 1, -1, 1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, -1, 1, -1, 1, 1, -1, -1, -1]

x_train = np.array(data_x)
y_train = np.array(data_y)

# математические ожидания
mx11, mx12 = np.mean(x_train[y_train == -1], axis=0)
mx21, mx22 = np.mean(x_train[y_train == 1], axis=0)

# дисперсии
Dx11, Dx12 = np.var(x_train[y_train == -1], axis=0)
Dx21, Dx22 = np.var(x_train[y_train == 1], axis=0)

lm1 = 1     # штраф неверной классификации 1-го класса (-1)
lm2 = 1     # штраф неверной классификации 2-го класса (+1)
P1 = 0.5    # априорная вероятность появления образов 1-го класса
P2 = 1 - P1 # априорная вероятность появления образов 2-го класса

a_1 = lambda x: np.log(lm1 * P1) - np.log(2 * np.pi * Dx11 * Dx12) - (x[0] - mx11) ** 2 / (2 * Dx11) - (x[1] - mx12) ** 2 / (2 * Dx12)
a1 = lambda x: np.log(lm2 * P2) - np.log(2 * np.pi * Dx21 * Dx22) - (x[0] - mx21) ** 2 / (2 * Dx21) - (x[1] - mx22) ** 2 / (2 * Dx22)

predict = []
classes = [-1, 1] # in np.argmax([a_1(x), a1(x)]) class -1 first (a_1(x)), second is +1 (a1(x)), in 'classes' i did same [-1, 1]
for x in x_train:  
    ind = np.argmax([a_1(x), a1(x)])
    predict.append(classes[ind]) # -1 or 1 (we get 0 or 1)

predict = np.array(predict)
Q = np.mean(predict != y_train)
print(Q)

0.06


## Task 2

Bayes' theorem can be used to solve not only classification problems, but also regression problems. Let there be a function of the form:

but in fact use the method of the smallest squares

# <img src=".././photo/condition19.png" alt="photo" width="672" height="600">

In [None]:
import numpy as np

def func(x):
    return 0.5 * x + 0.2 * x ** 2 - 0.05 * x ** 3 + 0.2 * np.sin(4 * x) - 2.5

def model(w, x):
    return w[0] + w[1] * x + w[2] * x ** 2 + w[3] * x ** 3


coord_x = np.arange(-4.0, 6.0, 0.1)

x_train = np.array([[_x**i for i in range(4)] for _x in coord_x]) # обучающая выборка
y_train = func(coord_x) # целевые выходные значения

'''
w.T has dim (1 * 4)
sum(y_i * x_i.T) = sum((w.T * x_i) * x_i.T)
sum(y_i * x_i.T) has dimension (1 * 4) -> 
(X == x_train)
X = column with x_i.T-rows -> X has dim (n * 4) ->
-> y has dim (n * 1) -> y.T has dim (4 * 1) -> 
y.T @ X has dim (1 * n) @ (n * 4) = (1 * 4)

same with sum((w.T * x_i) * x_i.T), w.T doesn't depend on i-indexes ->
w.T @ sum(x_i * x_i.T)
x_i has dimension(4 * 1), x_i.T = (1 * 4)
x_i @ x_i.T = (4 * 4)
X.T @ X = (4 * n) @ (n * 4)
x_i * x_i.T = X.T @ X ->
-> y.T @ X = w.T @ (X.T @ X) ->
w.T = (y.T @ X) @ (X.T @ X)^(-1) = (1 * 4) @ ((4 * n) @ (n * 4))^(-1) = (1 * 4)
'''

w = y_train.T @ x_train @ np.linalg.inv(x_train.T @ x_train)

Q = np.mean([(func(x) - model(w, x)) ** 2 for x in coord_x])

print(w)
print(Q)

[-2.49213311  0.50342222  0.19781693 -0.04986103]
0.019662942949529676


# <img src=".././photo/s3.png" alt="photo" width="672" height="200">

взяли от этого логарифма производную и приравняли к 0, то есть, эквивалентно, нашли градиент по вектору параметров и приравняли к 0 чтобы 