### Биномиальный закон

X ~ B(n,p)\
X ~ Poisson(X=k) | (n -> inf, p -> 0, np -> lambda = const)\
dbinom(k,n,p) = P(X=k)\
pbinom(k,n,p) = P(X<=k)= F(k)\
qbinom(alpha,n,p) = k( P(X<=k)==alpha )

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_iris as load

iris = load()

In [3]:
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [4]:
X, y = pd.DataFrame(iris.data), pd.Series(iris.target)

In [5]:
'''Setosa Versicolour Virginica'''
y.unique()

array([0, 1, 2])

In [6]:
X.shape

(150, 4)

$$\hat{y}=\arg\max _{k\in \{1,...,K\}} p(C_k)\prod_{i=1}^{n}p(x_i|C_k)$$

In [77]:
class BayesClassifier:
    def __init__(self):
        classes = None
        X = None
        y = None
        
    def prob(self, X, x):
        return (np.sum(X>=x-X.std()) + np.sum(X<=x+X.std()))/X.shape[0]
    
    def fit(self, X, y):
        '''
        if type != float:
            freq()
        else:
            prob()
        '''
        self.classes = dict.fromkeys(y.unique())
        self.X = X
        self.y = y
        for c in self.classes.keys():
            self.classes[c] = y[y == c].shape[0]/y.shape[0]
            
    def predict(self, x):
        pr = 1
        max = 0
        C = ''
        for c in self.classes.keys():
            for feature in range(self.X.shape[1]):
                pr *= self.prob(self.X.loc[self.y[self.y == c].index, feature],x[feature])
            pr *= self.classes[c]
            if pr > max:
                max = pr
                C = c
            pr = 1
        return [pr,max,C]

In [78]:
class BayesClassifier2:
    def __init__(self):
        classes = None
        features = None
        X = None
        y = None
        
    def prob(self, X, x):
        return (np.sum(X>x-X.std()) + np.sum(X<x+X.std()))/X.shape[0]
    
    def fit(self, X, y):
        self.X = X
        self.y = y
        self.classes = dict(y.value_counts(normalize=True))
        
    def predict(self, x):
        pr = 1
        max = 0
        C = ''
        for c in self.classes.keys():
            for feature in range(self.X.shape[1]):
                if np.isin(x[feature],self.X[feature]):
                    pr *= self.X[feature].value_counts(normalize=True).loc[x[feature]]
                else:
                    freq = self.prob(self.X.loc[self.y[self.y == c].index, feature],x[feature])
                    pr *= freq if freq != 0 else .001
            pr *= self.classes[c]
            if pr > max:
                max = pr
                C = c
            pr = 1
        return [pr,max,C]

In [75]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.02)
_clf = BayesClassifier2()
_clf.fit(X_train, y_train)
for sample in range(X_test.shape[0]):
    print(_clf.predict(X_test.iloc[sample]))
    print(y_test.iloc[sample])

[1, 5.681695218560329e-07, 0]
2
[1, 5.0989572474259357e-08, 0]
2
[1, 1.5704788322071878e-06, 0]
1


In [71]:
from sklearn.datasets import load_breast_cancer as load

data = load()
data_X = pd.DataFrame(data.data)
data_y = pd.Series(data.target)

In [82]:
X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.2, random_state=255)
_clf = BayesClassifier2()
_clf.fit(X_train, y_train)
print(f'''Доля правильных ответов: 
      {100 * (1-sum(
        [abs(int(_clf.predict(X_test.iloc[sample])[-1]) - y_test.iloc[sample])
        for sample in range(X_test.shape[0])])/X_test.shape[0]):3f}%''')

Доля правильных ответов: 
      86.842105%


In [145]:
def minors(X: np.matrix):
    shape = X.shape
    if shape != shape[::-1]:
        raise Exception("matrix is not square")
    for size in (1, shape[0]+1):
        yield np.linalg.det(X[:size,:size])

In [164]:
X = np.matrix([[1,2,3],[4,-5,6],[1,0,-1]])

In [165]:
minor = minors(X)

In [166]:
print(next(minor))
print(next(minor))

1.0
40.0
