In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import time
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

from haversine import haversine

sns.set()
sns.set_style("whitegrid")
sns.set_color_codes()

%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}

from matplotlib import font_manager, rc
plt.rcParams['axes.unicode_minus'] = False

import platform
if platform.system() == 'Darwin':
    rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    path = "c:/Windows/Fonts/malgun.ttf"
    font_name = font_manager.FontProperties(fname=path).get_name()
    rc('font', family=font_name)

import scipy as sp
import statsmodels.api as sm # statsmodel 기본 import
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms 
import sklearn as sk
from patsy import dmatrix

import warnings
warnings.filterwarnings("ignore")

##### 베르누이 분포의 모수 추정

In [4]:
# Estimate the Bernoulli parameter from simulated data: the estimate is the
# fraction of draws equal to 1.
np.random.seed(0)
theta0 = 0.6  # true parameter used to generate the sample
bernoulli_rv = sp.stats.bernoulli(theta0)
x = bernoulli_rv.rvs(size=1000)

# counts[k] is the number of draws equal to k; minlength is the number of
# distinct outcomes, so both counts exist even if one outcome never occurs.
counts = np.bincount(x, minlength=2)
N0, N1 = counts

N = N0 + N1
theta = N1 / N
theta

0.60999999999999999

##### 카테고리 분포의 모수 추정

In [8]:
# Estimate the categorical parameters from simulated data: each estimate is
# the relative frequency of its category in the sample.
np.random.seed(0)
theta0 = np.array([0.1, 0.3, 0.6])  # true category probabilities
categories = np.arange(3)
x = np.random.choice(categories, size=1000, p=theta0)

# One count per category; minlength guarantees three entries.
counts = np.bincount(x, minlength=3)
N0, N1, N2 = counts
N = N0 + N1 + N2
theta = counts / N
theta

array([ 0.098,  0.317,  0.585])

##### 정규 분포의 모수 추정

In [10]:
# Estimate the parameters of a univariate normal distribution from a
# simulated sample.
np.random.seed(0)
mu0 = 1      # true mean (passed as `loc`)
sigma0 = 2   # true standard deviation (passed as `scale`), not the variance
x = sp.stats.norm(mu0, sigma0).rvs(1000)
xbar = x.mean()

# ddof=1 divides by N - 1 rather than N.
# `s` is the sample standard deviation and is the quantity comparable to
# sigma0; `s2` is the sample variance (s squared). Previously `s2` was
# assigned the standard deviation, which contradicted its name.
s = x.std(ddof=1)
s2 = x.var(ddof=1)
xbar, s

(0.90948658501960922, 1.9750540913890255)

##### 다변수 정규 분포의 모수 추정

In [11]:
# Estimate the mean vector and covariance matrix of a bivariate normal
# distribution from a simulated sample.
np.random.seed(0)
mu0 = np.array([0, 1])                   # true mean vector
sigma0 = np.array([[1, 0.2], [0.2, 4]])  # true covariance matrix
mvn_rv = sp.stats.multivariate_normal(mean=mu0, cov=sigma0)
x = mvn_rv.rvs(size=1000)                # one observation per row
xbar = np.mean(x, axis=0)

# np.cov treats each ROW as a variable by default. Here the variables are the
# columns and the rows are observations, so rowvar must be false; otherwise
# the result would be a 1000 x 1000 matrix instead of 2 x 2.
S2 = np.cov(x, rowvar=False)
print(xbar)
print(S2)

[-0.0126996   0.95720206]
[[ 0.96100921  0.16283508]
 [ 0.16283508  3.80507694]]
