In [9]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Normal

from tqdm.notebook import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from types import SimpleNamespace
from sklearn.preprocessing import MinMaxScaler
import os

import tyxe

In [19]:
train = pd.read_csv('../data/train/train.csv')
train

Unnamed: 0,시점,품목명,품종명,거래단위,등급,평년 평균가격(원),평균가격(원)
0,201801상순,건고추,화건,30 kg,상품,381666.666667,590000.0
1,201801중순,건고추,화건,30 kg,상품,380809.666667,590000.0
2,201801하순,건고추,화건,30 kg,상품,380000.000000,590000.0
3,201802상순,건고추,화건,30 kg,상품,380000.000000,590000.0
4,201802중순,건고추,화건,30 kg,상품,376666.666667,590000.0
...,...,...,...,...,...,...,...
29371,202111중순,대파,대파(일반),10키로묶음,상,0.000000,0.0
29372,202111하순,대파,대파(일반),10키로묶음,상,0.000000,0.0
29373,202112상순,대파,대파(일반),10키로묶음,상,0.000000,0.0
29374,202112중순,대파,대파(일반),10키로묶음,상,0.000000,0.0


In [97]:
import pandas as pd
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt

# 데이터 로드
data = pd.read_csv('../data/train/train.csv')

# 데이터 전처리
data = data.set_index('시점')
data

Unnamed: 0_level_0,품목명,품종명,거래단위,등급,평년 평균가격(원),평균가격(원)
시점,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
201801상순,건고추,화건,30 kg,상품,381666.666667,590000.0
201801중순,건고추,화건,30 kg,상품,380809.666667,590000.0
201801하순,건고추,화건,30 kg,상품,380000.000000,590000.0
201802상순,건고추,화건,30 kg,상품,380000.000000,590000.0
201802중순,건고추,화건,30 kg,상품,376666.666667,590000.0
...,...,...,...,...,...,...
202111중순,대파,대파(일반),10키로묶음,상,0.000000,0.0
202111하순,대파,대파(일반),10키로묶음,상,0.000000,0.0
202112상순,대파,대파(일반),10키로묶음,상,0.000000,0.0
202112중순,대파,대파(일반),10키로묶음,상,0.000000,0.0


In [99]:
# 특정 품목과 품종 선택 (예: 배추)
item = '건고추'
variety = '화건'
data = data[(data['품목명'] == item) & (data['품종명'] == variety)]

# 필요한 컬럼만 선택
data = data[['평균가격(원)']]

# 데이터 정규화
data['평균가격(원)'] = (data['평균가격(원)'] - data['평균가격(원)'].mean()) / data['평균가격(원)'].std()
data

Unnamed: 0_level_0,평균가격(원)
시점,Unnamed: 1_level_1
201801상순,0.164003
201801중순,0.164003
201801하순,0.164003
201802상순,0.164003
201802중순,0.164003
...,...
202111중순,-0.540888
202111하순,-0.498579
202112상순,-0.466845
202112중순,-0.466845


In [101]:
# 모델 정의
with pm.Model() as model:
    # Priors for unknown model parameters
    sigma = pm.HalfNormal('sigma', sigma=1)
    intercept = pm.Normal('Intercept', mu=0, sigma=1)
    x_coeff = pm.Normal('x', mu=0, sigma=1)
    
    # Expected value of outcome
    mu = intercept + x_coeff * np.arange(len(data))
    
    # Likelihood (sampling distribution) of observations
    Y_obs = pm.Normal('Y_obs', mu=mu, sigma=sigma, observed=data['평균가격(원)'])
    
    # Posterior distribution
    trace = pm.sample(1000, return_inferencedata=False)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [sigma, Intercept, x]


Output()

Sampling 4 chains for 1_000 tune and 1_000 draw iterations (4_000 + 4_000 draws total) took 505 seconds.


FutureWarning: The function `traceplot` from PyMC was an alias for `plot_trace` from ArviZ. It was removed in PyMC 4.0. Switch to `pymc.plot_trace` or `arviz.plot_trace`.

In [110]:
import arviz as az
idata = az.from_pymc3(trace)
az.plot_trace(idata)
plt.show()

AttributeError: module 'arviz' has no attribute 'from_pymc3'