In [4]:
####Dynamic Linear Regression model####
import numpy as np
import pandas as pd
import matplotlib.pyplot  as plt
import numpy.matlib
import scipy.linalg
import itertools
import calendar
import statsmodels
import statsmodels.api as sm
from datetime import datetime
from datetime import timedelta
from scipy import sparse
from scipy.stats import norm
from numpy.random import *
from scipy import optimize

np.random.seed(98537)

In [5]:
##多項分布の乱数を生成する関数
def rmnom(pr, n, k, pattern):
    if pattern==1:
        z_id = np.array(np.argmax(np.cumsum(pr, axis=1) >= np.random.uniform(0, 1, n)[:, np.newaxis], axis=1), dtype="int")
        Z = np.diag(np.repeat(1, k))[z_id, ]
        return z_id, Z
    z_id = np.array(np.argmax((np.cumsum(pr, axis=1) >= np.random.uniform(0, 1, n)[:, np.newaxis]), axis=1), dtype="int")
    return z_id
    
#連続した日付を取得する関数
def daterange(_start, _end):
    for n in range((_end - _start).days):
        yield _start + timedelta(n)

In [7]:
####シミュレーションデータを生成####
##日付データを作成
#対象期間の日付と曜日情報を出力
start = datetime.strptime('2014-01-01', '%Y-%m-%d')
end   = datetime.strptime('2019-09-30', '%Y-%m-%d')
get_date = np.array([])
day_of_week = np.array([], dtype="int")
for day in daterange(start, end):
    get_date = np.append(get_date, day)
    day_of_week = np.append(day_of_week, day.weekday())
    
#日付を定義
Timestamp = pd.Series(get_date)
date = pd.DataFrame({"date": Timestamp.astype("U")})
date_df = pd.merge(date, pd.DataFrame({"date": np.unique(date), "date_id": np.arange(date.shape[0])}), on="date", how="inner")
year = pd.DataFrame({"year": Timestamp.astype("U").str[:4]})
unique_year = np.unique(year)
year_df = pd.merge(year, pd.DataFrame({"year": unique_year, "year_id": np.arange(unique_year.shape[0])}), on="year", how="inner")
month = pd.DataFrame({"month": Timestamp.astype("U").str[:7]})
unique_month = np.unique(month)
month_df = pd.merge(month, pd.DataFrame({"month": unique_month, "month_id": np.arange(unique_month.shape[0])}), on="month", how="inner")

#週を定義
freq = pd.Series(day_of_week).value_counts()
argmax_week = freq.index[np.argmax(np.array(freq))]
index = np.array(np.where(day_of_week==0)[0], dtype="int")
if np.min(index)!=0:
    index = np.append(0, index)
if np.max(index)!=day_of_week.shape[0]-1:
    index = np.append(index, day_of_week.shape[0])
weeks = index.shape[0] - 1
week = np.repeat(0, day_of_week.shape[0])
for i in range(weeks):
    if i==0:
        get = np.arange(index[i], index[i+1])
        week[get] = np.repeat(i, get.shape[0])
    else:
        get = np.arange(index[i], index[i+1])
        week[get] = np.repeat(i, get.shape[0])
        
#データフレームを作成
calendar_df = pd.concat((date_df[["date"]], year_df, month_df, pd.DataFrame({"week_id": week, "day_of_week": day_of_week})), axis=1)
calendar_df["month_index"] = calendar_df["month"].str[5:7].astype("int")

In [11]:
##データの生成
#データの定義
days = calendar_df["date"].shape[0]
weeks = np.unique(calendar_df["day_of_week"]).shape[0]
months = np.unique(calendar_df["month_index"]).shape[0]

In [55]:
#周期成分を生成
week = np.array(np.eye(weeks), dtype="int")[np.array(calendar_df["day_of_week"]), 1:]
month = np.array(np.eye(months), dtype="int")[np.array(calendar_df["month_index"])-1, 1:]
k = week.shape[1] + month.shape[1] + 1

#週成分の制約インデックスを定義
week_allocation = [i for i in range(days)]
for i in range(days):
    if i <= weeks-1:
        week_allocation[i] = np.arange(0, i)
    else:
        week_allocation[i] = week_vec[i-weeks+1:i]

In [99]:
##静的および動的パラメータを生成
#状態パラメータ
s = 5
S1 = np.array([0.05, 0.15, 0.6, 0.15, 0.05])
S2 = np.array([[0.82, 0.11, 0.06, 0.01, 0.01],
               [0.11, 0.79, 0.07, 0.02, 0.01],
               [0.01, 0.02, 0.95, 0.02, 0.01],
               [0.01, 0.02, 0.07, 0.79, 0.11],
               [0.01, 0.01, 0.06, 0.11, 0.82]])

#状態推移を生成
z = np.repeat(0, days)
for i in range(days):
    if i==0:
        z[i] = np.argmax(np.random.multinomial(1, S1, 1))
    else:
        z[i] = np.argmax(np.random.multinomial(1, S2[z[i-1], ], 1))
        
#初期値の設定
tau = np.array([0.01, 0.005, 0.01])
trend = np.append(np.random.normal(0, tau[0], 1), np.repeat(0.0, days-1))
ar = np.append(np.random.normal(0, 0.05, 1), np.repeat(0.0, days-1))
seasonal = np.append(np.random.normal(0, 0.25, weeks), np.repeat(0.0, days-weeks))

#時間ごとにパラメータを生成
for i in range(1, days):
    diff_trend = np.sort(np.random.normal(0, tau[0], s))
    trend[i] = trend[i-1] + diff_trend[z[i-1]]
    ar[i] = ar[i-1] + np.random.normal(0, tau[1], 1)
    if i >= weeks-1:
        seasonal[i] = np.sum(-seasonal[week_allocation[i]])# + np.random.normal(0, tau[2], 1)

In [103]:
pd.concat((calendar_df, pd.DataFrame(seasonal)), axis=1)

Unnamed: 0,date,year,year_id,month,month_id,week_id,day_of_week,month_index,0
0,2014-01-01,2014,0,2014-01,0,0,2,1,-0.076503
1,2014-01-02,2014,0,2014-01,0,0,3,1,0.137668
2,2014-01-03,2014,0,2014-01,0,0,4,1,-0.082405
3,2014-01-04,2014,0,2014-01,0,0,5,1,0.242773
4,2014-01-05,2014,0,2014-01,0,0,6,1,-0.251599
5,2014-01-06,2014,0,2014-01,0,1,0,1,0.226154
6,2014-01-07,2014,0,2014-01,0,1,1,1,-0.196088
7,2014-01-08,2014,0,2014-01,0,1,2,1,-0.082405
8,2014-01-09,2014,0,2014-01,0,1,3,1,0.242773
9,2014-01-10,2014,0,2014-01,0,1,4,1,-0.251599


In [98]:
seasonal[6]

0.8050925852996942

In [95]:
weeks-1

6

In [49]:
week_vec[i]

5