In [6]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [7]:
# Load the daily PCR-positive counts; the path is relative to the working directory.
CSV_PATH = 'pcr_positive_daily (2).csv'
df = pd.read_csv(CSV_PATH)

In [8]:
# Render the freshly loaded frame to inspect its columns and values.
df

Unnamed: 0,日付,PCR 検査陽性者数(単日),Unnamed: 2,Unnamed: 3
0,2020/1/16,1,,
1,2020/1/17,0,,
2,2020/1/18,0,,
3,2020/1/19,0,,
4,2020/1/20,0,,
...,...,...,...,...
464,2021/4/24,5433,,
465,2021/4/25,4434,,
466,2021/4/26,3303,,
467,2021/4/27,4958,,


In [9]:
# Remove the columns named "Unnamed: 2" and "Unnamed: 3"; the preview of the
# loaded frame shows no values in them.
# drop(..., errors='ignore') skips labels that are already gone, so re-running
# this cell does not raise KeyError the way a second `del df[...]` would.
df = df.drop(columns=['Unnamed: 2', 'Unnamed: 3'], errors='ignore')

In [10]:
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,日付,PCR 検査陽性者数(単日)
0,2020/1/16,1
1,2020/1/17,0
2,2020/1/18,0
3,2020/1/19,0
4,2020/1/20,0


In [11]:
# (number of rows, number of columns) of the frame at this point.
df.shape

(469, 2)

In [12]:
# Render the frame again now that the two "Unnamed" columns are gone.
df

Unnamed: 0,日付,PCR 検査陽性者数(単日)
0,2020/1/16,1
1,2020/1/17,0
2,2020/1/18,0
3,2020/1/19,0
4,2020/1/20,0
...,...,...
464,2021/4/24,5433
465,2021/4/25,4434
466,2021/4/26,3303
467,2021/4/27,4958


トレンドや季節性がある場合など、多くの時系列では平均が一定ではありません。このような非定常な時系列は、ARMAモデルをはじめとする定常性を仮定した時系列モデルでは記述できません。差分を取ってトレンドや季節性を除去し、定常な時系列に変換することで、定常性を仮定するモデルによる記述が可能になります。なお、次のセルでは差分ではなく、`shift` を使って1〜3日前の値（ラグ特徴量 t-1, t-2, t-3）を作成しています。

In [13]:
# Build lag features: each "t-k" column holds the daily count from k rows
# earlier, so the first k entries of that column are NaN.
daily_counts = df["PCR 検査陽性者数(単日)"]
for lag_column, lag in (("t-1", 1), ("t-2", 2), ("t-3", 3)):
    df[lag_column] = daily_counts.shift(periods=lag)

In [14]:
# First ten rows: the leading NaN values in t-1..t-3 come from the shifts.
df.head(n=10)

Unnamed: 0,日付,PCR 検査陽性者数(単日),t-1,t-2,t-3
0,2020/1/16,1,,,
1,2020/1/17,0,1.0,,
2,2020/1/18,0,0.0,1.0,
3,2020/1/19,0,0.0,0.0,1.0
4,2020/1/20,0,0.0,0.0,0.0
5,2020/1/21,0,0.0,0.0,0.0
6,2020/1/22,0,0.0,0.0,0.0
7,2020/1/23,0,0.0,0.0,0.0
8,2020/1/24,1,0.0,0.0,0.0
9,2020/1/25,1,1.0,0.0,0.0


In [15]:
# Last ten rows: every lag column is populated at the end of the series.
df.tail(n=10)

Unnamed: 0,日付,PCR 検査陽性者数(単日),t-1,t-2,t-3
459,2021/4/19,2893,4032.0,4722.0,4519.0
460,2021/4/20,4328,2893.0,4032.0,4722.0
461,2021/4/21,5280,4328.0,2893.0,4032.0
462,2021/4/22,5452,5280.0,4328.0,2893.0
463,2021/4/23,5094,5452.0,5280.0,4328.0
464,2021/4/24,5433,5094.0,5452.0,5280.0
465,2021/4/25,4434,5433.0,5094.0,5452.0
466,2021/4/26,3303,4434.0,5433.0,5094.0
467,2021/4/27,4958,3303.0,4434.0,5433.0
468,2021/4/28,5788,4958.0,3303.0,4434.0


In [16]:
# Discard every row that contains at least one missing value. The NaN entries
# seen in the preview are the ones the lag shifts left in the first three rows.
df = df.dropna(axis=0, how="any")

In [17]:
# First ten remaining rows; the original index labels are kept, so they no
# longer start at 0.
df.head(n=10)

Unnamed: 0,日付,PCR 検査陽性者数(単日),t-1,t-2,t-3
3,2020/1/19,0,0.0,0.0,1.0
4,2020/1/20,0,0.0,0.0,0.0
5,2020/1/21,0,0.0,0.0,0.0
6,2020/1/22,0,0.0,0.0,0.0
7,2020/1/23,0,0.0,0.0,0.0
8,2020/1/24,1,0.0,0.0,0.0
9,2020/1/25,1,1.0,0.0,0.0
10,2020/1/26,1,1.0,1.0,0.0
11,2020/1/27,0,1.0,1.0,1.0
12,2020/1/28,3,0.0,1.0,1.0


In [18]:
# Rename by label instead of assigning a positional list: the mapping states
# which source column becomes which name, does not depend on column order or
# count, and is safe to re-run (labels that are already renamed are ignored).
# The lag columns "t-1", "t-2", "t-3" keep their names.
df = df.rename(columns={"日付": "date", "PCR 検査陽性者数(単日)": "y"})

In [19]:
# Fixed parameters of the linear predictor (literal values, not fitted here).
w = 0.2  # slope applied to the input
b = 0.1  # intercept


def model(x):
    """Return the linear prediction ``w * x + b``.

    ``w`` and ``b`` are looked up in the global namespace on every call, so
    reassigning them changes the result of later calls.

    Args:
        x: A number (or any object supporting ``*`` and ``+`` with floats).

    Returns:
        The value of ``w * x + b``.
    """
    return w * x + b

`iterrows()` メソッドを使うと、1行ずつ、インデックス（行ラベル）とその行のデータ（`pandas.Series` 型）からなるタプル `(index, Series)` を取得できます。

In [20]:
# Print the model's output next to the true value for at most the first 101 rows.
PREVIEW_ROWS = 101
for _, record in df.head(PREVIEW_ROWS).iterrows():
    # iterrows() yields (index label, row Series); only the Series is needed.
    x = record[["t-1", "t-2", "t-3"]].values
    y = record["y"]
    # The model takes a single number: the mean of the three lagged counts.
    pred = model(x=np.mean(x))
    print(f"x: {x}, y: {y}, pred: {pred}")

x: [0.0 0.0 1.0], y: 0, pred: 0.16666666666666669
x: [0.0 0.0 0.0], y: 0, pred: 0.1
x: [0.0 0.0 0.0], y: 0, pred: 0.1
x: [0.0 0.0 0.0], y: 0, pred: 0.1
x: [0.0 0.0 0.0], y: 0, pred: 0.1
x: [0.0 0.0 0.0], y: 1, pred: 0.1
x: [1.0 0.0 0.0], y: 1, pred: 0.16666666666666669
x: [1.0 1.0 0.0], y: 1, pred: 0.23333333333333334
x: [1.0 1.0 1.0], y: 0, pred: 0.30000000000000004
x: [0.0 1.0 1.0], y: 3, pred: 0.23333333333333334
x: [3.0 0.0 1.0], y: 1, pred: 0.3666666666666667
x: [1.0 3.0 0.0], y: 3, pred: 0.3666666666666667
x: [3.0 1.0 3.0], y: 1, pred: 0.5666666666666668
x: [1.0 3.0 1.0], y: 0, pred: 0.43333333333333335
x: [0.0 1.0 3.0], y: 0, pred: 0.3666666666666667
x: [0.0 0.0 1.0], y: 0, pred: 0.16666666666666669
x: [0.0 0.0 0.0], y: 2, pred: 0.1
x: [2.0 0.0 0.0], y: 2, pred: 0.23333333333333334
x: [2.0 2.0 0.0], y: 0, pred: 0.3666666666666667
x: [0.0 2.0 2.0], y: 0, pred: 0.3666666666666667
x: [0.0 0.0 2.0], y: 0, pred: 0.23333333333333334
x: [0.0 0.0 0.0], y: 0, pred: 0.1
x: [0.0 0.0 0.0], 

In [21]:
# Show the target column as the cell's output value instead of print():
# the rendered text is the same and the result stays available as Out[...].
df['y']

3         0
4         0
5         0
6         0
7         0
       ... 
464    5433
465    4434
466    3303
467    4958
468    5788
Name: y, Length: 466, dtype: int64


In [None]:
# Scratch cell: the stray, undefined name "ss" was removed so that
# Restart & Run All does not end in a NameError.