In [45]:
import pandas as pd
import numpy as np

In [46]:
# Read the CSV into df_wide, using its first column as the row index.
df_wide = pd.read_csv("../data/time_series/time_series_wide.csv", index_col=0)
# Convert the index labels to datetimes (the rendered output shows daily dates).
df_wide.index = pd.to_datetime(df_wide.index)

In [47]:
# Preview the first five rows of the wide table.
df_wide.head(5)

Unnamed: 0,A,B,C
2016-07-01,532,3314,1136
2016-07-02,798,2461,1188
2016-07-03,823,3522,1711
2016-07-04,937,5451,1977
2016-07-05,881,4729,1975


In [48]:
# Wide -> long: stack the columns into a second index level, then move that
# level (the former column label) out of the index into an ordinary column.
df_long = df_wide.stack()
df_long = df_long.reset_index(level=1)
# After the reset the frame has two columns: the former column label and the value.
df_long.columns = ["id", "value"]
df_long

Unnamed: 0,id,value
2016-07-01,A,532
2016-07-01,B,3314
2016-07-01,C,1136
2016-07-02,A,798
2016-07-02,B,2461
...,...,...
2016-12-30,B,4243
2016-12-30,C,2069
2016-12-31,A,869
2016-12-31,B,4703


In [49]:
# Long -> wide: pivot back to one column per distinct "id", filled from "value".
# `index` is deliberately left unspecified so the frame's existing index is used
# for the rows. Passing index=None explicitly is interpreted as a column label
# named None by newer pandas releases (2.x) and raises KeyError there.
df_wide = df_long.pivot(columns="id", values="value")
df_wide

id,A,B,C
2016-07-01,532,3314,1136
2016-07-02,798,2461,1188
2016-07-03,823,3522,1711
2016-07-04,937,5451,1977
2016-07-05,881,4729,1975
...,...,...,...
2016-12-27,840,4573,1850
2016-12-28,943,4511,1764
2016-12-29,978,4599,1787
2016-12-30,907,4243,2069


In [50]:
# Bind x to the wide table (same object, no copy); the following cells derive
# shifted and rolling features from x.
x = df_wide

In [51]:
# Lag-1 feature: every row receives the values of the row one position earlier,
# so the first row has no source and becomes NaN.
x_lag1 = x.shift(periods=1)
x_lag1

id,A,B,C
2016-07-01,,,
2016-07-02,532.0,3314.0,1136.0
2016-07-03,798.0,2461.0,1188.0
2016-07-04,823.0,3522.0,1711.0
2016-07-05,937.0,5451.0,1977.0
...,...,...,...
2016-12-27,840.0,4576.0,1954.0
2016-12-28,840.0,4573.0,1850.0
2016-12-29,943.0,4511.0,1764.0
2016-12-30,978.0,4599.0,1787.0


In [52]:
# Lag-7 feature: every row receives the values from seven rows earlier,
# so the first seven rows become NaN.
x_lag7 = x.shift(periods=7)
x_lag7

id,A,B,C
2016-07-01,,,
2016-07-02,,,
2016-07-03,,,
2016-07-04,,,
2016-07-05,,,
...,...,...,...
2016-12-27,890.0,3935.0,2085.0
2016-12-28,754.0,4846.0,2226.0
2016-12-29,992.0,4949.0,2181.0
2016-12-30,854.0,4619.0,2035.0


In [53]:
# Moving average over the 3 periods ending one period before the current row:
# shifting by one first keeps the current row's own value out of its window.
x_avg3 = x.shift(periods=1).rolling(3).mean()
x_avg3

id,A,B,C
2016-07-01,,,
2016-07-02,,,
2016-07-03,,,
2016-07-04,717.666667,3099.000000,1345.000000
2016-07-05,852.666667,3811.333333,1625.333333
...,...,...,...
2016-12-27,817.333333,4956.000000,1976.666667
2016-12-28,797.333333,4726.000000,1878.666667
2016-12-29,874.333333,4553.333333,1856.000000
2016-12-30,920.333333,4561.000000,1800.333333


In [54]:
# Maximum over the 7 periods ending one period before the current row
# (shifted by one so the current row's own value is excluded).
x_max = x.shift(periods=1).rolling(7).max()
x_max

id,A,B,C
2016-07-01,,,
2016-07-02,,,
2016-07-03,,,
2016-07-04,,,
2016-07-05,,,
...,...,...,...
2016-12-27,992.0,5263.0,2226.0
2016-12-28,992.0,5263.0,2226.0
2016-12-29,992.0,5263.0,2181.0
2016-12-30,978.0,5263.0,2144.0


In [55]:
# Lag features: read the two CSV files used by the following cells into
# train_x and event_history.
train_x = pd.read_csv("../data/time_series/time_series_train.csv")
event_history = pd.read_csv("../data/time_series/time_series_events.csv")

In [56]:
# Parse the "date" column of both tables into datetimes; later cells compare
# and join the two tables on this column.
train_x["date"] = pd.to_datetime(train_x["date"])
event_history["date"] = pd.to_datetime(event_history["date"])


In [57]:
# Display train_x (columns in the rendered output: user_id, date, target).
train_x

Unnamed: 0,user_id,date,target
0,1,2018-01-01,1
1,1,2018-01-02,1
2,1,2018-01-03,1
3,1,2018-01-04,1
4,1,2018-01-05,0
...,...,...,...
36495,100,2018-12-27,0
36496,100,2018-12-28,1
36497,100,2018-12-29,1
36498,100,2018-12-30,1


In [58]:
# Display event_history (columns in the rendered output: date, event).
event_history

Unnamed: 0,date,event
0,2018-01-03,sale
1,2018-01-03,conpon
2,2018-01-04,points
3,2018-01-05,points
4,2018-05-03,sale
5,2018-05-04,sale
6,2018-05-05,sale
7,2018-05-06,points
8,2018-05-07,points
9,2018-05-08,points


In [63]:
# Calendar with one row per distinct date found in train_x, in ascending order.
dates = np.sort(train_x["date"].unique())
occurrences = pd.DataFrame(dates, columns=["date"])
# Flag the calendar dates on which event_history records a "sale" event.
sale_history = event_history[event_history["event"] == "sale"]
occurrences["sale"] = occurrences["date"].isin(sale_history["date"])
# The frame is shown once via the cell's last expression (no intermediate print).
occurrences


Unnamed: 0,date,sale
0,2018-01-01,False
1,2018-01-02,False
2,2018-01-03,True
3,2018-01-04,False
4,2018-01-05,False
...,...,...
360,2018-12-27,False
361,2018-12-28,False
362,2018-12-29,False
363,2018-12-30,True


In [60]:
# Replace the per-day flag with the running number of sale days up to and
# including each date.
# The count is recomputed from sale_history rather than from occurrences["sale"]
# itself, so re-running this cell yields the same result instead of taking the
# cumulative sum of an already-accumulated column.
occurrences["sale"] = occurrences["date"].isin(sale_history["date"]).cumsum()
occurrences

Unnamed: 0,date,sale
0,2018-01-01,0
1,2018-01-02,0
2,2018-01-03,1
3,2018-01-04,1
4,2018-01-05,1
...,...,...
360,2018-12-27,4
361,2018-12-28,4
362,2018-12-29,4
363,2018-12-30,5


In [61]:
# Attach the cumulative sale count to every train_x row by matching on "date"
# (left join, so every train_x row is kept).
# A "sale" column left over from an earlier run of this cell is dropped first;
# without that, re-running would produce sale_x / sale_y instead of "sale".
train_x = train_x.drop(columns="sale", errors="ignore").merge(
    occurrences, on="date", how="left"
)
train_x

Unnamed: 0,user_id,date,target,sale
0,1,2018-01-01,1,0
1,1,2018-01-02,1,0
2,1,2018-01-03,1,1
3,1,2018-01-04,1,1
4,1,2018-01-05,0,1
...,...,...,...,...
36495,100,2018-12-27,0,4
36496,100,2018-12-28,1,4
36497,100,2018-12-29,1,4
36498,100,2018-12-30,1,5
