In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the daily sales records and the item -> category lookup table.
sales_train = pd.read_csv("../input/competitive-data-science-predict-future-sales/sales_train.csv")
# The competition file is expected to store dates day-first (dd.mm.yyyy) -- verify against the CSV.
# Without `dayfirst=True`, a string such as "02.01.2013" is read month-first (1 February),
# and recent pandas versions raise as soon as a day above 12 contradicts the guessed format.
sales_train["date"] = pd.to_datetime(sales_train["date"], dayfirst=True)
items = pd.read_csv("../input/competitive-data-science-predict-future-sales/items.csv")
# `item_name` is not referenced by any later cell shown here, so drop it.
items = items.drop("item_name", axis=1)
sales_train

In [None]:
# Histograms and pairwise scatter plots (exploration; currently commented out)

# import seaborn as sns
# from matplotlib import pyplot as plt

# sns.pairplot(sales_train)
# plt.show()

In [None]:
# Outlier removal (daily count and price): keep only rows with at most 2000 units
# sold in a day and a unit price of at most 100000.
within_count_cap = sales_train["item_cnt_day"] <= 2000
within_price_cap = sales_train["item_price"] <= 100000
sales_train = sales_train[within_count_cap & within_price_cap]

In [None]:
# Attach each item's category id to the sales rows (default inner join on item_id).
train_merge = sales_train.merge(items, on="item_id")
train_merge

In [None]:
# Mean price per item_id, as a one-column frame indexed by item_id.
# Only `item_price` is aggregated: averaging every column of `train_merge` (including the
# datetime `date` column) is wasted work, and adding a column to a frame obtained by
# slicing can trigger SettingWithCopyWarning.
item_price_mean = (
    train_merge.groupby("item_id")["item_price"]
    .mean()
    .to_frame(name="item_price_mean")
)
item_price_mean

In [None]:
# Mean price per item_category_id, as a one-column frame indexed by item_category_id.
# Only `item_price` is aggregated: averaging every column of `train_merge` (including the
# datetime `date` column) is wasted work, and adding a column to a frame obtained by
# slicing can trigger SettingWithCopyWarning.
item_category_mean = (
    train_merge.groupby("item_category_id")["item_price"]
    .mean()
    .to_frame(name="item_category_mean")
)
item_category_mean

In [None]:
# Attach the per-category mean price to every item_id:
# join on item_category_id, re-index by item_id, then discard the join key.
item_category_mean_2 = (
    item_category_mean
    .merge(items, on="item_category_id")
    .set_index("item_id")
    .drop(columns="item_category_id")
)
item_category_mean_2

In [None]:
# Group the daily rows by (month, shop, item)
monthly_keys = ["date_block_num", "shop_id", "item_id"]
grouped = sales_train.groupby(by=monthly_keys)

In [None]:
# Monthly unit total per (month, shop, item), stored in a single `item_cnt_month` column.
# Only `item_cnt_day` is summed: `grouped.sum()` would also try to sum the datetime `date`
# column, which pandas >= 2.0 rejects with a TypeError (older versions silently dropped it),
# and the summed `item_price` was discarded anyway.
train_sum = grouped["item_cnt_day"].sum().to_frame(name="item_cnt_month")

train_sum

In [None]:
# Summary statistics of the monthly totals in `train_sum`
train_sum.describe()

In [None]:
# Total units sold per month, across all shops and items.
grouped_month = sales_train.groupby(["date_block_num"])
# Sum only `item_cnt_day`: summing every column would include the datetime `date`
# column, which pandas >= 2.0 rejects with a TypeError, and the other sums
# (shop_id, item_id, item_price) were dropped immediately afterwards.
month_sum = grouped_month[["item_cnt_day"]].sum()

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
 
# Draw a line chart of the monthly unit totals (x: date_block_num, y: units sold)
y = month_sum["item_cnt_day"]
x = y.index.values
plt.plot(x, y)

In [None]:
# Read the test set row by row and take out shop_id / item_id
# (this cell only defines the per-pair lookup; the reading itself is not done here)

def lstm_(shop_id, item_id, n_months=34, monthly_sales=None, verbose=True):
    """Return the monthly sales counts of one (shop, item) pair as a dense list.

    Parameters
    ----------
    shop_id, item_id
        Keys looked up in the "shop_id" and "item_id" index levels.
    n_months : int, default 34
        Number of months to cover; months 0 .. n_months - 1 are returned in order.
    monthly_sales : DataFrame, optional
        Frame indexed by (date_block_num, shop_id, item_id) with an
        "item_cnt_month" column. Defaults to the notebook-level `train_sum`.
    verbose : bool, default True
        When true, print the resulting list and its length.

    Returns
    -------
    list
        `n_months` values; months with no recorded sales are 0. A pair that
        never occurs in `monthly_sales` yields all zeros instead of raising.
    """
    if monthly_sales is None:
        monthly_sales = train_sum

    try:
        item = monthly_sales.xs(shop_id, level="shop_id").xs(item_id, level="item_id")
        monthly_counts = item["item_cnt_month"]
    except KeyError:
        # The shop, or the item within that shop, has no sales rows at all.
        monthly_counts = pd.Series(dtype="float64")

    # Align on months 0..n_months-1 in one step, filling months without sales with 0.
    cnt = monthly_counts.reindex(range(n_months), fill_value=0).tolist()

    if verbose:
        print(cnt, "(", len(cnt), ")")
    return cnt

# NOTE(review): `shop_id` and `item_id` are not assigned in any cell visible here, so this
# call raises NameError on a fresh kernel unless they are defined elsewhere -- confirm.
lstm_(shop_id, item_id)

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
import warnings
import scipy

from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

In [None]:
# Reshape the data into sliding windows for the LSTM
N = 12
data_value = train_sum["item_cnt_month"].values

# Row t holds data_value[t], ..., data_value[t + N]: N inputs followed by the target.
# NOTE(review): data_value follows the row order of `train_sum`, whose index is
# (date_block_num, shop_id, item_id), so a window covers N + 1 consecutive rows of that
# table rather than N + 1 months of one shop/item -- confirm this is intended.
n_rows = len(data_value) - N
data_lstm = np.column_stack([data_value[k: n_rows + k] for k in range(N + 1)])

# Fixed affine rescaling; presumably -22 and 1644 are the min and max of
# item_cnt_month -- TODO confirm against train_sum.describe().
data_lstm = (data_lstm + 22) / (1644 + 22)


In [None]:
# Hold out the last 3 windows; everything before them is used for training.
data_lstm_trn, data_lstm_tst = data_lstm[:-3], data_lstm[-3:]

# Inputs: every column except the last, with a trailing axis added.
# Targets: the last column of each window.
X_trn, y_trn = data_lstm_trn[:, :-1, np.newaxis], data_lstm_trn[:, -1]
X_tst, y_tst = data_lstm_tst[:, :-1, np.newaxis], data_lstm_tst[:, -1]

In [None]:
# Input geometry passed to the LSTM layer: one value per step, N steps per window.
in_out_neurons = 1
length_of_sequence = N

In [None]:
# LSTM (32 units, last output only) -> Dense(1) -> ReLU.
model = Sequential([
    LSTM(32, batch_input_shape=(None, length_of_sequence, in_out_neurons), return_sequences=False),
    Dense(1),
    Activation("relu"),
])

In [None]:
# `learning_rate` is the supported argument name from Keras 2.3 on; `lr` is only a
# deprecated alias there and is rejected by newer releases.
optimizer = Adam(learning_rate=0.001)
# No `metrics` are requested, so the fit history records only loss / val_loss.
model.compile(loss="mean_squared_error", optimizer=optimizer)

# The held-out windows (X_tst, y_tst) are used as validation data during fitting.
history = model.fit(
    X_trn,
    y_trn,
    batch_size=10,
    epochs=3,
    validation_data=(X_tst, y_tst),
)

In [None]:
# Plot the accuracy history
def plot_history_acc(rec):
    """Plot per-epoch training accuracy and, when recorded, validation accuracy.

    Parameters
    ----------
    rec
        Object whose ``history`` attribute maps metric names to per-epoch
        value lists (e.g. the value returned by ``model.fit``).

    Raises
    ------
    KeyError
        If neither "accuracy" nor "acc" is present in ``rec.history``.
    """
    history = rec.history
    # Keras >= 2.3 records "accuracy"/"val_accuracy"; older releases use "acc"/"val_acc".
    train_key = next((key for key in ("accuracy", "acc") if key in history), None)
    if train_key is None:
        raise KeyError(
            "no 'accuracy' or 'acc' entry in history; "
            "compile the model with metrics=['accuracy'] to record it"
        )

    plt.plot(history[train_key], "o-", label="train")
    # The validation curve exists only when validation data was passed to fit().
    val_key = "val_" + train_key
    if val_key in history:
        plt.plot(history[val_key], "o-", label="test")
    plt.title('accuracy history')
    plt.xlabel('epochs')
    plt.ylabel('accuracy')
    plt.legend(loc="lower right")
    plt.show()
    
# Plot the loss-function history
def plot_history_loss(rec):
    """Plot per-epoch training and validation loss from ``rec.history``.

    ``rec.history`` must contain the keys "loss" and "val_loss".
    """
    curves = (("loss", "train"), ("val_loss", "test"))
    for history_key, legend_name in curves:
        plt.plot(rec.history[history_key], "o-", label=legend_name)
    plt.title('loss history')
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.legend(loc='upper right')
    plt.show()