In [None]:
# !pip install tensorflow-gpu
import pickle
import seaborn as sns
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import gc
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
# Lift pandas' display limits so DataFrames are shown with every row and column (no truncation).
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
from sklearn.metrics import accuracy_score, confusion_matrix, recall_score, precision_score, f1_score, roc_auc_score, roc_curve, log_loss

# Mount Google Drive at /content/drive; every data path used below lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


トレーニングデータの説明変数とテストデータをロード

In [None]:
# Directories that hold the pickled LightGBM / GOSS predictions used as stacking inputs.
lgbm_path = '/content/drive/MyDrive/apartment_price_forecasting/data/stacking/20220916/lgbm/'
goss_path = '/content/drive/MyDrive/apartment_price_forecasting/data/stacking/20220916/goss/'


def _read_pickle(file_path):
    """Return the object stored in the pickle file at ``file_path``."""
    # NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
    with open(file_path, 'rb') as handle:
        return pickle.load(handle)


# Train / test predictions of each first-layer model.
preds_train_lgbm = _read_pickle(lgbm_path + 'preds_train_lgbm.pkl')
preds_test_lgbm = _read_pickle(lgbm_path + 'preds_test_lgbm.pkl')
preds_train_goss = _read_pickle(goss_path + 'preds_train_goss.pkl')
preds_test_goss = _read_pickle(goss_path + 'preds_test_goss.pkl')

In [None]:
# Join the LightGBM and GOSS prediction arrays along axis 0, then transpose the result.
# presumably each input is (n_models, n_samples), giving (n_samples, n_features) — TODO confirm
train_x = np.concatenate([preds_train_lgbm, preds_train_goss], axis=0).T
test_x = np.concatenate([preds_test_lgbm, preds_test_goss], axis=0).T

トレーニングデータの目的変数をロード

In [None]:
# Load the combined frame that carries the target column `y`.
# NOTE(review): pickle.load can execute arbitrary code; only use it on trusted files.
pickle_path = '/content/drive/MyDrive/apartment_price_forecasting/data/training_data/df_concat_ch:layout_cnst_usage_no_ext.pkl'
with open(pickle_path, mode='rb') as concat_file:
    df_concat = pickle.load(concat_file)

In [None]:
# Columns removed from df_concat before it is merged with the external features.
rm_col = [
    'parking', 'shop', 'others', 'plant', 'warehouse', 'workshop',
    'house', 'office', 'usage_unknown',
    'usage_le', 'construction_structure_le', 'layout_le',
]
# NOTE: re-running this cell without reloading df_concat raises KeyError (columns already gone).
df_concat = df_concat.drop(columns=rm_col)

In [None]:
# Load the train / test frames that carry the external features.
# (pickle_path now refers to the directory rather than a single file.)
pickle_path = '/content/drive/MyDrive/apartment_price_forecasting/data/training_data/'

with open(pickle_path + 'df_train_ext.pkl', mode='rb') as train_file:
    df_train_ext = pickle.load(train_file)

with open(pickle_path + 'df_test_ext.pkl', mode='rb') as test_file:
    df_test_ext = pickle.load(test_file)

In [None]:
# Keep only the join key and the external feature columns, identically for train and test.
ext_cols = ['ID', 'passengers_2017', 'lon', 'lat', 'r2_price']
df_train_ext = df_train_ext[ext_cols]
df_test_ext = df_test_ext[ext_cols]

In [None]:
# Rows with y >= 0 form the training set; all remaining rows (including any where y is NaN,
# since the comparison is False there) form the test set, whose y column is then dropped.
# Both sides are left-joined to their external features on ID.
df_train = df_concat.query('y >= 0').merge(df_train_ext, on='ID', how='left')
df_test = (
    df_concat.query('not y >= 0')
    .merge(df_test_ext, on='ID', how='left')
    .drop(columns='y')
)

In [None]:
# Target as an (n_samples, 1) column vector.
# NOTE(review): assumes df_train rows are in the same order as the rows of train_x — confirm.
train_y = np.array(df_train['y'])[:, np.newaxis]

### スタッキング第二層：線形回帰モデル

In [None]:
from sklearn.linear_model import LinearRegression

シンプルな線形回帰

In [None]:
# Ordered (unshuffled) hold-out split: first 70% of the rows for training, the rest for validation.
idx = int(train_x.shape[0] * 0.7)
x_train, x_valid = train_x[:idx, :], train_x[idx:, :]
y_train, y_valid = train_y[:idx, :], train_y[idx:, :]

In [None]:
# Train the meta-model on the training split.
meta_model = LinearRegression().fit(x_train, y_train)

# Mean absolute error on the validation split (one value per target column).
valid_pred = meta_model.predict(x_valid)
mae_avg = np.sum(np.abs(valid_pred - y_valid), axis=0) / valid_pred.shape[0]
print(mae_avg)

# Predictions for the test set from this single model.
pred_test = meta_model.predict(test_x)

k-foldで分割して線形回帰

In [None]:
from sklearn.model_selection import KFold

# 4-fold split with a fixed seed; one linear meta-model is trained per fold.
kf = KFold(n_splits=4, shuffle=True, random_state=71)

# Test-set predictions, one array per fold.
pred_list = []
for train_index, valid_index in kf.split(train_x):
    # Only the training part of each fold is used; valid_index is not evaluated here.
    x_train, y_train = train_x[train_index, :], train_y[train_index, :]

    # Train the meta-model and predict the test set with it.
    meta_model = LinearRegression().fit(x_train, y_train)
    pred_list.append(meta_model.predict(test_x))

In [None]:
# Average the per-fold test predictions element-wise.
pred_test = np.mean(np.array(pred_list), axis=0)

In [None]:
# Load the submission template and write the averaged predictions into its target column.
data_path = '/content/drive/MyDrive/apartment_price_forecasting/data/sample_submission.csv'
target_col = '取引価格（総額）_log'
sample_sub = pd.read_csv(data_path)
sample_sub[target_col] = pred_test

In [None]:
# Write the linear-regression stacking submission to Drive (no index column).
save_path = '/content/drive/MyDrive/apartment_price_forecasting/data/'
submission_file = save_path + 'test_submission_stacking_linear.csv'
sample_sub.to_csv(submission_file, index=False)

### スタッキング第二層：SVR

In [None]:
from sklearn.svm import SVR
from sklearn.model_selection import KFold

# 4-fold split with a fixed seed; one SVR meta-model is trained per fold.
kf = KFold(n_splits=4, shuffle=True, random_state=71)

# Test-set predictions, one 1-D array per fold.
pred_list = []

for train_index, valid_index in kf.split(train_x):
    # Only the training part of each fold is used; valid_index is not evaluated here.
    x_train = train_x[train_index, :]
    y_train = train_y[train_index, :]

    # Train the meta-model.
    # SVR expects a 1-D target: passing the (n, 1) column vector makes scikit-learn emit a
    # DataConversionWarning on every fold, so flatten it explicitly.
    regr = SVR()
    regr.fit(x_train, y_train.ravel())
    pred_list.append(regr.predict(test_x))

  y = column_or_1d(y, warn=True)


In [None]:
# Average the per-fold SVR test predictions element-wise.
pred_test = np.mean(np.array(pred_list), axis=0)

### スタッキング第二層：NNモデル

In [None]:
# Small fully connected network used as the NN meta-model: 5 -> 3 -> 1 units.
model = models.Sequential([
    Dense(5, activation='relu', input_shape=(train_x.shape[1],)),  # `activation` is the activation function
    # (a Dropout(0.2) layer is left disabled at this position)
    Dense(3, activation='relu'),
    Dense(1),
])

# Compile the model: Adam with learning rate 0.005, MAE as both loss and reported metric.
model.compile(loss='mae', metrics=['mae'], optimizer=tf.optimizers.Adam(0.005))

# EarlyStopping settings: stop once val_loss has failed to improve for 2 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, min_delta=0.0)

In [None]:
#モデルを学習させる
from sklearn.model_selection import KFold
kf = KFold(n_splits=4, shuffle=True, random_state=71)

pred_list = []
log_list = []
model_list = []
cnt = 1
for train_index, valid_index in kf.split(train_x):
    print('--------------------------- training No. %d' %cnt + ' ---------------------------')
    x_train = train_x[train_index, :]
    y_train = train_y[train_index, :]
    x_valid = train_x[valid_index, :]
    y_valid = train_y[valid_index, :]

    # メタモデルの学習 
    log = model.fit(x_train, y_train, epochs=3000, batch_size=500, verbose=True, validation_data=(x_valid, y_valid), callbacks=[early_stopping])
    pred_list.append(model.predict(test_x))
    log_list.append(log)
    model_list.append(model)
    
    cnt += 1

--------------------------- training No. 1 ---------------------------
Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
--------------------------- training No. 2 ---------------------------
Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000
--------------------------- training No. 3 ---------------------------
Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
--------------------------- training No. 4 ---------------------------
Epoch 1/3000
Epoch 2/3000
Epoch 3/3000
Epoch 4/3000
Epoch 5/3000
Epoch 6/3000
Epoch 7/3000


In [None]:
# Average the per-fold NN test predictions and shape the result as a column vector.
y = np.mean(np.array(pred_list), axis=0).reshape(-1, 1)

In [None]:
# Load the submission template and write the averaged NN predictions into its target column.
data_path = '/content/drive/MyDrive/apartment_price_forecasting/data/sample_submission.csv'
target_col = '取引価格（総額）_log'
sample_sub = pd.read_csv(data_path)
sample_sub[target_col] = y

In [None]:
# Write the NN stacking submission to Drive (no index column).
save_path = '/content/drive/MyDrive/apartment_price_forecasting/data/'
submission_file = save_path + 'test_submission_stacking_NN_20220920.csv'
sample_sub.to_csv(submission_file, index=False)

In [None]:
# Container for the hand-picked per-fold test predictions.
best_list = list()

In [None]:
# Keep the predictions of folds 2 and 3 (0-based indices 1 and 2).
# NOTE: re-running this cell appends the same entries again.
best_list.extend([pred_list[1], pred_list[2]])

In [None]:
# Average only the hand-picked folds collected in best_list. Averaging pred_list here would
# just repeat the all-fold mean computed earlier and leave best_list unused.
y = np.array(best_list).mean(axis=0).reshape(-1, 1)
y.shape

(21005, 1)