In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

In [2]:
import os
import sys
import glob
import pickle
import random

In [3]:
import numpy as np
import pandas as pd
import scipy.stats as stats
from pathlib import Path
import matplotlib.pyplot as plt # visualization
import seaborn as sns


In [4]:
from sklearn.model_selection import StratifiedKFold, GroupKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import RidgeCV
from sklearn.svm import SVR, LinearSVR
from sklearn.ensemble import StackingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.cross_decomposition import PLSRegression

In [5]:
sys.path.append('../../')
import src.utils as utils

In [6]:
import multiprocessing
import scipy.interpolate
import scipy.sparse
from tqdm import tqdm

from indoor_location_competition_20.io_f import read_data_file
import indoor_location_competition_20.compute_f as compute_f

In [7]:
DATA_DIR = Path("/home/knikaido/work/Indoor-Location-Navigation/data/")
WIFI_DIR = DATA_DIR / 'indoorunifiedwifids_original'
ENSAMBLE_DIR = DATA_DIR / 'ensamble'
OUTPUT_DIR = Path('./output/')

In [8]:
sub = pd.read_csv(DATA_DIR/'indoor-location-navigation/sample_submission.csv', index_col=0)
test_df = pd.read_csv(WIFI_DIR / 'test_10_th10000.csv')
test_df

Unnamed: 0,ssid_0,ssid_1,ssid_2,ssid_3,ssid_4,ssid_5,ssid_6,ssid_7,ssid_8,ssid_9,...,frequency_94,frequency_95,frequency_96,frequency_97,frequency_98,frequency_99,wp_tmestamp,path_id,site_id,site_path_timestamp
0,b9f0208be00bd8b337be7f12e02e3a3ce846e22b,ab150ecf6d972b476aeab16317bed6189d9f7cce,b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7,b7e6027447eb1f81327d66cfd3adbe557aabf26c,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,7182afc4e5c212133d5d7d76eb3df6c24618302b,da39a3ee5e6b4b0d3255bfef95601890afd80709,01e78c97f2c04cdbc7c4159158bb125a9bb558ff,b9f0208be00bd8b337be7f12e02e3a3ce846e22b,7182afc4e5c212133d5d7d76eb3df6c24618302b,...,5785,5785,5785,5785,5180,2417,1578474563646,046cfa46be49fc10834815c6,5a0546857ecc773753327266,5a0546857ecc773753327266_046cfa46be49fc1083481...
1,da39a3ee5e6b4b0d3255bfef95601890afd80709,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7,b7e6027447eb1f81327d66cfd3adbe557aabf26c,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,7182afc4e5c212133d5d7d76eb3df6c24618302b,b9f0208be00bd8b337be7f12e02e3a3ce846e22b,7182afc4e5c212133d5d7d76eb3df6c24618302b,b7e6027447eb1f81327d66cfd3adbe557aabf26c,b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7,...,5180,5180,5180,5180,5180,2452,1578474572654,046cfa46be49fc10834815c6,5a0546857ecc773753327266,5a0546857ecc773753327266_046cfa46be49fc1083481...
2,b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7,da39a3ee5e6b4b0d3255bfef95601890afd80709,b7e6027447eb1f81327d66cfd3adbe557aabf26c,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,7182afc4e5c212133d5d7d76eb3df6c24618302b,b9f0208be00bd8b337be7f12e02e3a3ce846e22b,da39a3ee5e6b4b0d3255bfef95601890afd80709,b9f0208be00bd8b337be7f12e02e3a3ce846e22b,b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,...,2452,2452,2457,5745,2437,2437,1578474578963,046cfa46be49fc10834815c6,5a0546857ecc773753327266,5a0546857ecc773753327266_046cfa46be49fc1083481...
3,da39a3ee5e6b4b0d3255bfef95601890afd80709,da39a3ee5e6b4b0d3255bfef95601890afd80709,b9f0208be00bd8b337be7f12e02e3a3ce846e22b,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7,b7e6027447eb1f81327d66cfd3adbe557aabf26c,b7e6027447eb1f81327d66cfd3adbe557aabf26c,7182afc4e5c212133d5d7d76eb3df6c24618302b,5731b8e08abc69d4c4d685c58164059207c93310,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,...,5240,5240,5260,2432,2432,5240,1578474582400,046cfa46be49fc10834815c6,5a0546857ecc773753327266,5a0546857ecc773753327266_046cfa46be49fc1083481...
4,da39a3ee5e6b4b0d3255bfef95601890afd80709,da39a3ee5e6b4b0d3255bfef95601890afd80709,7182afc4e5c212133d5d7d76eb3df6c24618302b,b9f0208be00bd8b337be7f12e02e3a3ce846e22b,b6ffe5619e02871fcd04f61c9bb4b5c53a3f46b7,b7e6027447eb1f81327d66cfd3adbe557aabf26c,26571eb257b6a34b6beb93c05768e751d19279a6,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,d839a45ebe64ab48b60a407d837fb01d3c0dfef9,da39a3ee5e6b4b0d3255bfef95601890afd80709,...,5300,5300,5300,5300,5260,5260,1578474585965,046cfa46be49fc10834815c6,5a0546857ecc773753327266,5a0546857ecc773753327266_046cfa46be49fc1083481...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10113,0f927dce74ec3475c7a39299e5bffab222ca665d,5d998a8668536c4f51004c25f474117fe9555f78,8e69018f6343506344ab13646ccd9447fc2ffb19,efa1fe97c4b2a6c57d98de9c236a109032ceb839,da39a3ee5e6b4b0d3255bfef95601890afd80709,5d998a8668536c4f51004c25f474117fe9555f78,c72257cf75d6a411225948bbc4e37ad554e7beda,da39a3ee5e6b4b0d3255bfef95601890afd80709,5d998a8668536c4f51004c25f474117fe9555f78,da39a3ee5e6b4b0d3255bfef95601890afd80709,...,5785,5805,5200,5200,5280,5785,1573731143256,fd64de8c4a2fc5ebb0e9f412,5dc8cea7659e181adb076a3f,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...
10114,8e69018f6343506344ab13646ccd9447fc2ffb19,0f927dce74ec3475c7a39299e5bffab222ca665d,efa1fe97c4b2a6c57d98de9c236a109032ceb839,da39a3ee5e6b4b0d3255bfef95601890afd80709,5d998a8668536c4f51004c25f474117fe9555f78,5d998a8668536c4f51004c25f474117fe9555f78,da39a3ee5e6b4b0d3255bfef95601890afd80709,8e69018f6343506344ab13646ccd9447fc2ffb19,5d998a8668536c4f51004c25f474117fe9555f78,0eb4ecd03a3128e62bdff60e6f686d88e783441e,...,5805,5200,5280,5785,5260,5785,1573731146426,fd64de8c4a2fc5ebb0e9f412,5dc8cea7659e181adb076a3f,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...
10115,8e69018f6343506344ab13646ccd9447fc2ffb19,5d998a8668536c4f51004c25f474117fe9555f78,8e69018f6343506344ab13646ccd9447fc2ffb19,5d998a8668536c4f51004c25f474117fe9555f78,c72257cf75d6a411225948bbc4e37ad554e7beda,da39a3ee5e6b4b0d3255bfef95601890afd80709,5d998a8668536c4f51004c25f474117fe9555f78,da39a3ee5e6b4b0d3255bfef95601890afd80709,5d998a8668536c4f51004c25f474117fe9555f78,5d998a8668536c4f51004c25f474117fe9555f78,...,5180,5220,5280,5220,5300,5785,1573731151563,fd64de8c4a2fc5ebb0e9f412,5dc8cea7659e181adb076a3f,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...
10116,5d998a8668536c4f51004c25f474117fe9555f78,8e69018f6343506344ab13646ccd9447fc2ffb19,5d998a8668536c4f51004c25f474117fe9555f78,8e69018f6343506344ab13646ccd9447fc2ffb19,da39a3ee5e6b4b0d3255bfef95601890afd80709,aa449fabc4dcb24836d950b5cff91f08e574c3a7,c72257cf75d6a411225948bbc4e37ad554e7beda,da39a3ee5e6b4b0d3255bfef95601890afd80709,efa1fe97c4b2a6c57d98de9c236a109032ceb839,6381b3137285875eda5bc77d9b8cc7650f0f5d7d,...,5280,5200,5805,5300,5280,5745,1573731157567,fd64de8c4a2fc5ebb0e9f412,5dc8cea7659e181adb076a3f,5dc8cea7659e181adb076a3f_fd64de8c4a2fc5ebb0e9f...


In [9]:
configs = {
    'loss':{
        'name': 'MSELoss',
        'params':{}
    },
    'optimizer':{
        'name': 'Adam',
        'params':{
            'lr': 0.01,
        }
    },

    'scheduler':{
        'name': 'ReduceLROnPlateau',
        'params':{
            'factor': 0.1,
            'patience': 3,
        }
    },

    'loader':{
        'train':{
            'batch_size': 512,
            'shuffle': True,
            'num_workers': 4,
        },
        'valid':{
            'batch_size': 512,
            'shuffle': False,
            'num_workers': 4,
        },
        'test':{
            'batch_size': 512,
            'shuffle': False,
            'num_workers': 4,
        }
    }
}

In [10]:
# config
config = configs

# globals variable
SEED = 777
MAX_EPOCHS = 500
N_SPLITS = 5
DEBUG = False
# EXP_MESSAGE = config['globals']['exp_message']

EXP_NAME = 63
IS_SAVE = True

utils.set_seed(SEED)

In [11]:
def mean_position_error(xhat, yhat, fhat, x, y, f):
    intermediate = np.sqrt(np.power(xhat-x, 2) + np.power(yhat-y, 2)) + 15 * np.abs(fhat-f)
    return intermediate.sum()/xhat.shape[0]

In [12]:
oof_path_list = sorted(glob.glob(str(ENSAMBLE_DIR / 'oof/*.csv')))
sub_path_list = sorted(glob.glob(str(ENSAMBLE_DIR / 'sub/*.csv')))

In [13]:
oof_path_list, sub_path_list

(['/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/oof/exp114_oof.csv',
  '/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/oof/oof38.csv',
  '/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/oof/oof56.csv',
  '/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/oof/oof_exp1041_2-over_20.csv'],
 ['/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/sub/exp114_sub.csv',
  '/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/sub/sub38.csv',
  '/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/sub/sub56.csv',
  '/home/knikaido/work/Indoor-Location-Navigation/data/ensamble/sub/sub_exp1041_2-over_20.csv'])

In [14]:
train_df = pd.read_csv(WIFI_DIR / 'train_10_th10000_base25_withpassedtime_withdelta.csv')
train_df['site_path_timestamp'] = train_df['site_id'].astype(str) + '_' + train_df['path_id'].astype(str) + '_' + train_df['wp_tmestamp'].astype(str)
train_ans = train_df[['site_path_timestamp', 'x', 'y']]
train_ans = train_ans.groupby('site_path_timestamp').mean().reset_index()
train_ans

Unnamed: 0,site_path_timestamp,x,y
0,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,78.218190,38.999737
1,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,76.251840,30.510840
2,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,78.364815,10.530369
3,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,79.231520,7.736414
4,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,90.919060,13.197551
...,...,...,...
70730,5dc8cea7659e181adb076a3f_5dd7c1189191710006b56...,204.210920,106.165436
70731,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,204.210920,106.165436
70732,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,195.152830,109.600800
70733,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,193.204880,108.001520


In [15]:
oof1 = pd.read_csv(oof_path_list[0])
# oof_split = oof["site_path_timestamp"].apply(lambda x: pd.Series(x.split("_")))
# oof_split.columns = ['site_id', 'path_id', 'waypoint_timestamp']
# oof = pd.concat([oof, oof_split], axis=1)
oof1 = oof1.groupby('site_path_timestamp').mean().reset_index()

oof_merge1 = oof1.merge(train_ans, how='left', on='site_path_timestamp')
print(mean_position_error(
        oof_merge1['oof_x'], oof_merge1['oof_y'], 0,
        oof_merge1['x'], oof_merge1['y'], 0))
oof_merge1

6.258722526197294


Unnamed: 0,site_path_timestamp,id,oof_x,oof_y,x,y
0,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,6779.5,79.114958,34.031473,78.218190,38.999737
1,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,6784.5,81.592564,23.509898,76.251840,30.510840
2,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,6791.5,81.057491,17.869876,78.364815,10.530369
3,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,6797.0,82.680365,14.435582,79.231520,7.736414
4,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,6802.5,84.909554,20.034893,90.919060,13.197551
...,...,...,...,...,...,...
71155,5dc8cea7659e181adb076a3f_5dd7c1189191710006b56...,237824.5,209.614995,105.818230,204.210920,106.165436
71156,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,238379.5,205.162055,103.232903,204.210920,106.165436
71157,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,238382.5,203.384680,103.233974,195.152830,109.600800
71158,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,238387.0,198.570064,100.853285,193.204880,108.001520


In [16]:
oof2 = pd.read_csv(oof_path_list[1]).rename(columns={'x':'oof_x', 'y':'oof_y'})
oof2 = oof2.groupby('site_path_timestamp').mean().reset_index()
# oof2 = oof2[oof2['site_path_timestamp'].isin(oof2['site_path_timestamp'])]

oof_merge2 = oof2.merge(train_ans, how='left', on='site_path_timestamp')
print(mean_position_error(
        oof_merge2['oof_x'], oof_merge2['oof_y'], 0,
        oof_merge2['x'], oof_merge2['y'], 0))
oof_merge2

6.665777203166837


Unnamed: 0,site_path_timestamp,oof_x,oof_y,timestamp,floor,x,y
0,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,81.996185,38.312645,1561368762838,2.0,78.218190,38.999737
1,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,95.181214,25.403005,1561368774057,2.0,76.251840,30.510840
2,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,84.866783,18.974395,1561368793535,2.0,78.364815,10.530369
3,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,81.353754,14.722527,1561368798851,2.0,79.231520,7.736414
4,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,89.134986,20.040224,1561368814782,2.0,90.919060,13.197551
...,...,...,...,...,...,...,...
70730,5dc8cea7659e181adb076a3f_5dd7c1189191710006b56...,211.623670,104.108293,1574420722415,-1.0,204.210920,106.165436
70731,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,202.869270,102.279287,1574420726383,-1.0,204.210920,106.165436
70732,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,199.769120,103.535991,1574420736142,-1.0,195.152830,109.600800
70733,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,203.473558,100.902021,1574420742502,-1.0,193.204880,108.001520


In [17]:
oof3 = pd.read_csv(oof_path_list[2]).rename(columns={'x':'oof_x', 'y':'oof_y'})
oof3 = oof3.groupby('site_path_timestamp').mean().reset_index()
# oof3 = oof3[oof3['site_path_timestamp'].isin(oof1['site_path_timestamp'])]

oof_merge3 = oof3.merge(train_ans, how='left', on='site_path_timestamp')
print(mean_position_error(
        oof_merge3['oof_x'], oof_merge3['oof_y'], 0,
        oof_merge3['x'], oof_merge3['y'], 0))
oof_merge3

6.029105180018331


Unnamed: 0,site_path_timestamp,oof_x,oof_y,timestamp,floor,x,y
0,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,82.841790,34.940205,1561368762838,2.0,78.218190,38.999737
1,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,85.149216,22.246147,1561368774057,2.0,76.251840,30.510840
2,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,95.762950,16.003378,1561368793535,2.0,78.364815,10.530369
3,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,85.683110,13.106474,1561368798851,2.0,79.231520,7.736414
4,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,91.399000,19.798033,1561368814782,2.0,90.919060,13.197551
...,...,...,...,...,...,...,...
69804,5dc8cea7659e181adb076a3f_5dd7c1189191710006b56...,202.411650,100.869780,1574420722415,-1.0,204.210920,106.165436
69805,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,205.457350,103.478400,1574420726383,-1.0,204.210920,106.165436
69806,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,200.831650,106.462330,1574420736142,-1.0,195.152830,109.600800
69807,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,201.989990,106.760635,1574420742502,-1.0,193.204880,108.001520


In [18]:
oof4 = pd.read_csv(oof_path_list[3]).rename(columns={'x':'oof_x', 'y':'oof_y'})
oof4 = oof4.groupby('site_path_timestamp').mean().reset_index()
# oof4 = oof4[oof4['site_path_timestamp'].isin(oof1['site_path_timestamp'])]
oof_merge4 = oof4.merge(train_ans, how='left', on='site_path_timestamp')
print(mean_position_error(
        oof_merge4['oof_x'], oof_merge4['oof_y'], 0,
        oof_merge4['x'], oof_merge4['y'], 0))
oof_merge4

6.1865071009179005


Unnamed: 0,site_path_timestamp,floor,oof_x,oof_y,x,y
0,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,2,79.760246,33.434628,78.218190,38.999737
1,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,2,82.917722,25.986174,76.251840,30.510840
2,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,2,86.344335,16.054341,78.364815,10.530369
3,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,2,84.397659,16.760333,79.231520,7.736414
4,5a0546857ecc773753327266_5d10a1669c50c70008fe8...,2,85.460541,18.962496,90.919060,13.197551
...,...,...,...,...,...,...
69512,5dc8cea7659e181adb076a3f_5dd7c1189191710006b56...,-1,210.815361,103.487339,204.210920,106.165436
69513,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,-1,204.402588,104.824085,204.210920,106.165436
69514,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,-1,201.625824,105.443407,195.152830,109.600800
69515,5dc8cea7659e181adb076a3f_5dd7c119c5b77e0006b16...,-1,202.255939,102.490321,193.204880,108.001520


In [19]:
sets = list(set(oof1['site_path_timestamp']) & set(oof2['site_path_timestamp']) & set(oof3['site_path_timestamp']) & set(oof4['site_path_timestamp']))

In [20]:
oof1 = oof1[oof1['site_path_timestamp'].isin(sets)]
oof2 = oof2[oof2['site_path_timestamp'].isin(sets)]
oof3 = oof3[oof3['site_path_timestamp'].isin(sets)]
oof4 = oof4[oof4['site_path_timestamp'].isin(sets)]
train_ans = train_ans[train_ans['site_path_timestamp'].isin(sets)]

In [21]:
oof1_x = oof1['oof_x'].reset_index(drop=True)
oof1_x.name = 'oof_x1'
oof2_x = oof2['oof_x'].reset_index(drop=True)
oof2_x.name = 'oof_x2'
oof3_x = oof3['oof_x'].reset_index(drop=True)
oof3_x.name = 'oof_x3'
oof4_x = oof4['oof_x'].reset_index(drop=True)
oof4_x.name = 'oof_x4'
train_ans_x = train_ans['x'].reset_index(drop=True)

oof1_y = oof1['oof_y'].reset_index(drop=True)
oof1_y.name = 'oof_y1'
oof2_y = oof2['oof_y'].reset_index(drop=True)
oof2_y.name = 'oof_y2'
oof3_y = oof3['oof_y'].reset_index(drop=True)
oof3_y.name = 'oof_y3'
oof4_y = oof4['oof_y'].reset_index(drop=True)
oof4_y.name = 'oof_y4'
train_ans_y = train_ans['y'].reset_index(drop=True)

In [22]:
sets_df = pd.Series(sets, name='site_path_timestamp')

In [23]:
sets_split = sets_df.apply(lambda x: pd.Series(x.split("_")))
sets_split.columns = ['site_id', 'path_id', 'waypoint_timestamp']
sets_df = pd.concat([sets_df, sets_split], axis=1)

In [24]:
oof_x = pd.concat([oof1_x, oof2_x, oof3_x, oof4_x, sets_df['path_id']], axis=1)
y_train_x = train_ans_x
oof_y = pd.concat([oof1_y, oof2_y, oof3_y, oof4_y, sets_df['path_id']], axis=1)
y_train_y = train_ans_y

In [25]:
# colormap = plt.cm.RdBu
# plt.figure(figsize=(14,12))
# sns.heatmap(oof_x.astype(float).corr(),linewidths=0.1,vmax=1.0, 
#             square=True, cmap=colormap, linecolor='white', annot=True)

In [26]:
estimators = [
#         ('svr', make_pipeline(StandardScaler(), SVR())),
        ('rf', RandomForestRegressor()),
        ('mlp', MLPRegressor(max_iter=10000)),
        ('plr', PLSRegression()),
#         ('svr', LinearSVR(random_state=SEED))
        ]
reg = StackingRegressor(
    estimators=estimators,
#     final_estimator=PLSRegression(),
    final_estimator=RidgeCV()
)

In [27]:
oofs = np.zeros((len(y_train_x), 2), dtype = np.float32)  # 全てのoofをdfで格納する
predictions = []  # 全ての予測値をdfで格納する
val_scores = []
# skf = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
gkf = GroupKFold(n_splits=N_SPLITS)
# for fold, (trn_idx, val_idx) in enumerate(skf.split(train.loc[:, 'path'], train.loc[:, 'path'])):
for fold, (trn_idx, val_idx) in enumerate(gkf.split(oof_x.loc[:, 'path_id'], groups=oof_x.loc[:, 'path_id'])):
    oof_x_train = oof_x.iloc[trn_idx, :-1]
    oof_x_valid = oof_x.iloc[val_idx, :-1]
    y_x_train = y_train_x[trn_idx]
    y_x_valid = y_train_x[val_idx]
    
    oof_y_train = oof_y.iloc[trn_idx, :-1]
    oof_y_valid = oof_y.iloc[val_idx, :-1]
    y_y_train = y_train_y[trn_idx]
    y_y_valid = y_train_y[val_idx]
    
    model = reg.fit(oof_x_train, y_x_train)
    ans_x_trian = model.predict(oof_x_train)
    ans_x_valid = model.predict(oof_x_valid)
    
    model = reg.fit(oof_y_train, y_y_train)
    ans_y_trian = model.predict(oof_y_train)
    ans_y_valid = model.predict(oof_y_valid)
    
    score_train = mean_position_error(ans_x_trian.reshape(-1), ans_y_trian.reshape(-1), 0, y_x_train, y_y_train, 0)
    score_valid = mean_position_error(ans_x_valid.reshape(-1), ans_y_valid.reshape(-1), 0, y_x_valid, y_y_valid, 0)

    print(f'fold = {fold}, train = {score_train}')
    print(f'fold = {fold}, valid = {score_valid}')
    
    oofs[val_idx, 0] = ans_x_valid.reshape(-1)
    oofs[val_idx, 1] = ans_y_valid.reshape(-1)



fold = 0, train = 5.709293563741813
fold = 0, valid = 5.688815549062892




fold = 1, train = 5.737047525514943
fold = 1, valid = 5.67876280756879




fold = 2, train = 5.7553166907984235
fold = 2, valid = 5.649894186518582




fold = 3, train = 5.789266702045538
fold = 3, valid = 5.616743404836135




fold = 4, train = 5.7709960625877175
fold = 4, valid = 5.583111211496614


In [28]:
mean_position_error(oofs[:, 0], oofs[:, 1], 0, y_train_x, y_train_y, 0)

5.643465419855838

In [30]:
mean_position_error(oofs[:, 0], oofs[:, 1], 0, y_train_x, y_train_y, 0)

5.725275973574795