In [1]:
# Core libraries
import pandas as pd
import pandas.io.sql as psql
import numpy as np
import numpy.random as rd
import gc
import multiprocessing as mp
import os
import sys
import pickle
from collections import defaultdict
from glob import glob
import math
from datetime import datetime as dt
from pathlib import Path
import scipy.stats as st
import re
import shutil
from tqdm import tqdm_notebook as tqdm
import datetime
ts_conv = np.vectorize(datetime.datetime.fromtimestamp) # Unix time in seconds (10 digits) => datetime, applied element-wise

# Plotting
import matplotlib
from matplotlib import font_manager
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib import rc

from matplotlib import animation as ani
from IPython.display import Image

# Always draw patch edges (e.g. bar / histogram outlines).
plt.rcParams["patch.force_edgecolor"] = True
#rc('text', usetex=True)
from IPython.display import display # Allows the use of display() for DataFrames
import seaborn as sns
# Seaborn theme: white grid with dashed grid lines and the "muted" palette.
sns.set(style="whitegrid", palette="muted", color_codes=True)
sns.set_style("whitegrid", {'grid.linestyle': '--'})
# Named colours reused by plots.
red = sns.xkcd_rgb["light red"]
green = sns.xkcd_rgb["medium green"]
blue = sns.xkcd_rgb["denim blue"]

# Maximum number of characters shown per column (pandas default is 50).
pd.set_option("display.max_colwidth", 100)

# Show every row and every column (no truncation of displayed frames).
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
# Float display format: thousands separator and 5 decimal places.
pd.options.display.float_format = '{:,.5f}'.format

%matplotlib inline

In [2]:
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, KFold, RepeatedKFold

from sklearn import metrics
import json

import warnings
# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")


# Put the parent directory on the import path so the project-local `lib` package can be imported.
sys.path.append('..')
from lib.line_notif import send_message
from lib.utils import reduce_mem_usage, current_time, unpickle, to_pickle
from lib.utils import one_hot_encoder, apply_agg, multi_combine_categorical_feature
from lib.utils import import_data, get_split_indexer 

In [3]:
# Read the sample submission CSV.
# NOTE(review): the name `sub` is reassigned inside the prediction loop further down,
# so this frame is no longer reachable once that cell has run.
sub = pd.read_csv('../input/sample_submission.csv')

In [4]:
# Version / trial identifiers; they are interpolated into the directory paths below
# and into model and submission file names elsewhere in the notebook.
DATA_VERSION = "v001"
TRIAL_NO = "005"
# Directory the pickled train/test frames are read from.
save_path = Path(f"../processed/{DATA_VERSION}")
# Directory holding the saved boosters for this data version / trial.
model_path = Path(f"../model/{DATA_VERSION}_{TRIAL_NO}")
# Directory the submission CSV is written to.
submit_path = Path(f"../submit/{DATA_VERSION}_{TRIAL_NO}")

In [5]:
# Load the pickled test frame, index it by "id", and report its shape.
print("start loading...")
test = unpickle(save_path / "test_002.df.pkl")
print("test loaded.")
test.set_index("id", inplace=True)
print(f"test: {test.shape}")
print(test.shape)
# Object pickled under "lbl_molecule_name.pkl"; it is not used by the cells shown here.
groups = unpickle(save_path / "lbl_molecule_name.pkl")


def get_test_data_for_predict(mol_type):
    """Return the rows of the module-level ``test`` frame to predict on.

    Parameters
    ----------
    mol_type : int
        ``0`` returns ``test`` unfiltered; ``1``, ``2`` or ``3`` index ``test``
        with the array stored in ``test_type{mol_type}_cut.npy`` under
        ``save_path``.

    Returns
    -------
    ``test`` itself for ``mol_type == 0``, otherwise ``test[cut]`` where
    ``cut`` is the loaded array.

    Raises
    ------
    AssertionError
        If ``mol_type`` is not one of 0, 1, 2, 3.
    """
    print(f"mol_type: {mol_type}")

    # Raised explicitly (instead of `assert False`) so the check is not
    # stripped when Python runs with -O.
    if mol_type not in (0, 1, 2, 3):
        raise AssertionError(f"mol_type should be 0, 1, 2, 3. mol_type: {mol_type}")

    if mol_type == 0:
        # Previously this branch fell through to an unbound `test_type_cut`
        # and crashed.
        # NOTE(review): returning every row is assumed to be the intended
        # meaning of type 0 — confirm.
        return test

    # Load the mask from the same directory `test` was loaded from, rather
    # than a hard-coded "v001" path that can drift from DATA_VERSION.
    test_type_cut = np.load(save_path / f"test_type{int(mol_type)}_cut.npy")
    return test[test_type_cut]

def get_train_data_for_predict(mol_type):
    """Return the rows of the module-level ``train`` frame for one type group.

    ``train`` is read at call time, so it must already be loaded when this
    function is called.

    Parameters
    ----------
    mol_type : int
        ``0`` returns ``train`` unfiltered; ``1``, ``2`` or ``3`` index
        ``train`` with the array stored in ``train_type{mol_type}_cut.npy``
        under ``save_path``.

    Returns
    -------
    ``train`` itself for ``mol_type == 0``, otherwise ``train[cut]`` where
    ``cut`` is the loaded array.

    Raises
    ------
    AssertionError
        If ``mol_type`` is not one of 0, 1, 2, 3.
    """
    print(f"mol_type: {mol_type}")

    # Raised explicitly (instead of `assert False`) so the check is not
    # stripped when Python runs with -O.
    if mol_type not in (0, 1, 2, 3):
        raise AssertionError(f"mol_type should be 0, 1, 2, 3. mol_type: {mol_type}")

    if mol_type == 0:
        # Previously this branch fell through to an unbound `train_type_cut`
        # and crashed.
        # NOTE(review): returning every row is assumed to be the intended
        # meaning of type 0 — confirm.
        return train

    # Load the mask from the same directory the train pickle is read from,
    # rather than a hard-coded "v001" path that can drift from DATA_VERSION.
    train_type_cut = np.load(save_path / f"train_type{int(mol_type)}_cut.npy")
    return train[train_type_cut]

start loading...
test loaded.
test: (2505542, 448)
(2505542, 448)


In [None]:
# Predict the test set separately for each molecule-type group, average the
# per-fold boosters, and write all predictions to a single CSV.
sub_list = []
n_folds = 5
# Iteration tag embedded in the saved model file names, one per mol_type 1, 2, 3.
iterations = [29000, 7000, 20000]

# DataFrame.to_csv does not create missing directories; create the target up
# front so a missing folder cannot fail the cell after all predictions are done.
submit_path.mkdir(parents=True, exist_ok=True)

for mol_type, iteration in zip([1,2,3], iterations):
    print(f"mol_type: {mol_type}")
    test_ = get_test_data_for_predict(mol_type=mol_type)
    # One column of predictions per fold model.
    preds_test = np.zeros((test_.shape[0], n_folds))
    for fold_ in tqdm(range(n_folds)):
        # Built from model_path so the location follows DATA_VERSION / TRIAL_NO.
        saved_model_path = str(model_path / f"booster_{mol_type}_{fold_:02d}_{iteration}.model")
        bst = lgb.Booster(model_file=saved_model_path)
        preds_test[:, fold_] = bst.predict(test_)

    # Final prediction for this group is the mean over the fold models.
    sub = pd.DataFrame(index=test_.index)
    sub["scalar_coupling_constant"] = preds_test.mean(axis=1)
    sub_list.append(sub)

# Rows stay grouped by molecule type (type 1 first, then 2, then 3).
sub_all = pd.concat(sub_list, axis=0)
sub_all.to_csv(submit_path/f"sub_all_{DATA_VERSION}_{TRIAL_NO}.csv")

In [15]:
# Show the first lines of the submission CSV.
# NOTE(review): the prediction cell writes sub_all_{DATA_VERSION}_{TRIAL_NO}.csv,
# while this reads sub_all.csv — confirm which file is meant to be inspected.
!head ../submit/v001_005/sub_all.csv

id,scalar_coupling_constant
4658148,162.6267045798958
4658150,150.5427402349208
4658152,91.57319688567192
4658156,92.30873384342375
4658159,82.59355472025568
4658162,90.01716055160892
4658166,90.0016414843507
4658169,82.50557919822882
4658170,110.9599438151333


In [16]:
# !cp ../submit/v001_005/sub_all.csv ../submit/v001_005/sub_all_v001_005.csv

In [18]:
# !kaggle competitions submit -c champs-scalar-coupling -f ../submit/v001_005/sub_all_v001_005.csv -m "first submit"

100%|██████████████████████████████████████| 64.0M/64.0M [00:04<00:00, 14.8MB/s]
Successfully submitted to Predicting Molecular Properties

In [9]:
# Collect gain-based feature importance from every saved fold model and build,
# per molecule type, a table with one column per fold plus the fold average
# ("ave") and each feature's share of the total average gain ("ratio").
n_folds = 5
# Iteration tag embedded in the saved model file names, one per mol_type 1, 2, 3.
iterations = [29000, 7000, 20000]

importance_list = []
for mol_type, iteration in zip([1,2,3], iterations):
    print(f"mol_type: {mol_type}")
    importance_sub = []
    for fold_ in tqdm(range(n_folds)):
        # Built from model_path so the location follows DATA_VERSION / TRIAL_NO.
        saved_model_path = str(model_path / f"booster_{mol_type}_{fold_:02d}_{iteration}.model")
        bst = lgb.Booster(model_file=saved_model_path)
        # NOTE(review): rows are labelled with test.columns, which assumes the
        # model's feature order matches the test frame's column order — confirm.
        importance_sub.append(
            pd.DataFrame({"gain": bst.feature_importance(importance_type='gain')},
                         index=test.columns).sort_index()
        )
    importance_df = pd.concat(importance_sub, axis=1)
    # Derive the labels from n_folds; a hard-coded range(1, 6) raised a length
    # mismatch whenever n_folds was not 5.
    importance_df.columns = [f"fold_{i}" for i in range(1, n_folds + 1)]
    importance_df["ave"] = importance_df.mean(axis=1)
    importance_df["ratio"] = importance_df["ave"]/importance_df["ave"].sum()
    importance_list.append(importance_df.sort_values("ave", ascending=False))

mol_type: 1


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


mol_type: 2


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))


mol_type: 3


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))




In [None]:
# For each of the three importance tables, keep the features whose ratio is at
# least 0.0001 and save their names as top_importance_<n>.npy.
for i in (0, 1, 2):
    ranked = importance_list[i]
    df_tmp = ranked.loc[ranked["ratio"] >= 0.0001]
    print(ranked.shape, df_tmp.shape)
    np.save(f"../model/{DATA_VERSION}_{TRIAL_NO}/top_importance_{i+1}.npy", np.array(df_tmp.index))

In [10]:
# For each of the three importance tables, collect the features whose ratio is
# below 0.0001 and save their names as low_importance_<n>.npy.
for i in (0, 1, 2):
    ranked = importance_list[i]
    df_tmp = ranked.loc[ranked["ratio"] < 0.0001]
    print(ranked.shape, df_tmp.shape)
    np.save(f"../model/{DATA_VERSION}_{TRIAL_NO}/low_importance_{i+1}.npy", np.array(df_tmp.index))

(448, 7) (254, 7)
(448, 7) (302, 7)
(448, 7) (137, 7)


In [17]:
ls ../submit/v001_005

sub_all.csv  sub_all_v001_005.csv


# Calculate the out-of-fold (OOF) score

In [6]:
# Load the pickled train frame, index it by "id", and report its shape.
print("start loading...")
train = unpickle(save_path / "train_002.df.pkl")
print("train loaded.")
train.set_index("id", inplace=True)
print(f"train: {train.shape}")

start loading...
train loaded.
train: (4658147, 449)


In [7]:
# Detach the target column from the feature frame: `pop` returns the column
# and removes it from `train` in place.
y = train.pop("scalar_coupling_constant")

In [13]:
# Display the "type" column of the train frame.
# NOTE(review): the output stored with this cell shows a full wide frame rather
# than a single column, so it appears to come from an earlier version of the cell.
train["type"]

Unnamed: 0_level_0,atom_index_0,atom_index_1,type,atom_0,x_0,y_0,z_0,atom_1,x_1,y_1,z_1,dist,dist_x,dist_y,dist_z,type_0,molecule_couples,molecule_dist_mean,molecule_dist_min,molecule_dist_max,molecule_dist_std,atom_0_couples_count,atom_1_couples_count,molecule_atom_index_0_count,molecule_atom_index_1_count,molecule_type_count,molecule_atom_1_count,molecule_type_0_count,molecule_atom_index_0_x_1_mean,molecule_atom_index_0_x_1_mean_diff,molecule_atom_index_0_x_1_mean_div,molecule_atom_index_0_x_1_max,molecule_atom_index_0_x_1_max_diff,molecule_atom_index_0_x_1_max_div,molecule_atom_index_0_x_1_std,molecule_atom_index_0_x_1_std_diff,molecule_atom_index_0_x_1_std_div,molecule_atom_index_0_x_1_min,molecule_atom_index_0_x_1_min_diff,molecule_atom_index_0_x_1_min_div,molecule_atom_index_0_y_1_mean,molecule_atom_index_0_y_1_mean_diff,molecule_atom_index_0_y_1_mean_div,molecule_atom_index_0_y_1_max,molecule_atom_index_0_y_1_max_diff,molecule_atom_index_0_y_1_max_div,molecule_atom_index_0_y_1_std,molecule_atom_index_0_y_1_std_diff,molecule_atom_index_0_y_1_std_div,molecule_atom_index_0_y_1_min,molecule_atom_index_0_y_1_min_diff,molecule_atom_index_0_y_1_min_div,molecule_atom_index_0_z_1_mean,molecule_atom_index_0_z_1_mean_diff,molecule_atom_index_0_z_1_mean_div,molecule_atom_index_0_z_1_max,molecule_atom_index_0_z_1_max_diff,molecule_atom_index_0_z_1_max_div,molecule_atom_index_0_z_1_std,molecule_atom_index_0_z_1_std_diff,molecule_atom_index_0_z_1_std_div,molecule_atom_index_0_z_1_min,molecule_atom_index_0_z_1_min_diff,molecule_atom_index_0_z_1_min_div,molecule_atom_index_0_dist_mean,molecule_atom_index_0_dist_mean_diff,molecule_atom_index_0_dist_mean_div,molecule_atom_index_0_dist_max,molecule_atom_index_0_dist_max_diff,molecule_atom_index_0_dist_max_div,molecule_atom_index_0_dist_std,molecule_atom_index_0_dist_std_diff,molecule_atom_index_0_dist_std_div,molecule_atom_index_0_dist_min,molecule_atom_index_0_dist_min_diff,molecule_atom_index_0_dist_min_div,molecule_atom_index
_0_dist_x_mean,molecule_atom_index_0_dist_x_mean_diff,molecule_atom_index_0_dist_x_mean_div,molecule_atom_index_0_dist_x_max,molecule_atom_index_0_dist_x_max_diff,molecule_atom_index_0_dist_x_max_div,molecule_atom_index_0_dist_x_std,molecule_atom_index_0_dist_x_std_diff,molecule_atom_index_0_dist_x_std_div,molecule_atom_index_0_dist_x_min,molecule_atom_index_0_dist_x_min_diff,molecule_atom_index_0_dist_x_min_div,molecule_atom_index_0_dist_y_mean,molecule_atom_index_0_dist_y_mean_diff,molecule_atom_index_0_dist_y_mean_div,molecule_atom_index_0_dist_y_max,molecule_atom_index_0_dist_y_max_diff,molecule_atom_index_0_dist_y_max_div,molecule_atom_index_0_dist_y_std,molecule_atom_index_0_dist_y_std_diff,molecule_atom_index_0_dist_y_std_div,molecule_atom_index_0_dist_y_min,molecule_atom_index_0_dist_y_min_diff,molecule_atom_index_0_dist_y_min_div,molecule_atom_index_0_dist_z_mean,molecule_atom_index_0_dist_z_mean_diff,molecule_atom_index_0_dist_z_mean_div,molecule_atom_index_0_dist_z_max,molecule_atom_index_0_dist_z_max_diff,molecule_atom_index_0_dist_z_max_div,molecule_atom_index_0_dist_z_std,molecule_atom_index_0_dist_z_std_diff,molecule_atom_index_0_dist_z_std_div,molecule_atom_index_0_dist_z_min,molecule_atom_index_0_dist_z_min_diff,molecule_atom_index_0_dist_z_min_div,molecule_atom_index_1_x_1_mean,molecule_atom_index_1_x_1_mean_diff,molecule_atom_index_1_x_1_mean_div,molecule_atom_index_1_x_1_max,molecule_atom_index_1_x_1_max_diff,molecule_atom_index_1_x_1_max_div,molecule_atom_index_1_x_1_std,molecule_atom_index_1_x_1_std_diff,molecule_atom_index_1_x_1_std_div,molecule_atom_index_1_x_1_min,molecule_atom_index_1_x_1_min_diff,molecule_atom_index_1_x_1_min_div,molecule_atom_index_1_y_1_mean,molecule_atom_index_1_y_1_mean_diff,molecule_atom_index_1_y_1_mean_div,molecule_atom_index_1_y_1_max,molecule_atom_index_1_y_1_max_diff,molecule_atom_index_1_y_1_max_div,molecule_atom_index_1_y_1_std,molecule_atom_index_1_y_1_std_diff,molecule_atom_index_1_y_1_std_div,molecule_atom_i
ndex_1_y_1_min,molecule_atom_index_1_y_1_min_diff,molecule_atom_index_1_y_1_min_div,molecule_atom_index_1_z_1_mean,molecule_atom_index_1_z_1_mean_diff,molecule_atom_index_1_z_1_mean_div,molecule_atom_index_1_z_1_max,molecule_atom_index_1_z_1_max_diff,molecule_atom_index_1_z_1_max_div,molecule_atom_index_1_z_1_std,molecule_atom_index_1_z_1_std_diff,molecule_atom_index_1_z_1_std_div,molecule_atom_index_1_z_1_min,molecule_atom_index_1_z_1_min_diff,molecule_atom_index_1_z_1_min_div,molecule_atom_index_1_dist_mean,molecule_atom_index_1_dist_mean_diff,molecule_atom_index_1_dist_mean_div,molecule_atom_index_1_dist_max,molecule_atom_index_1_dist_max_diff,molecule_atom_index_1_dist_max_div,molecule_atom_index_1_dist_std,molecule_atom_index_1_dist_std_diff,molecule_atom_index_1_dist_std_div,molecule_atom_index_1_dist_min,molecule_atom_index_1_dist_min_diff,molecule_atom_index_1_dist_min_div,molecule_atom_index_1_dist_x_mean,molecule_atom_index_1_dist_x_mean_diff,molecule_atom_index_1_dist_x_mean_div,molecule_atom_index_1_dist_x_max,molecule_atom_index_1_dist_x_max_diff,molecule_atom_index_1_dist_x_max_div,molecule_atom_index_1_dist_x_std,molecule_atom_index_1_dist_x_std_diff,molecule_atom_index_1_dist_x_std_div,molecule_atom_index_1_dist_x_min,molecule_atom_index_1_dist_x_min_diff,molecule_atom_index_1_dist_x_min_div,molecule_atom_index_1_dist_y_mean,molecule_atom_index_1_dist_y_mean_diff,molecule_atom_index_1_dist_y_mean_div,molecule_atom_index_1_dist_y_max,molecule_atom_index_1_dist_y_max_diff,molecule_atom_index_1_dist_y_max_div,molecule_atom_index_1_dist_y_std,molecule_atom_index_1_dist_y_std_diff,molecule_atom_index_1_dist_y_std_div,molecule_atom_index_1_dist_y_min,molecule_atom_index_1_dist_y_min_diff,molecule_atom_index_1_dist_y_min_div,molecule_atom_index_1_dist_z_mean,molecule_atom_index_1_dist_z_mean_diff,molecule_atom_index_1_dist_z_mean_div,molecule_atom_index_1_dist_z_max,molecule_atom_index_1_dist_z_max_diff,molecule_atom_index_1_dist_z_max_div,molecule_atom_ind
ex_1_dist_z_std,molecule_atom_index_1_dist_z_std_diff,molecule_atom_index_1_dist_z_std_div,molecule_atom_index_1_dist_z_min,molecule_atom_index_1_dist_z_min_diff,molecule_atom_index_1_dist_z_min_div,molecule_type_x_1_mean,molecule_type_x_1_mean_diff,molecule_type_x_1_mean_div,molecule_type_x_1_max,molecule_type_x_1_max_diff,molecule_type_x_1_max_div,molecule_type_x_1_std,molecule_type_x_1_std_diff,molecule_type_x_1_std_div,molecule_type_x_1_min,molecule_type_x_1_min_diff,molecule_type_x_1_min_div,molecule_type_y_1_mean,molecule_type_y_1_mean_diff,molecule_type_y_1_mean_div,molecule_type_y_1_max,molecule_type_y_1_max_diff,molecule_type_y_1_max_div,molecule_type_y_1_std,molecule_type_y_1_std_diff,molecule_type_y_1_std_div,molecule_type_y_1_min,molecule_type_y_1_min_diff,molecule_type_y_1_min_div,molecule_type_z_1_mean,molecule_type_z_1_mean_diff,molecule_type_z_1_mean_div,molecule_type_z_1_max,molecule_type_z_1_max_diff,molecule_type_z_1_max_div,molecule_type_z_1_std,molecule_type_z_1_std_diff,molecule_type_z_1_std_div,molecule_type_z_1_min,molecule_type_z_1_min_diff,molecule_type_z_1_min_div,molecule_type_dist_mean,molecule_type_dist_mean_diff,molecule_type_dist_mean_div,molecule_type_dist_max,molecule_type_dist_max_diff,molecule_type_dist_max_div,molecule_type_dist_std,molecule_type_dist_std_diff,molecule_type_dist_std_div,molecule_type_dist_min,molecule_type_dist_min_diff,molecule_type_dist_min_div,molecule_type_dist_x_mean,molecule_type_dist_x_mean_diff,molecule_type_dist_x_mean_div,molecule_type_dist_x_max,molecule_type_dist_x_max_diff,molecule_type_dist_x_max_div,molecule_type_dist_x_std,molecule_type_dist_x_std_diff,molecule_type_dist_x_std_div,molecule_type_dist_x_min,molecule_type_dist_x_min_diff,molecule_type_dist_x_min_div,molecule_type_dist_y_mean,molecule_type_dist_y_mean_diff,molecule_type_dist_y_mean_div,molecule_type_dist_y_max,molecule_type_dist_y_max_diff,molecule_type_dist_y_max_div,molecule_type_dist_y_std,molecule_type_dist_y_std_diff,molecule_typ
e_dist_y_std_div,molecule_type_dist_y_min,molecule_type_dist_y_min_diff,molecule_type_dist_y_min_div,molecule_type_dist_z_mean,molecule_type_dist_z_mean_diff,molecule_type_dist_z_mean_div,molecule_type_dist_z_max,molecule_type_dist_z_max_diff,molecule_type_dist_z_max_div,molecule_type_dist_z_std,molecule_type_dist_z_std_diff,molecule_type_dist_z_std_div,molecule_type_dist_z_min,molecule_type_dist_z_min_diff,molecule_type_dist_z_min_div,molecule_atom_1_x_1_mean,molecule_atom_1_x_1_mean_diff,molecule_atom_1_x_1_mean_div,molecule_atom_1_x_1_max,molecule_atom_1_x_1_max_diff,molecule_atom_1_x_1_max_div,molecule_atom_1_x_1_std,molecule_atom_1_x_1_std_diff,molecule_atom_1_x_1_std_div,molecule_atom_1_x_1_min,molecule_atom_1_x_1_min_diff,molecule_atom_1_x_1_min_div,molecule_atom_1_y_1_mean,molecule_atom_1_y_1_mean_diff,molecule_atom_1_y_1_mean_div,molecule_atom_1_y_1_max,molecule_atom_1_y_1_max_diff,molecule_atom_1_y_1_max_div,molecule_atom_1_y_1_std,molecule_atom_1_y_1_std_diff,molecule_atom_1_y_1_std_div,molecule_atom_1_y_1_min,molecule_atom_1_y_1_min_diff,molecule_atom_1_y_1_min_div,molecule_atom_1_z_1_mean,molecule_atom_1_z_1_mean_diff,molecule_atom_1_z_1_mean_div,molecule_atom_1_z_1_max,molecule_atom_1_z_1_max_diff,molecule_atom_1_z_1_max_div,molecule_atom_1_z_1_std,molecule_atom_1_z_1_std_diff,molecule_atom_1_z_1_std_div,molecule_atom_1_z_1_min,molecule_atom_1_z_1_min_diff,molecule_atom_1_z_1_min_div,molecule_atom_1_dist_mean,molecule_atom_1_dist_mean_diff,molecule_atom_1_dist_mean_div,molecule_atom_1_dist_max,molecule_atom_1_dist_max_diff,molecule_atom_1_dist_max_div,molecule_atom_1_dist_std,molecule_atom_1_dist_std_diff,molecule_atom_1_dist_std_div,molecule_atom_1_dist_min,molecule_atom_1_dist_min_diff,molecule_atom_1_dist_min_div,molecule_atom_1_dist_x_mean,molecule_atom_1_dist_x_mean_diff,molecule_atom_1_dist_x_mean_div,molecule_atom_1_dist_x_max,molecule_atom_1_dist_x_max_diff,molecule_atom_1_dist_x_max_div,molecule_atom_1_dist_x_std,molecule_atom_1_dist_x_std_dif
f,molecule_atom_1_dist_x_std_div,molecule_atom_1_dist_x_min,molecule_atom_1_dist_x_min_diff,molecule_atom_1_dist_x_min_div,molecule_atom_1_dist_y_mean,molecule_atom_1_dist_y_mean_diff,molecule_atom_1_dist_y_mean_div,molecule_atom_1_dist_y_max,molecule_atom_1_dist_y_max_diff,molecule_atom_1_dist_y_max_div,molecule_atom_1_dist_y_std,molecule_atom_1_dist_y_std_diff,molecule_atom_1_dist_y_std_div,molecule_atom_1_dist_y_min,molecule_atom_1_dist_y_min_diff,molecule_atom_1_dist_y_min_div,molecule_atom_1_dist_z_mean,molecule_atom_1_dist_z_mean_diff,molecule_atom_1_dist_z_mean_div,molecule_atom_1_dist_z_max,molecule_atom_1_dist_z_max_diff,molecule_atom_1_dist_z_max_div,molecule_atom_1_dist_z_std,molecule_atom_1_dist_z_std_diff,molecule_atom_1_dist_z_std_div,molecule_atom_1_dist_z_min,molecule_atom_1_dist_z_min_diff,molecule_atom_1_dist_z_min_div,molecule_type_0_x_1_mean,molecule_type_0_x_1_mean_diff,molecule_type_0_x_1_mean_div,molecule_type_0_x_1_max,molecule_type_0_x_1_max_diff,molecule_type_0_x_1_max_div,molecule_type_0_x_1_std,molecule_type_0_x_1_std_diff,molecule_type_0_x_1_std_div,molecule_type_0_x_1_min,molecule_type_0_x_1_min_diff,molecule_type_0_x_1_min_div,molecule_type_0_y_1_mean,molecule_type_0_y_1_mean_diff,molecule_type_0_y_1_mean_div,molecule_type_0_y_1_max,molecule_type_0_y_1_max_diff,molecule_type_0_y_1_max_div,molecule_type_0_y_1_std,molecule_type_0_y_1_std_diff,molecule_type_0_y_1_std_div,molecule_type_0_y_1_min,molecule_type_0_y_1_min_diff,molecule_type_0_y_1_min_div,molecule_type_0_z_1_mean,molecule_type_0_z_1_mean_diff,molecule_type_0_z_1_mean_div,molecule_type_0_z_1_max,molecule_type_0_z_1_max_diff,molecule_type_0_z_1_max_div,molecule_type_0_z_1_std,molecule_type_0_z_1_std_diff,molecule_type_0_z_1_std_div,molecule_type_0_z_1_min,molecule_type_0_z_1_min_diff,molecule_type_0_z_1_min_div,molecule_type_0_dist_mean,molecule_type_0_dist_mean_diff,molecule_type_0_dist_mean_div,molecule_type_0_dist_max,molecule_type_0_dist_max_diff,molecule_type_0_dist_max_div
,molecule_type_0_dist_std,molecule_type_0_dist_std_diff,molecule_type_0_dist_std_div,molecule_type_0_dist_min,molecule_type_0_dist_min_diff,molecule_type_0_dist_min_div,molecule_type_0_dist_x_mean,molecule_type_0_dist_x_mean_diff,molecule_type_0_dist_x_mean_div,molecule_type_0_dist_x_max,molecule_type_0_dist_x_max_diff,molecule_type_0_dist_x_max_div,molecule_type_0_dist_x_std,molecule_type_0_dist_x_std_diff,molecule_type_0_dist_x_std_div,molecule_type_0_dist_x_min,molecule_type_0_dist_x_min_diff,molecule_type_0_dist_x_min_div,molecule_type_0_dist_y_mean,molecule_type_0_dist_y_mean_diff,molecule_type_0_dist_y_mean_div,molecule_type_0_dist_y_max,molecule_type_0_dist_y_max_diff,molecule_type_0_dist_y_max_div,molecule_type_0_dist_y_std,molecule_type_0_dist_y_std_diff,molecule_type_0_dist_y_std_div,molecule_type_0_dist_y_min,molecule_type_0_dist_y_min_diff,molecule_type_0_dist_y_min_div,molecule_type_0_dist_z_mean,molecule_type_0_dist_z_mean_diff,molecule_type_0_dist_z_mean_div,molecule_type_0_dist_z_max,molecule_type_0_dist_z_max_diff,molecule_type_0_dist_z_max_div,molecule_type_0_dist_z_std,molecule_type_0_dist_z_std_diff,molecule_type_0_dist_z_std_div,molecule_type_0_dist_z_min,molecule_type_0_dist_z_min_diff,molecule_type_0_dist_z_min_div
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 
385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1
0,1,0,0,0,0.00215,-0.00603,0.00198,0,-0.0127,1.0858,0.008,1.09195,0.00022,1.1921,4e-05,0,10,1.50667,1.09195,1.78316,0.35693,4,4,10,10,10,10,10,-0.0164,-0.0037,1.29145,1.01173,1.02443,-79.67554,0.72791,0.74061,-57.32394,-0.54082,-0.52812,42.59012,1.35875,0.27295,1.25138,1.46375,0.37795,1.34808,0.18228,-0.90353,0.16787,1.0858,0.0,1.0,0.00951,0.00151,1.18833,0.9064,0.8984,113.28556,0.72796,0.71996,90.98326,-0.87664,-0.88464,-109.56683,1.61034,0.51839,1.47474,1.78316,0.6912,1.633,0.34559,-0.74636,0.31649,1.09195,0.0,1.0,0.39773,0.39751,1803.93513,1.01925,1.01903,4622.89087,0.43572,0.4355,1976.24497,0.00022,0.0,1.0,1.88756,0.69545,1.58338,2.16026,0.96816,1.81214,0.46467,-0.72743,0.38979,1.1921,0.0,1.0,0.3975,0.39746,10950.60642,0.81798,0.81794,22534.36389,0.45935,0.45932,12654.62979,0.0,-3e-05,0.07957,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.0858,0.0,1.0,0.008,0.0,1.0,0.008,0.0,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,0.99999,0.39746,0.39723,1802.68665,1.04945,1.04923,4759.87456,0.45296,0.45274,2054.41294,0.00022,0.0,1.0,0.39745,-0.79466,0.3334,1.1921,0.0,1.0,0.52983,-0.66228,0.44445,0.12399,-1.06811,0.10401,0.39745,0.39742,10949.35623,0.80712,0.80708,22235.13488,0.45899,0.45896,12644.70861,4e-05,0.0,1.0,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.0858,0.0,1.0,0.008,0.0,1.0,0.008,0.0,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,0.99999,0.39746,0.39723,1802.68665,1.04945,1.04923,4759.87456,0.45296,0.45274,2054.41294,0.00022,0.0,1.0,0.39745,-0.79466,0.3334,1.1921,0.0,1.0,0.52983,-0.66228,0.44445,0.12399,-1.06811,0.10401,0.39745,0.39742,10949.35623,0.80712,0.80708,22235.13488,0.45899,0.45896,12644.70861,4e-05,0.0,1.0,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.08
58,0.0,1.0,0.008,0.0,1.0,0.008,0.0,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,0.99999,0.39746,0.39723,1802.68665,1.04945,1.04923,4759.87456,0.45296,0.45274,2054.41294,0.00022,0.0,1.0,0.39745,-0.79466,0.3334,1.1921,0.0,1.0,0.52983,-0.66228,0.44445,0.12399,-1.06811,0.10401,0.39745,0.39742,10949.35623,0.80712,0.80708,22235.13488,0.45899,0.45896,12644.70861,4e-05,0.0,1.0,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.0858,0.0,1.0,0.008,0.0,1.0,0.008,0.0,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,0.99999,0.39746,0.39723,1802.68665,1.04945,1.04923,4759.87456,0.45296,0.45274,2054.41294,0.00022,0.0,1.0,0.39745,-0.79466,0.3334,1.1921,0.0,1.0,0.52983,-0.66228,0.44445,0.12399,-1.06811,0.10401,0.39745,0.39742,10949.35623,0.80712,0.80708,22235.13488,0.45899,0.45896,12644.70861,4e-05,0.0,1.0
1,1,2,3,0,0.00215,-0.00603,0.00198,1,1.01173,1.46375,0.00028,1.78312,1.01925,2.16026,0.0,1,10,1.50667,1.09195,1.78316,0.35693,4,1,10,10,10,10,10,-0.0164,-1.02813,-0.01621,1.01173,0.0,1.0,0.72791,-0.28382,0.71947,-0.54082,-1.55255,-0.53454,1.35875,-0.105,0.92827,1.46375,0.0,1.0,0.18228,-1.28147,0.12453,1.0858,-0.37795,0.7418,0.00951,0.00923,34.37691,0.9064,0.90612,3277.22299,0.72796,0.72768,2632.04267,-0.87664,-0.87692,-3169.64422,1.61034,-0.17278,0.9031,1.78316,4e-05,1.00002,0.34559,-1.43753,0.19381,1.09195,-0.69117,0.61238,0.39773,-0.62152,0.39022,1.01925,0.0,1.0,0.43572,-0.58353,0.42749,0.00022,-1.01903,0.00022,1.88756,-0.2727,0.87376,2.16026,0.0,1.0,0.46467,-1.69559,0.2151,1.1921,-0.96816,0.55183,0.3975,0.39749,137615.91638,0.81798,0.81797,283188.62154,0.45935,0.45935,159030.32296,0.0,0.0,1.0,1.01173,0.0,1.0,1.01173,0.0,1.0,,,,1.01173,0.0,1.0,1.46375,0.0,1.0,1.46375,0.0,1.0,,,,1.46375,0.0,1.0,0.00028,0.0,1.0,0.00028,0.0,1.0,,,,0.00028,0.0,1.0,1.78312,0.0,1.0,1.78312,0.0,1.0,,,,1.78312,0.0,1.0,1.01925,0.0,1.0,1.01925,0.0,1.0,,,,1.01925,0.0,1.0,2.16026,0.0,1.0,2.16026,0.0,1.0,,,,2.16026,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,,,,0.0,0.0,1.0,-0.27356,-1.28529,-0.27038,1.01173,0.0,1.0,0.62971,-0.38202,0.62241,-0.54082,-1.55255,-0.53454,1.44543,-0.01832,0.98749,1.46375,0.0,1.0,0.01013,-1.45362,0.00692,1.43793,-0.02582,0.98236,0.16103,0.16075,582.23009,0.9064,0.90612,3277.22299,0.87705,0.87677,3171.11685,-0.87664,-0.87692,-3169.64422,1.78315,3e-05,1.00001,1.78316,4e-05,1.00002,1e-05,-1.78311,1e-05,1.78312,0.0,1.0,1.05988,0.04063,1.03986,2.4104,1.39115,2.36487,1.07999,0.06074,1.05959,0.00029,-1.01896,0.00028,1.05986,-1.1004,0.49062,2.16026,0.0,1.0,1.16089,-0.99937,0.53739,9e-05,-2.16017,4e-05,1.05987,1.05987,366933.88076,3.17924,3.17923,1100669.72479,1.0861,1.0861,376014.20878,0.0,0.0,1.0,-0.27356,-1.28529,-0.27038,1.01173,0.0,1.0,0.62971,-0.38202,0.62241,-0.54082,-1.55255,-0.53454,1.44543,-0.01832,0.98749,1.46375,0.0,1.0,0.01013,-1.45362,0.00692,1.43793,-0.02582,0.98236,0.1610
3,0.16075,582.23009,0.9064,0.90612,3277.22299,0.87705,0.87677,3171.11685,-0.87664,-0.87692,-3169.64422,1.78315,3e-05,1.00001,1.78316,4e-05,1.00002,1e-05,-1.78311,1e-05,1.78312,0.0,1.0,1.05988,0.04063,1.03986,2.4104,1.39115,2.36487,1.07999,0.06074,1.05959,0.00029,-1.01896,0.00028,1.05986,-1.1004,0.49062,2.16026,0.0,1.0,1.16089,-0.99937,0.53739,9e-05,-2.16017,4e-05,1.05987,1.05987,366933.88076,3.17924,3.17923,1100669.72479,1.0861,1.0861,376014.20878,0.0,0.0,1.0,-0.27356,-1.28529,-0.27038,1.01173,0.0,1.0,0.62971,-0.38202,0.62241,-0.54082,-1.55255,-0.53454,1.44543,-0.01832,0.98749,1.46375,0.0,1.0,0.01013,-1.45362,0.00692,1.43793,-0.02582,0.98236,0.16103,0.16075,582.23009,0.9064,0.90612,3277.22299,0.87705,0.87677,3171.11685,-0.87664,-0.87692,-3169.64422,1.78315,3e-05,1.00001,1.78316,4e-05,1.00002,1e-05,-1.78311,1e-05,1.78312,0.0,1.0,1.05988,0.04063,1.03986,2.4104,1.39115,2.36487,1.07999,0.06074,1.05959,0.00029,-1.01896,0.00028,1.05986,-1.1004,0.49062,2.16026,0.0,1.0,1.16089,-0.99937,0.53739,9e-05,-2.16017,4e-05,1.05987,1.05987,366933.88076,3.17924,3.17923,1100669.72479,1.0861,1.0861,376014.20878,0.0,0.0,1.0
2,1,3,3,0,0.00215,-0.00603,0.00198,1,-0.54082,1.44753,-0.87664,1.78315,0.29481,2.11283,0.77197,1,10,1.50667,1.09195,1.78316,0.35693,4,2,10,10,10,10,10,-0.0164,0.52442,0.03032,1.01173,1.55255,-1.87075,0.72791,1.26872,-1.34594,-0.54082,0.0,1.0,1.35875,-0.08877,0.93867,1.46375,0.01622,1.01121,0.18228,-1.26525,0.12592,1.0858,-0.36172,0.75011,0.00951,0.88615,-0.01085,0.9064,1.78304,-1.03394,0.72796,1.6046,-0.83039,-0.87664,0.0,1.0,1.61034,-0.1728,0.90309,1.78316,1e-05,1.00001,0.34559,-1.43755,0.19381,1.09195,-0.69119,0.61237,0.39773,0.10292,1.3491,1.01925,0.72444,3.4573,0.43572,0.14091,1.47797,0.00022,-0.29459,0.00075,1.88756,-0.22527,0.89338,2.16026,0.04743,1.02245,0.46467,-1.64816,0.21993,1.1921,-0.92073,0.56422,0.3975,-0.37448,0.51491,0.81798,0.046,1.05959,0.45935,-0.31262,0.59504,0.0,-0.77197,0.0,-0.54082,0.0,1.0,-0.54082,0.0,1.0,0.0,0.54082,-0.0,-0.54082,0.0,1.0,1.44753,0.0,1.0,1.44753,0.0,1.0,0.0,-1.44753,0.0,1.44753,0.0,1.0,-0.87664,0.0,1.0,-0.87664,0.0,1.0,0.0,0.87664,-0.0,-0.87664,0.0,1.0,1.78315,1e-05,1.0,1.78316,1e-05,1.00001,1e-05,-1.78314,0.0,1.78315,0.0,1.0,1.35261,1.05779,4.58803,2.4104,2.11559,8.17607,1.49595,1.20113,5.07425,0.29481,0.0,1.0,1.05655,-1.05628,0.50006,2.11283,0.0,1.0,1.49381,-0.61902,0.70702,0.00026,-2.11257,0.00012,0.77048,-0.00149,0.99807,0.77197,0.0,1.0,0.00211,-0.76986,0.00273,0.76899,-0.00298,0.99614,-0.27356,0.26726,0.50582,1.01173,1.55255,-1.87075,0.62971,1.17053,-1.16438,-0.54082,0.0,1.0,1.44543,-0.00209,0.99855,1.46375,0.01622,1.01121,0.01013,-1.4374,0.007,1.43793,-0.00959,0.99337,0.16103,1.03767,-0.18369,0.9064,1.78304,-1.03394,0.87705,1.75369,-1.00046,-0.87664,0.0,1.0,1.78315,-0.0,1.0,1.78316,1e-05,1.00001,1e-05,-1.78313,1e-05,1.78312,-3e-05,0.99998,1.05988,0.76507,3.59511,2.4104,2.11559,8.17607,1.07999,0.78518,3.66334,0.00029,-0.29452,0.00098,1.05986,-1.05297,0.50163,2.16026,0.04743,1.02245,1.16089,-0.95194,0.54945,9e-05,-2.11274,4e-05,1.05987,0.2879,1.37294,3.17924,2.40726,4.11833,1.0861,0.31413,1.40692,0.0,-0.77197,0.0,-0.27356
,0.26726,0.50582,1.01173,1.55255,-1.87075,0.62971,1.17053,-1.16438,-0.54082,0.0,1.0,1.44543,-0.00209,0.99855,1.46375,0.01622,1.01121,0.01013,-1.4374,0.007,1.43793,-0.00959,0.99337,0.16103,1.03767,-0.18369,0.9064,1.78304,-1.03394,0.87705,1.75369,-1.00046,-0.87664,0.0,1.0,1.78315,-0.0,1.0,1.78316,1e-05,1.00001,1e-05,-1.78313,1e-05,1.78312,-3e-05,0.99998,1.05988,0.76507,3.59511,2.4104,2.11559,8.17607,1.07999,0.78518,3.66334,0.00029,-0.29452,0.00098,1.05986,-1.05297,0.50163,2.16026,0.04743,1.02245,1.16089,-0.95194,0.54945,9e-05,-2.11274,4e-05,1.05987,0.2879,1.37294,3.17924,2.40726,4.11833,1.0861,0.31413,1.40692,0.0,-0.77197,0.0,-0.27356,0.26726,0.50582,1.01173,1.55255,-1.87075,0.62971,1.17053,-1.16438,-0.54082,0.0,1.0,1.44543,-0.00209,0.99855,1.46375,0.01622,1.01121,0.01013,-1.4374,0.007,1.43793,-0.00959,0.99337,0.16103,1.03767,-0.18369,0.9064,1.78304,-1.03394,0.87705,1.75369,-1.00046,-0.87664,0.0,1.0,1.78315,-0.0,1.0,1.78316,1e-05,1.00001,1e-05,-1.78313,1e-05,1.78312,-3e-05,0.99998,1.05988,0.76507,3.59511,2.4104,2.11559,8.17607,1.07999,0.78518,3.66334,0.00029,-0.29452,0.00098,1.05986,-1.05297,0.50163,2.16026,0.04743,1.02245,1.16089,-0.95194,0.54945,9e-05,-2.11274,4e-05,1.05987,0.2879,1.37294,3.17924,2.40726,4.11833,1.0861,0.31413,1.40692,0.0,-0.77197,0.0
3,1,4,3,0,0.00215,-0.00603,0.00198,1,-0.52381,1.43793,0.9064,1.78316,0.27664,2.08503,0.81798,1,10,1.50667,1.09195,1.78316,0.35693,4,3,10,10,10,10,10,-0.0164,0.50741,0.03131,1.01173,1.53554,-1.93147,0.72791,1.25172,-1.38963,-0.54082,-0.017,1.03246,1.35875,-0.07918,0.94494,1.46375,0.02582,1.01796,0.18228,-1.25565,0.12676,1.0858,-0.35213,0.75511,0.00951,-0.89689,0.01049,0.9064,0.0,1.0,0.72796,-0.17844,0.80313,-0.87664,-1.78304,-0.96717,1.61034,-0.17281,0.90309,1.78316,0.0,1.0,0.34559,-1.43756,0.19381,1.09195,-0.6912,0.61237,0.39773,0.12109,1.43773,1.01925,0.74261,3.68443,0.43572,0.15908,1.57506,0.00022,-0.27642,0.0008,1.88756,-0.19747,0.90529,2.16026,0.07523,1.03608,0.46467,-1.62036,0.22286,1.1921,-0.89293,0.57174,0.3975,-0.42048,0.48595,0.81798,0.0,1.0,0.45935,-0.35863,0.56157,0.0,-0.81797,0.0,-0.52381,0.0,1.0,-0.52381,0.0,1.0,0.0,0.52381,-0.0,-0.52381,0.0,1.0,1.43793,0.0,1.0,1.43793,0.0,1.0,0.0,-1.43793,0.0,1.43793,0.0,1.0,0.9064,0.0,1.0,0.9064,0.0,1.0,0.0,-0.9064,0.0,0.9064,0.0,1.0,1.78315,-1e-05,1.0,1.78316,0.0,1.0,0.0,-1.78315,0.0,1.78315,-1e-05,1.0,0.87827,0.60164,3.17481,2.3579,2.08126,8.5234,1.28882,1.01218,4.65886,0.00029,-0.27635,0.00104,0.69526,-1.38977,0.33345,2.08503,0.0,1.0,1.20357,-0.88146,0.57725,9e-05,-2.08494,4e-05,1.60609,0.78811,1.96349,3.17924,2.36126,3.8867,1.36239,0.54441,1.66555,0.81798,0.0,1.0,-0.27356,0.25026,0.52224,1.01173,1.53554,-1.93147,0.62971,1.15353,-1.20217,-0.54082,-0.017,1.03246,1.44543,0.0075,1.00522,1.46375,0.02582,1.01796,0.01013,-1.4278,0.00704,1.43793,0.0,1.0,0.16103,-0.74537,0.17766,0.9064,0.0,1.0,0.87705,-0.02935,0.96762,-0.87664,-1.78304,-0.96717,1.78315,-1e-05,0.99999,1.78316,0.0,1.0,1e-05,-1.78314,1e-05,1.78312,-4e-05,0.99998,1.05988,0.78324,3.83129,2.4104,2.13376,8.71318,1.07999,0.80336,3.90399,0.00029,-0.27635,0.00104,1.05986,-1.02517,0.50832,2.16026,0.07523,1.03608,1.16089,-0.92414,0.55677,9e-05,-2.08494,4e-05,1.05987,0.24189,1.29572,3.17924,2.36126,3.8867,1.0861,0.26812,1.32779,0.0,-0.81797,0.0,-0.27356,0.25026,0.52224
,1.01173,1.53554,-1.93147,0.62971,1.15353,-1.20217,-0.54082,-0.017,1.03246,1.44543,0.0075,1.00522,1.46375,0.02582,1.01796,0.01013,-1.4278,0.00704,1.43793,0.0,1.0,0.16103,-0.74537,0.17766,0.9064,0.0,1.0,0.87705,-0.02935,0.96762,-0.87664,-1.78304,-0.96717,1.78315,-1e-05,0.99999,1.78316,0.0,1.0,1e-05,-1.78314,1e-05,1.78312,-4e-05,0.99998,1.05988,0.78324,3.83129,2.4104,2.13376,8.71318,1.07999,0.80336,3.90399,0.00029,-0.27635,0.00104,1.05986,-1.02517,0.50832,2.16026,0.07523,1.03608,1.16089,-0.92414,0.55677,9e-05,-2.08494,4e-05,1.05987,0.24189,1.29572,3.17924,2.36126,3.8867,1.0861,0.26812,1.32779,0.0,-0.81797,0.0,-0.27356,0.25026,0.52224,1.01173,1.53554,-1.93147,0.62971,1.15353,-1.20217,-0.54082,-0.017,1.03246,1.44543,0.0075,1.00522,1.46375,0.02582,1.01796,0.01013,-1.4278,0.00704,1.43793,0.0,1.0,0.16103,-0.74537,0.17766,0.9064,0.0,1.0,0.87705,-0.02935,0.96762,-0.87664,-1.78304,-0.96717,1.78315,-1e-05,0.99999,1.78316,0.0,1.0,1e-05,-1.78314,1e-05,1.78312,-4e-05,0.99998,1.05988,0.78324,3.83129,2.4104,2.13376,8.71318,1.07999,0.80336,3.90399,0.00029,-0.27635,0.00104,1.05986,-1.02517,0.50832,2.16026,0.07523,1.03608,1.16089,-0.92414,0.55677,9e-05,-2.08494,4e-05,1.05987,0.24189,1.29572,3.17924,2.36126,3.8867,1.0861,0.26812,1.32779,0.0,-0.81797,0.0
4,2,0,0,0,1.01173,1.46375,0.00028,0,-0.0127,1.0858,0.008,1.09195,1.04945,0.14284,6e-05,0,10,1.50667,1.09195,1.78316,0.35693,3,4,10,10,10,10,10,-0.35911,-0.34641,28.28045,-0.0127,0.0,1.0,0.30012,0.31282,-23.63504,-0.54082,-0.52812,42.59012,1.32375,0.23795,1.21915,1.44753,0.36172,1.33314,0.20613,-0.87968,0.18984,1.0858,0.0,1.0,0.01258,0.00458,1.57291,0.9064,0.8984,113.28556,0.89153,0.88353,111.4273,-0.87664,-0.88464,-109.56683,1.55275,0.4608,1.422,1.78316,0.69121,1.633,0.39907,-0.69289,0.36546,1.09195,0.0,1.0,1.93925,0.8898,1.84786,2.4104,1.36094,2.29681,0.77103,-0.27842,0.7347,1.04945,0.0,1.0,0.04792,-0.09492,0.3355,0.14284,0.0,1.0,0.0822,-0.06064,0.57547,0.00026,-0.14258,0.00184,0.53003,0.52997,8883.25849,0.82105,0.821,13760.69124,0.45971,0.45965,7704.62508,6e-05,0.0,1.0,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.0858,0.0,1.0,0.008,0.0,1.0,0.008,0.0,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,1.0,0.39746,-0.652,0.37873,1.04945,0.0,1.0,0.45296,-0.5965,0.43161,0.00022,-1.04923,0.00021,0.39745,0.2546,2.78238,1.1921,1.04926,8.3455,0.52983,0.38699,3.70915,0.12399,-0.01885,0.86804,0.39745,0.39739,6661.20582,0.80712,0.80706,13527.0793,0.45899,0.45893,7692.59899,4e-05,-2e-05,0.60837,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.0858,0.0,1.0,0.008,0.0,1.0,0.008,0.0,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,1.0,0.39746,-0.652,0.37873,1.04945,0.0,1.0,0.45296,-0.5965,0.43161,0.00022,-1.04923,0.00021,0.39745,0.2546,2.78238,1.1921,1.04926,8.3455,0.52983,0.38699,3.70915,0.12399,-0.01885,0.86804,0.39745,0.39739,6661.20582,0.80712,0.80706,13527.0793,0.45899,0.45893,7692.59899,4e-05,-2e-05,0.60837,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.0858,0.0,1.0,0.008,0.0,1.0,0.008,0.0
,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,1.0,0.39746,-0.652,0.37873,1.04945,0.0,1.0,0.45296,-0.5965,0.43161,0.00022,-1.04923,0.00021,0.39745,0.2546,2.78238,1.1921,1.04926,8.3455,0.52983,0.38699,3.70915,0.12399,-0.01885,0.86804,0.39745,0.39739,6661.20582,0.80712,0.80706,13527.0793,0.45899,0.45893,7692.59899,4e-05,-2e-05,0.60837,-0.0127,0.0,1.0,-0.0127,0.0,1.0,0.0,0.0127,-0.0,-0.0127,0.0,1.0,1.0858,0.0,1.0,1.0858,0.0,1.0,0.0,-1.0858,0.0,1.0858,0.0,1.0,0.008,0.0,1.0,0.008,0.0,1.0,0.0,-0.008,0.0,0.008,0.0,1.0,1.09195,-0.0,1.0,1.09195,0.0,1.0,0.0,-1.09195,0.0,1.09195,-1e-05,1.0,0.39746,-0.652,0.37873,1.04945,0.0,1.0,0.45296,-0.5965,0.43161,0.00022,-1.04923,0.00021,0.39745,0.2546,2.78238,1.1921,1.04926,8.3455,0.52983,0.38699,3.70915,0.12399,-0.01885,0.86804,0.39745,0.39739,6661.20582,0.80712,0.80706,13527.0793,0.45899,0.45893,7692.59899,4e-05,-2e-05,0.60837


In [11]:
# Load the raw training table from the input directory (path is relative to the notebook).
train_base = pd.read_csv('../input/train.csv')

In [12]:
# Preview the first rows to check the columns that were read.
train_base.head()

Unnamed: 0,id,molecule_name,atom_index_0,atom_index_1,type,scalar_coupling_constant
0,0,dsgdb9nsd_000001,1,0,1JHC,84.8076
1,1,dsgdb9nsd_000001,1,2,2JHH,-11.257
2,2,dsgdb9nsd_000001,1,3,2JHH,-11.2548
3,3,dsgdb9nsd_000001,1,4,2JHH,-11.2543
4,4,dsgdb9nsd_000001,2,0,1JHC,84.8074


In [None]:
# For each mol_type, load every saved fold booster, predict on that type's
# training rows, and average the fold predictions row-wise. The per-type
# results are stacked into a single frame `oof_all`; `train_idx` records the
# row labels in the same order.
n_folds = 5
# One value per mol_type (1, 2, 3); it is embedded in each saved model's filename.
iterations = [29000, 7000, 20000]
oof_list = []
train_idx = []
for mol_type, iteration in enumerate(iterations, start=1):
    print(f"mol_type: {mol_type}")
    train_ = get_train_data_for_predict(mol_type=mol_type)
    train_idx += train_.index.tolist()

    # NOTE(review): every fold model scores *all* rows of `train_`, so rows a
    # model may have been fitted on are included in the average -- confirm this
    # is intended rather than true out-of-fold predictions.
    preds_oof = np.zeros((len(train_), n_folds))  # one column per fold model
    for fold_ in tqdm(range(n_folds)):
        saved_model_path = f'../model/{DATA_VERSION}_{TRIAL_NO}/booster_{mol_type}_{fold_:02d}_{iteration}.model'
        bst = lgb.Booster(model_file=saved_model_path)
        preds_oof[:, fold_] = bst.predict(train_)

    # Mean across the fold columns, keyed by the original row labels.
    oof = pd.DataFrame(
        {"scalar_coupling_constant": preds_oof.mean(axis=1)},
        index=train_.index,
    )
    oof_list.append(oof)

oof_all = pd.concat(oof_list, axis=0)


mol_type: 1
mol_type: 1


HBox(children=(IntProgress(value=0, max=5), HTML(value='')))

In [None]:

def group_mean_log_mae(y_true, y_pred, types, floor=1e-9):
    """
    Competition metric: the mean, over groups, of log(mean absolute error).

    Metric for https://www.kaggle.com/c/champs-scalar-coupling
    (adapted from https://www.kaggle.com/uberkinder/efficient-metric).

    Parameters
    ----------
    y_true, y_pred : pandas Series
        Target and predicted values; their difference must support
        ``.abs()`` and ``.groupby()``.
    types : grouping key accepted by ``Series.groupby``
        Group label for each row (e.g. a Series of type names).
    floor : float, default 1e-9
        Lower bound applied to each group's MAE before taking the log, so a
        perfect group does not produce ``-inf``.

    Returns
    -------
    float
        Average of the per-group log-MAE values.
    """
    abs_error = (y_true - y_pred).abs()
    mae_per_group = abs_error.groupby(types).mean()
    # Floor each group's MAE so that log() stays finite.
    floored_mae = mae_per_group.clip(lower=floor)
    return np.log(floored_mae).mean()

In [None]:
# Score the averaged predictions with the competition metric.
# `oof_all` is a DataFrame whose only column is "scalar_coupling_constant", so
# `oof_all[train_idx]` would look the row labels up as *column* names and raise
# KeyError. Select the prediction column as a Series instead; its rows are
# already in `train_idx` order because `oof_all` was concatenated in that order.
# NOTE(review): assumes `y` and `train["type"]` are pandas objects indexed by
# the same row labels as `oof_all`, so they align on index -- TODO confirm.
score = group_mean_log_mae(y, oof_all["scalar_coupling_constant"], train["type"])

In [None]:
# Report the metric value formatted to five decimal places.
print(f"group_mean_log_mae: {score:.5f}")