In [4]:
import pandas as pd
import numpy as np
import os


def save_dataframe(path, dataframe):
    """Persist a DataFrame as two ``.npy`` files: its values and its column labels.

    Writes ``<path>.data.npy`` (from ``dataframe.values``) and
    ``<path>.header.npy`` (the column labels); ``np.save`` appends the
    ``.npy`` suffix itself.  The row index is not written.

    Parameters
    ----------
    path : str
        File prefix, without extension.
    dataframe : pandas.DataFrame
        Frame to store.

    Notes
    -----
    When every column label is a ``str`` the header is written as a
    fixed-width unicode array instead of an object array.  ``np.save``
    pickles object arrays, and current NumPy's ``np.load`` refuses pickled
    payloads by default (``allow_pickle=False``), which made such headers
    unreadable by ``load_dataframe``.  Any other kind of label is stored
    exactly as before.  A frame with mixed column dtypes still produces an
    object ``values`` array and is subject to the same pickling caveat.
    """
    columns = dataframe.columns
    if all(isinstance(label, str) for label in columns):
        # Plain unicode array: loadable without pickle support.
        header = np.array(list(columns), dtype=str)
    else:
        # Non-string labels (ints, tuples, ...) keep their original encoding.
        header = np.asarray(columns)
    np.save(path + ".data", dataframe.values)
    np.save(path + ".header", header)


def load_dataframe(path, allow_pickle=None):
    """Rebuild a DataFrame from the two files written by ``save_dataframe``.

    Reads ``<path>.data.npy`` for the values and ``<path>.header.npy`` for
    the column labels.  No row index is read from disk.

    Parameters
    ----------
    path : str
        File prefix, without extension.
    allow_pickle : bool or None, optional
        When not ``None`` it is forwarded to both ``np.load`` calls.  The
        default ``None`` leaves NumPy's own default in effect, so existing
        callers behave exactly as before.  Pass ``True`` to read files whose
        header or data were stored as object arrays (pickled by ``np.save``).
        Unpickling can execute arbitrary code, so enable it only for files
        you trust.

    Returns
    -------
    pandas.DataFrame
        Frame built from the stored values and column labels.
    """
    # Only pass the keyword when the caller asked for it, so the installed
    # NumPy's default keeps applying otherwise.
    load_kwargs = {} if allow_pickle is None else {"allow_pickle": allow_pickle}
    data = np.load(path + ".data.npy", **load_kwargs)
    header = np.load(path + ".header.npy", **load_kwargs)
    return pd.DataFrame(data=data, columns=header)


def save_dataframe32(path, dataframe, keep=()):
    """Save a DataFrame split into a float32 part and an untouched part.

    Columns whose label is in ``keep`` are written with their original
    values to ``<path>.data64.npy`` / ``<path>.header64.npy``; every other
    column is cast to ``np.float32`` and written to ``<path>.data32.npy`` /
    ``<path>.header32.npy``.  ``np.save`` appends the ``.npy`` suffix.  The
    row index is not written.

    Parameters
    ----------
    path : str
        File prefix, without extension.
    dataframe : pandas.DataFrame
        Frame to store.  Columns outside ``keep`` must be convertible to
        float32.
    keep : collection of column labels, optional
        Labels to store without the float32 cast.  Defaults to an empty
        tuple (everything is cast); ``None`` is treated the same way.

    Notes
    -----
    Columns are selected by position rather than by label.  Selecting by a
    list of labels returns every matching column for each listed label, so a
    frame with duplicated column names used to produce a data array wider
    than its header.
    """
    keep = () if keep is None else keep
    labels = list(dataframe.columns)
    kept_flags = [label in keep for label in labels]
    pos64 = [pos for pos, kept in enumerate(kept_flags) if kept]
    pos32 = [pos for pos, kept in enumerate(kept_flags) if not kept]
    col64 = [labels[pos] for pos in pos64]
    col32 = [labels[pos] for pos in pos32]
    # Positional take: exactly one data column per header entry, even when
    # labels repeat.
    dataframe64 = dataframe.iloc[:, pos64]
    dataframe32 = dataframe.iloc[:, pos32]
    np.save(path + ".data64", dataframe64.values)
    np.save(path + ".header64", col64)
    np.save(path + ".data32", dataframe32.values.astype(np.float32))
    np.save(path + ".header32", col32)


def load_dataframe32(path, nrows=None):
    """Read back the frame stored by ``save_dataframe32``.

    Looks for ``<path>.data32.npy`` and ``<path>.data64.npy``.  For each
    data file that exists, the matching ``<path>.header32.npy`` /
    ``<path>.header64.npy`` supplies the column labels.  The parts are
    joined side by side, the "32" columns first and the "64" columns after
    them.  If neither data file exists an empty DataFrame is returned.

    Parameters
    ----------
    path : str
        File prefix, without extension.
    nrows : int or None, optional
        When truthy and positive, only the first ``nrows`` rows are
        returned; otherwise the whole frame is returned.

    Returns
    -------
    pandas.DataFrame
    """
    suffixes = ("32", "64")
    data_files = [path + ".data" + suffix + ".npy" for suffix in suffixes]
    header_files = [path + ".header" + suffix + ".npy" for suffix in suffixes]

    combined = pd.DataFrame()
    for data_file, header_file in zip(data_files, header_files):
        if not os.path.exists(data_file):
            # This part was never written; skip it.
            continue
        values = np.load(data_file)
        labels = np.load(header_file)
        part = pd.DataFrame(data=values, columns=labels)
        combined = pd.concat([combined, part], axis=1)

    if not nrows or not nrows > 0:
        return combined
    return combined.head(nrows)


In [2]:
# List the saved feature files. os.listdir returns entries in arbitrary
# order, so sort them to make the listing reproducible across runs.
sorted(os.listdir("./neptune-features/"))

['data_1000_1174_11.data32.npy',
 'data_1000_1174_11.data64.npy',
 'data_1000_1174_11.header32.npy',
 'data_1000_1174_11.header64.npy',
 'data_1000_1174_15.data32.npy',
 'data_1000_1174_15.data64.npy',
 'data_1000_1174_15.header32.npy',
 'data_1000_1174_15.header64.npy',
 'data_1000_1174_19.data32.npy',
 'data_1000_1174_19.data64.npy',
 'data_1000_1174_19.header32.npy',
 'data_1000_1174_19.header64.npy',
 'data_1000_1174_3.data32.npy',
 'data_1000_1174_3.data64.npy',
 'data_1000_1174_3.header32.npy',
 'data_1000_1174_3.header64.npy',
 'data_1000_1174_7.data32.npy',
 'data_1000_1174_7.data64.npy',
 'data_1000_1174_7.header32.npy',
 'data_1000_1174_7.header64.npy',
 'data_200_1174_1.data32.npy',
 'data_200_1174_1.data64.npy',
 'data_200_1174_1.header32.npy',
 'data_200_1174_1.header64.npy',
 'data_200_1174_10.data32.npy',
 'data_200_1174_10.data64.npy',
 'data_200_1174_10.header32.npy',
 'data_200_1174_10.header64.npy',
 'data_200_1174_13.data32.npy',
 'data_200_1174_13.data64.npy',
 'da

## data

In [13]:
# Load the frame stored under the "data_200_1174_1" prefix, print its
# (rows, columns) shape, and preview the first rows.
# NOTE(review): the variable name suggests this is the first validation set
# of fold 0 — confirm against whatever produced these files.
data_validate1_fold0 = load_dataframe32("./neptune-features/data_200_1174_1")
print(data_validate1_fold0.shape)
data_validate1_fold0.head()

(200, 1174)


Unnamed: 0,annuity_income_percentage,car_to_birth_ratio,car_to_employ_ratio,children_ratio,credit_to_annuity_ratio,credit_to_goods_ratio,credit_to_income_ratio,days_employed_percentage,income_credit_percentage,income_per_child,...,NAME_HOUSING_TYPE,OCCUPATION_TYPE,ORGANIZATION_TYPE,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,WALLSMATERIAL_MODE,WEEKDAY_APPR_PROCESS_START,nan_count
0,0.15,,,0.0,20.0,1.0,3.0,0.13954,0.333333,135000.0,...,1.0,1.0,39.0,1.0,1.0,1.0,1.0,0.0,3.0,464.0
1,0.07,,,0.0,20.0,1.0,1.4,0.440456,0.714286,112500.0,...,1.0,1.0,26.0,1.0,1.0,1.0,1.0,1.0,6.0,788.0
2,0.243667,,,0.0,34.199726,1.0,8.333333,0.286616,0.12,135000.0,...,1.0,2.0,41.0,1.0,1.0,1.0,1.0,1.0,6.0,819.0
3,0.15,-0.000186,-0.001704,0.0,20.0,1.0,3.0,0.109203,0.333333,180000.0,...,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,765.0
4,0.238422,,,0.0,13.705378,1.105602,3.267667,0.077134,0.306029,202500.0,...,1.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,1.0,866.0


In [14]:
# Load the frame stored under the "data_200_1174_2" prefix, print its
# (rows, columns) shape, and preview the first rows.
# NOTE(review): the preview recorded for this cell shows the same rows as the
# one recorded for "data_200_1174_1" — confirm the two files are meant to
# differ.
data_validate2_fold0 = load_dataframe32("./neptune-features/data_200_1174_2")
print(data_validate2_fold0.shape)
data_validate2_fold0.head()

(200, 1174)


Unnamed: 0,annuity_income_percentage,car_to_birth_ratio,car_to_employ_ratio,children_ratio,credit_to_annuity_ratio,credit_to_goods_ratio,credit_to_income_ratio,days_employed_percentage,income_credit_percentage,income_per_child,...,NAME_HOUSING_TYPE,OCCUPATION_TYPE,ORGANIZATION_TYPE,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,WALLSMATERIAL_MODE,WEEKDAY_APPR_PROCESS_START,nan_count
0,0.15,,,0.0,20.0,1.0,3.0,0.13954,0.333333,135000.0,...,1.0,1.0,39.0,1.0,1.0,1.0,1.0,0.0,3.0,464.0
1,0.07,,,0.0,20.0,1.0,1.4,0.440456,0.714286,112500.0,...,1.0,1.0,26.0,1.0,1.0,1.0,1.0,1.0,6.0,788.0
2,0.243667,,,0.0,34.199726,1.0,8.333333,0.286616,0.12,135000.0,...,1.0,2.0,41.0,1.0,1.0,1.0,1.0,1.0,6.0,819.0
3,0.15,-0.000186,-0.001704,0.0,20.0,1.0,3.0,0.109203,0.333333,180000.0,...,1.0,4.0,1.0,1.0,1.0,1.0,1.0,1.0,7.0,765.0
4,0.238422,,,0.0,13.705378,1.105602,3.267667,0.077134,0.306029,202500.0,...,1.0,0.0,1.0,1.0,2.0,1.0,1.0,0.0,1.0,866.0


## index

In [16]:
# Load the two index arrays saved for fold 0.
# NOTE(review): file names suggest train / validation row positions — confirm
# which frame they index into.
train_idx_fold0 = np.load("./neptune-features/train_idx_0.npy")
valid_idx_fold0 = np.load("./neptune-features/valid_idx_0.npy")

In [17]:
# Display the index array loaded from train_idx_0.npy.
train_idx_fold0

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,  10,  11,  12,  13,
        14,  15,  16,  17,  19,  20,  21,  22,  23,  24,  25,  26,  27,
        28,  29,  30,  31,  32,  33,  35,  36,  37,  38,  40,  41,  42,
        43,  45,  46,  47,  48,  49,  50,  51,  52,  54,  55,  56,  58,
        60,  61,  62,  63,  64,  65,  67,  68,  70,  72,  73,  74,  75,
        76,  77,  78,  79,  81,  82,  83,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 104,
       105, 106, 107, 108, 109, 111, 112, 113, 114, 115, 116, 117, 118,
       119, 121, 122, 123, 124, 125, 126, 127, 128, 130, 132, 133, 134,
       136, 137, 140, 141, 144, 145, 146, 147, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 164, 165, 166, 167, 168, 170,
       172, 174, 175, 177, 178, 179, 180, 181, 182, 184, 185, 187, 188,
       189, 190, 191, 192, 193, 195, 196, 198, 199, 200, 201, 202, 203,
       204, 205, 206, 207, 209, 211, 213, 214, 216, 217, 219, 22

In [18]:
# Display the index array loaded from valid_idx_0.npy.
valid_idx_fold0

array([  9,  18,  34,  39,  44,  53,  57,  59,  66,  69,  71,  80,  84,
       103, 110, 120, 129, 131, 135, 138, 139, 142, 143, 148, 149, 150,
       163, 169, 171, 173, 176, 183, 186, 194, 197, 208, 210, 212, 215,
       218, 224, 227, 235, 238, 240, 242, 245, 255, 258, 274, 278, 280,
       302, 304, 318, 322, 340, 347, 354, 355, 361, 371, 374, 376, 378,
       384, 406, 409, 411, 421, 432, 434, 435, 440, 463, 468, 469, 476,
       477, 482, 493, 498, 499, 505, 506, 514, 517, 520, 525, 533, 535,
       536, 542, 543, 544, 547, 550, 552, 553, 554, 557, 564, 566, 569,
       571, 592, 596, 597, 601, 611, 613, 620, 621, 623, 630, 631, 632,
       633, 634, 636, 646, 653, 654, 668, 671, 679, 682, 685, 690, 694,
       699, 704, 708, 709, 718, 725, 736, 739, 742, 743, 744, 746, 753,
       758, 774, 775, 777, 778, 779, 781, 785, 791, 792, 796, 799, 802,
       803, 804, 818, 836, 841, 845, 846, 852, 854, 856, 866, 871, 873,
       876, 882, 887, 891, 895, 899, 904, 912, 915, 918, 926, 93