好きな用途に利用できるnotebookです。  
データの可視化や簡単なモデルの構築などにご利用下さい。

## 必要なライブラリのimport

In [2]:
import warnings
import time
import sys
import datetime
import pickle
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', 500)

In [3]:
# Memory reduction
def reduce_mem_usage(df, verbose=True, allow_float16=True):
    """Downcast numeric columns of ``df`` to the narrowest dtype covering their range.

    Only columns whose dtype is int16/int32/int64 or float16/float32/float64
    are touched; every other column is left as it is. Columns are reassigned
    on ``df`` itself, and the same object is returned.

    Args:
        df: DataFrame to shrink.
        verbose: When true, print the final memory usage and the reduction
            percentage.
        allow_float16: When true (the default, matching the historical
            behaviour) float columns may be stored as float16. float16 keeps
            only about three significant decimal digits, so pass ``False``
            to use float32 as the narrowest float type.

    Returns:
        ``df`` with downcast columns.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32) if allow_float16 else (np.float32,)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            # Bounds are inclusive: a value equal to the dtype limit still fits.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # Anything that fits no narrower candidate (including NaN or
            # infinite extremes, for which the comparisons are false) is
            # stored as float64.
            target = np.float64
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the ratio so a zero-sized starting frame cannot divide by zero.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

## データの読み込み

In [4]:
# Resolve the input directories for the current environment.
#   path      -> directory holding the pre-processed train/test CSVs
#   data_path -> directory holding the raw transaction CSVs
# Both names are always defined so the cells that join file names onto them
# cannot fail with a NameError.
if os.name != 'nt' and 'KAGGLE_DATA_PROXY_TOKEN' in os.environ:
    # Kaggle kernel.
    # NOTE(review): `path` is kept exactly as it was; its slug ends in
    # "recommendaton" (no second "i") - confirm it matches the attached dataset.
    path = '/kaggle/input/elo-merchant-category-recommendaton/'
    # NOTE(review): assumes the raw CSVs come from the competition input
    # directory - confirm against the kernel's attached data.
    data_path = '/kaggle/input/elo-merchant-category-recommendation/'
else:
    # Windows, and any other local machine: repository-relative layout.
    path = '../../../data/processed/'
    data_path = '../../../data/elo-merchant-category-recommendation/'

# FIXME:
Kaggle環境用の `path` と `data_path` が実際のデータ配置と一致しているか確認し、PATHを修正すること！！！

In [5]:
# FIXME: PATH
# Pre-processed feature tables live under `path`.
train_path, test_path = (
    os.path.join(path, file_name)
    for file_name in ('processed20240618_train.csv', 'processed20240618_test.csv')
)

# Raw transaction logs live under `data_path`.
new_transactions_path, historical_transactions_path = (
    os.path.join(data_path, file_name)
    for file_name in ('new_merchant_transactions.csv', 'historical_transactions.csv')
)



In [6]:
# Pre-processed train/test feature tables.
df_train = pd.read_csv(filepath_or_buffer=train_path)
df_test = pd.read_csv(filepath_or_buffer=test_path)

# Raw transaction logs; `purchase_date` is parsed into datetimes on load.
new_transactions = pd.read_csv(
    filepath_or_buffer=new_transactions_path,
    parse_dates=['purchase_date'],
)
historical_transactions = pd.read_csv(
    filepath_or_buffer=historical_transactions_path,
    parse_dates=['purchase_date'],
)

In [7]:
# NOTE: despite the `df_` prefix, both names below are lists of column names.
df_original_train = [
    'card_id',
    'first_active_month',
    'feature_1',
    'feature_2',
    'feature_3',
]
# Inspect the features with the highest feature_importance as of now
# (20240618, hoji_model).
df_importance_top10 = [
    'new_purchase_date_max',
    'auth_purchase_date_max',
    'auth_month_diff_mean',
    'hist_month_diff_mean',
    'new_purchase_amount_max',
    'new_purchase_date_ptp',
    'new_purchase_month_mean',
    'auth_purchase_date_ptp',
    'new_month_lag_mean',
    'purchase_amount_max_mean',
]
# Which of these is actually contributing?
df_train[df_importance_top10].head(20)
# The date values are huge - is that OK? (they are apparently in milliseconds)
# NOTE(review): magnitudes around 1.5e9 look like epoch seconds rather than
# milliseconds - confirm.
# month_diff should probably be measured against a fixed reference date rather
# than the current time (tried it; the score may have dropped very slightly).

Unnamed: 0,new_purchase_date_max,auth_purchase_date_max,auth_month_diff_mean,hist_month_diff_mean,new_purchase_amount_max,new_purchase_date_ptp,new_purchase_month_mean,auth_purchase_date_ptp,new_month_lag_mean,purchase_amount_max_mean
0,1525001000.0,1519551000.0,76.744939,76.846154,-0.2961,4742309.0,3.478261,20977987.0,1.478261,-0.158815
1,1522393000.0,1517438000.0,77.731563,77.545455,-0.7017,4887632.0,2.5,33717687.0,1.5,0.510664
2,1524937000.0,1519759000.0,76.731707,77.0,-0.7,0.0,4.0,35635623.0,2.0,-0.62109
3,1524049000.0,1519818000.0,76.636364,,-0.567,3625505.0,3.714286,13375339.0,1.714286,-0.097344
4,1524941000.0,1519850000.0,76.664062,76.4,0.451,4949682.0,3.555556,9405641.0,1.555556,2.259349
5,1505510000.0,1501343000.0,83.8125,83.0,-9e-05,2717302.0,8.75,17274145.0,1.75,-0.240106
6,1523037000.0,1519402000.0,76.884774,76.882353,-0.6885,2697311.0,3.2,35783408.0,1.2,-0.635468
7,1518986000.0,1513885000.0,78.473684,78.666667,-0.6973,1085016.0,2.0,7251596.0,2.0,-0.347811
8,1521581000.0,1512825000.0,77.0,77.0,-0.5806,284431.0,3.0,9751281.0,1.0,-0.515289
9,1523196000.0,1519837000.0,76.848485,76.928571,-0.666,621055.0,4.0,35859021.0,2.0,-0.555582


In [8]:
df_train[df_importance_top10].describe()
# Maybe deal with the outliers tomorrow.

Unnamed: 0,new_purchase_date_max,auth_purchase_date_max,auth_month_diff_mean,hist_month_diff_mean,new_purchase_amount_max,new_purchase_date_ptp,new_purchase_month_mean,auth_purchase_date_ptp,new_month_lag_mean,purchase_amount_max_mean
count,179986.0,201917.0,201917.0,170958.0,179986.0,179986.0,179986.0,201917.0,179986.0,201917.0
mean,1520773000.0,1515760000.0,77.891003,77.862506,-0.131695,2543707.0,4.084927,20832360.0,1.476221,-0.115751
std,6347417.0,6509686.0,2.393546,2.342374,1.532163,1743585.0,2.280113,10237200.0,0.3308,1.059925
min,1488579000.0,1485935000.0,76.0,76.0,-0.747,0.0,1.0,152065.0,1.0,-0.745349
25%,1520338000.0,1514720000.0,76.701613,76.714286,-0.648,746530.5,3.0,11649080.0,1.230769,-0.568884
50%,1523342000.0,1518858000.0,76.807018,77.0,-0.513,2845612.0,3.5,19788350.0,1.5,-0.405554
75%,1524643000.0,1519662000.0,77.796875,77.888889,-0.1819,4083677.0,4.0,30944710.0,1.714286,-0.044238
max,1525133000.0,1519862000.0,89.0,89.0,76.7,5343723.0,12.0,36626920.0,2.0,72.958466


In [9]:
df_train[df_importance_top10].isnull().sum()
# Should work out why these are missing (later) -> most likely cards with no rows in `new` end up NULL (every new_* column has the same missing count) (the hist NULLs are probably cards with no rows in hist as well)


new_purchase_date_max       21931
auth_purchase_date_max          0
auth_month_diff_mean            0
hist_month_diff_mean        30959
new_purchase_amount_max     21931
new_purchase_date_ptp       21931
new_purchase_month_mean     21931
auth_purchase_date_ptp          0
new_month_lag_mean          21931
purchase_amount_max_mean        0
dtype: int64

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the training table.
df_train.describe()

Unnamed: 0,feature_1,feature_2,feature_3,target,elapsed_time,hist_transactions_count,hist_category_1_sum,hist_category_1_mean,hist_category_2_1.0_mean,hist_category_2_2.0_mean,hist_category_2_3.0_mean,hist_category_2_4.0_mean,hist_category_2_5.0_mean,hist_category_3_A_mean,hist_category_3_B_mean,hist_category_3_C_mean,hist_merchant_id_nunique,hist_merchant_category_id_nunique,hist_state_id_nunique,hist_city_id_nunique,hist_subsector_id_nunique,hist_purchase_amount_sum,hist_purchase_amount_mean,hist_purchase_amount_max,hist_purchase_amount_min,hist_purchase_amount_std,hist_installments_sum,hist_installments_mean,hist_installments_max,hist_installments_min,hist_installments_std,hist_purchase_month_mean,hist_purchase_month_max,hist_purchase_month_min,hist_purchase_month_std,hist_purchase_date_ptp,hist_purchase_date_min,hist_purchase_date_max,hist_month_lag_mean,hist_month_lag_max,hist_month_lag_min,hist_month_lag_std,hist_month_diff_mean,auth_transactions_count,auth_category_1_sum,auth_category_1_mean,auth_category_2_1.0_mean,auth_category_2_2.0_mean,auth_category_2_3.0_mean,auth_category_2_4.0_mean,auth_category_2_5.0_mean,auth_category_3_A_mean,auth_category_3_B_mean,auth_category_3_C_mean,auth_merchant_id_nunique,auth_merchant_category_id_nunique,auth_state_id_nunique,auth_city_id_nunique,auth_subsector_id_nunique,auth_purchase_amount_sum,auth_purchase_amount_mean,auth_purchase_amount_max,auth_purchase_amount_min,auth_purchase_amount_std,auth_installments_sum,auth_installments_mean,auth_installments_max,auth_installments_min,auth_installments_std,auth_purchase_month_mean,auth_purchase_month_max,auth_purchase_month_min,auth_purchase_month_std,auth_purchase_date_ptp,auth_purchase_date_min,auth_purchase_date_max,auth_month_lag_mean,auth_month_lag_max,auth_month_lag_min,auth_month_lag_std,auth_month_diff_mean,new_transactions_count,new_category_1_sum,new_category_1_mean,new_category_2_1.0_mean,new_category_2_2.0_mean,new_category_2_3.0_mean,new_category_2_4.0_mean,new_c
ategory_2_5.0_mean,new_category_3_A_mean,new_category_3_B_mean,new_category_3_C_mean,new_merchant_id_nunique,new_merchant_category_id_nunique,new_state_id_nunique,new_city_id_nunique,new_subsector_id_nunique,new_purchase_amount_sum,new_purchase_amount_mean,new_purchase_amount_max,new_purchase_amount_min,new_purchase_amount_std,new_installments_sum,new_installments_mean,new_installments_max,new_installments_min,new_installments_std,new_purchase_month_mean,new_purchase_month_max,new_purchase_month_min,new_purchase_month_std,new_purchase_date_ptp,new_purchase_date_min,new_purchase_date_max,new_month_lag_mean,new_month_lag_max,new_month_lag_min,new_month_lag_std,new_month_diff_mean,month_lag_mean,month_lag_std,purchase_amount_count_mean,purchase_amount_count_std,purchase_amount_sum_mean,purchase_amount_sum_std,purchase_amount_mean_mean,purchase_amount_mean_std,purchase_amount_min_mean,purchase_amount_min_std,purchase_amount_max_mean,purchase_amount_max_std,purchase_amount_std_mean,purchase_amount_std_std,installments_count_mean,installments_count_std,installments_sum_mean,installments_sum_std,installments_mean_mean,installments_mean_std,installments_min_mean,installments_min_std,installments_max_mean,installments_max_std,installments_std_mean,installments_std_std,authorized_flag_mean,category_1_purchase_amount_mean,category_1_purchase_amount_min,category_1_purchase_amount_max,category_1_purchase_amount_std,installments_purchase_amount_mean,installments_purchase_amount_min,installments_purchase_amount_max,installments_purchase_amount_std,city_id_purchase_amount_mean,city_id_purchase_amount_min,city_id_purchase_amount_max,city_id_purchase_amount_std,category_1_installments_mean,category_1_installments_min,category_1_installments_max,category_1_installments_std
count,201917.0,201917.0,201917.0,201917.0,201917.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,149929.0,170958.0,170958.0,170958.0,170958.0,149929.0,170958.0,170958.0,170958.0,149929.0,170958.0,170958.0,170958.0,170958.0,170958.0,170958.0,149929.0,170958.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,153199.0,179986.0,179986.0,179986.0,179986.0,153199.0,179986.0,179986.0,179986.0,153199.0,179986.0,179986.0,179986.0,179986.0,179986.0,179986.0,153199.0,179986.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,200105.0,195268.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,201917.0,200105.0,195268.0,201917.0,179986.0,179986.0,179986.0,24707.0,179986.0,179986.0,179986.0,54442.0,179986.0,179986.0,179986.0,109948.0,179986.0,179986.0,179986.0,24707.0
mean,3.105311,1.74541,0.565569,-0.393636,381.978981,9.135952,1.907211,0.167005,0.45997,0.035305,0.1372,0.0728,0.108947,0.429925,0.412814,0.145832,4.46722,3.818868,1.574784,1.968677,3.37897,73.79798,41.48107,77.6357,36.39954,14.89398,9.812276,1.281757,2.765317,0.65264,0.968499,6.524493,9.291557,3.674341,2.66768,12406640.0,1498694000.0,1511101000.0,-4.131525,-1.822278,-6.472853,2.095066,77.862506,81.558982,4.764572,0.089686,0.500305,0.036716,0.150409,0.078542,0.122838,0.476144,0.407472,0.104783,32.817583,17.788443,2.620324,4.743578,11.197626,-48.057201,-0.537956,0.908573,-0.729093,0.296594,49.546244,0.81071,3.276693,0.089527,0.639682,6.514012,11.185626,1.584399,3.421537,20832360.0,1494927000.0,1515760000.0,-3.826098,-0.191499,-7.815186,2.399019,77.891003,6.776555,0.2173,0.049146,0.524707,0.035994,0.15384,0.081576,0.128451,0.478555,0.385585,0.100245,6.683353,5.339232,1.582051,2.30943,4.453919,-3.741898,-0.524393,-0.131695,-0.660595,0.231556,4.62656,0.744392,1.479748,0.328637,0.519733,4.084927,4.519679,3.648789,0.524177,2543707.0,1518229000.0,1520773000.0,1.476221,1.815386,1.158679,0.396778,77.845091,-3.976121,2.721625,10.15297,5.477183,-5.940973,3.744065,-0.51158,0.189999,-0.641952,0.126389,-0.115751,0.553493,0.22235,0.197723,10.15297,5.477183,6.540215,3.748689,0.849704,0.418928,0.552245,0.40096,1.548506,1.043923,0.463344,0.420413,0.893353,-0.514448,-0.548484,-0.480413,0.350644,-0.46792,-0.589325,-0.304832,0.537369,-0.517949,-0.607599,-0.380523,0.20093,0.791541,0.678189,0.904893,1.167784
std,1.18616,0.751362,0.495683,3.8505,293.710176,12.421256,8.129138,0.304564,0.44887,0.168389,0.316862,0.238785,0.287773,0.444927,0.39553,0.281453,4.011189,2.95019,0.782849,1.172872,2.308419,14616.12,14543.06,14615.6,14541.45,525.445822,33.070475,9.647105,24.527993,7.750653,10.531419,2.870989,3.351162,3.464427,1.759912,10857090.0,10696890.0,8489900.0,2.715947,2.447315,3.96941,1.500721,2.342374,99.243357,16.061294,0.202862,0.435615,0.166409,0.320962,0.241873,0.294961,0.499422,0.417937,0.174376,30.437825,11.39388,1.496941,3.462667,5.18119,65.190379,0.384256,3.247188,0.09418,0.582287,89.446982,0.964914,5.128631,0.731361,1.057823,1.850014,1.872746,1.666704,1.170569,10237200.0,10319940.0,6509686.0,2.130039,0.696935,3.847831,1.179373,2.393546,6.694579,0.593864,0.164295,0.455389,0.170197,0.335172,0.253042,0.310203,0.497842,0.429808,0.218113,6.650089,4.255338,0.873557,1.670109,3.058439,4.395325,0.554888,1.532163,0.433407,0.592308,8.195394,1.138139,3.359907,1.036922,1.094625,2.280113,2.477625,2.258646,0.854921,1743585.0,6331257.0,6347417.0,0.3308,0.387985,0.365378,0.227721,2.411319,2.005362,1.174003,10.150187,5.038403,6.723107,3.543141,0.463838,0.524614,0.329801,0.478231,1.059925,1.186032,0.452998,0.490601,10.150187,5.038403,10.421322,5.828698,1.025063,0.822403,0.770898,0.812548,1.97345,1.863437,0.770941,0.835927,0.114446,0.583854,0.527115,0.717112,0.835266,0.685725,0.458111,1.322885,1.217479,0.578244,0.467502,1.010518,0.61836,1.232097,1.065111,1.571911,1.723479
min,1.0,1.0,0.0,-33.219281,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,-1013.128,-0.7468927,-0.7468927,-0.7469078,0.0,-17.0,-1.0,-1.0,-1.0,0.0,1.0,1.0,1.0,0.0,0.0,1483229000.0,1483229000.0,-13.0,-13.0,-13.0,0.0,76.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,-1823.9233,-0.745371,-0.745315,-0.746893,0.0,-9.0,-0.692308,0.0,-1.0,0.0,1.061224,2.0,1.0,0.122169,152065.0,1483229000.0,1485935000.0,-12.571429,-11.0,-13.0,0.122169,76.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,-70.814575,-0.74707,-0.747,-0.747,0.0,-6.0,-1.0,-1.0,-1.0,0.0,1.0,1.0,1.0,0.0,0.0,1488339000.0,1488579000.0,1.0,1.0,1.0,0.0,76.0,-12.0,0.707107,1.0,0.0,-165.81122,0.0,-0.745373,0.0,-0.746883,0.0,-0.745349,0.0,0.0,0.0,1.0,0.0,-3.0,0.0,-0.733333,0.0,-1.0,0.0,-0.333333,0.0,0.0,0.0,0.030488,-0.74707,-0.74707,-0.74707,0.0,-0.74707,-0.74707,-0.74707,0.0,-0.74707,-0.74707,-0.74707,0.0,-1.0,-1.0,-1.0,0.0
25%,2.0,1.0,0.0,-0.88311,153.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,2.0,-6.203094,-0.6992726,-0.6582514,-0.7377417,0.02594,0.0,0.0,0.0,0.0,0.0,4.5,8.0,1.0,1.154701,2188430.0,1488382000.0,1506978000.0,-6.0,-3.0,-10.0,0.894427,76.714286,23.0,0.0,0.0,0.04878,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.0,9.0,2.0,3.0,7.0,-60.720615,-0.680864,-0.371245,-0.743737,0.073418,0.0,0.0,0.0,0.0,0.0,5.44,12.0,1.0,2.799826,11649080.0,1484432000.0,1514720000.0,-5.5,0.0,-12.0,1.361817,76.701613,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0,1.0,1.0,2.0,-5.257324,-0.692773,-0.648,-0.736,0.039294,0.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0,0.351866,746530.5,1518432000.0,1520338000.0,1.230769,2.0,1.0,0.351091,76.636364,-6.0,1.581139,3.666667,2.12132,-7.871208,1.480667,-0.680064,0.028621,-0.732371,0.007863,-0.568884,0.109576,0.057861,0.034862,3.666667,2.12132,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.852941,-0.693604,-0.699219,-0.692017,0.045575,-0.691755,-0.700806,-0.691406,0.099958,-0.694458,-0.719727,-0.681803,0.026905,0.0,0.0,0.0,0.0
50%,3.0,2.0,1.0,-0.023437,306.0,6.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.25,0.318182,0.0,3.0,3.0,1.0,2.0,3.0,-2.714322,-0.6337172,-0.499331,-0.722114,0.082389,3.0,0.761905,1.0,0.0,0.375534,6.5,11.0,2.0,2.828427,10196630.0,1498391000.0,1513890000.0,-3.75,-1.0,-6.0,1.908627,77.0,48.0,0.0,0.0,0.454545,0.0,0.0,0.0,0.0,0.0,0.304348,0.0,24.0,15.0,2.0,4.0,11.0,-27.492445,-0.627772,0.032968,-0.739695,0.14782,12.0,1.0,1.0,0.0,0.0,6.566396,12.0,1.0,3.611021,19788350.0,1493031000.0,1518858000.0,-3.55,0.0,-7.0,2.254097,76.807018,5.0,0.0,0.0,0.631579,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,4.0,1.0,2.0,4.0,-2.686523,-0.641726,-0.513,-0.7236,0.089026,1.0,0.333333,1.0,0.0,0.0,3.5,4.0,3.0,0.5,2845612.0,1520263000.0,1523342000.0,1.5,2.0,1.0,0.5,77.0,-3.857143,2.636737,6.857143,4.041452,-3.993746,2.761295,-0.621082,0.065846,-0.715532,0.021594,-0.405554,0.243127,0.114195,0.081155,6.857143,4.041452,2.333333,1.258306,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.926829,-0.642578,-0.652637,-0.638587,0.113929,-0.631592,-0.665143,-0.623399,0.223042,-0.643555,-0.691284,-0.606201,0.065576,0.333333,0.333333,0.5,0.445215
75%,4.0,2.0,1.0,0.765453,488.0,11.0,1.0,0.2,1.0,0.0,0.0,0.0,0.0,1.0,0.818182,0.166667,6.0,5.0,2.0,2.0,5.0,-0.7413367,-0.431351,0.04423805,-0.6719255,0.27798,10.0,1.227273,3.0,1.0,0.707107,8.631579,12.0,6.0,3.979312,20909570.0,1508078000.0,1517649000.0,-2.0,0.0,-3.0,3.194964,77.888889,100.0,3.0,0.066667,0.987179,0.0,0.013158,0.0,0.0125,1.0,0.854839,0.147059,43.0,24.0,3.0,6.0,15.0,-11.393582,-0.522301,1.055192,-0.731536,0.32025,64.0,1.307692,6.0,1.0,1.052262,7.714286,12.0,1.0,4.212314,30944710.0,1504355000.0,1519662000.0,-2.0,0.0,-4.0,3.437467,77.796875,9.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.857143,0.090909,9.0,7.0,2.0,3.0,6.0,-1.211914,-0.531657,-0.1819,-0.692,0.212123,7.0,1.0,2.0,1.0,0.755929,4.0,4.0,3.0,0.534522,4083677.0,1521240000.0,1524643000.0,1.714286,2.0,1.0,0.534522,77.857143,-2.0,3.89444,12.857143,7.188208,-1.882503,4.875727,-0.499316,0.170828,-0.665619,0.076908,-0.044238,0.584795,0.23983,0.199709,12.857143,7.188208,9.285714,5.660781,1.348148,0.498017,1.0,0.666667,2.666667,1.752549,0.730081,0.647835,0.972516,-0.529175,-0.55542,-0.514537,0.311951,-0.483355,-0.59668,-0.400806,0.537616,-0.530945,-0.626465,-0.431152,0.16288,1.0,1.0,1.0,1.414214
max,5.0,3.0,1.0,17.965068,2284.0,1510.0,1510.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,70.0,42.0,11.0,17.0,22.0,6010604.0,6010604.0,6010604.0,6010604.0,85021.209923,1999.0,999.0,999.0,999.0,707.106781,12.0,12.0,12.0,7.778175,36584330.0,1519861000.0,1519862000.0,0.0,0.0,0.0,9.192388,89.0,2537.0,658.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,397.0,95.0,20.0,67.0,34.0,3627.7957,36.648655,186.47742,28.55479,54.437153,1647.0,53.7,999.0,12.0,222.544165,11.985075,12.0,11.0,7.778175,36626920.0,1514762000.0,1519862000.0,-0.014925,0.0,-1.0,7.0,89.0,109.0,16.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,109.0,40.0,14.0,28.0,24.0,109.099,44.1875,76.7,44.2,47.347411,1004.0,34.62069,999.0,12.0,185.476878,12.0,12.0,12.0,7.778175,5343723.0,1525132000.0,1525133000.0,2.0,2.0,2.0,0.707107,89.0,-0.5,7.778175,230.636364,296.52136,259.12827,215.65907,41.825645,48.009804,29.15585,48.512424,72.958466,78.07564,53.135094,61.05312,230.636364,296.52136,261.0,452.957504,36.141667,104.572838,12.0,9.192388,206.0,443.314223,99.659569,233.464661,1.0,44.1875,44.1875,44.1875,22.89615,44.1875,44.1875,76.6875,54.60351,44.1875,44.1875,76.6875,47.347412,18.410714,12.0,35.821429,24.622468


In [15]:
# Ratio of the latest `new` purchase date to the latest `hist` purchase date.
# Rows where either side is missing come out as NaN.
df_train['rate_new/hist_purchase_date_max'] = (
    df_train['new_purchase_date_max']
    .div(df_train['hist_purchase_date_max'])
)

0         1.514385e+09
1         1.514467e+09
2         1.492801e+09
3                  NaN
4         1.519759e+09
              ...     
201912    1.518114e+09
201913    1.511473e+09
201914    1.513959e+09
201915    1.509124e+09
201916    1.518705e+09
Name: hist_purchase_date_max, Length: 201917, dtype: float64