## DCN V2 Baseline

https://www.kaggle.com/code/nadare/tensorflow-baseline-lb-0-681

### Configuration

In [9]:
# モジュールの動的import(import先のファイルが更新されたときに追従する)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import os
from glob import glob
import gc

import numpy as np
import polars as pl
import pandas as pd
import tensorflow as tf

from src.utils.pipeline import Pipeline
from src.utils.aggregator import Aggregator


In [11]:
tf.version.VERSION

'2.12.0'

In [12]:
ROOT = "/kaggle"
TRAIN_DIR = os.path.join(ROOT, "input", "parquet_files", "train")
TEST_DIR = os.path.join(ROOT, "input", "parquet_files", "test")

### Read data

In [13]:
def read_file(path, depth=None):
  df = pl.read_parquet(path)
  df = df.pipe(Pipeline.set_table_dtypes)

  # depth>0 は集計値を使わないと、left join時にOOMになる（RAM 64GBでも)
  if depth in [1, 2]:
    df = df.group_by("case_id").agg(Aggregator.get_exprs(df))

  return df

def read_files(regex_path, depth=None):
  chunks = []
  for path in glob(str(regex_path)):
    chunks.append(pl.read_parquet(path).pipe(Pipeline.set_table_dtypes))

  df = pl.concat(chunks, how="vertical_relaxed")

  # depth>0 は集計値を使わないと、left join時にOOMになる（RAM 64GBでも)
  if depth in [1, 2]:
    df = df.group_by("case_id").agg(Aggregator.get_exprs(df))

  return df

In [14]:
data_store = {
  "base": read_file(os.path.join(TRAIN_DIR, "train_base.parquet")),
  "depth_0": [
      read_file(os.path.join(TRAIN_DIR, "train_static_cb_0.parquet")),
      read_files(os.path.join(TRAIN_DIR, "train_static_0_*.parquet")),
  ],
  "depth_1": [
      read_files(os.path.join(TRAIN_DIR, "train_applprev_1_*.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_tax_registry_a_1.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_tax_registry_b_1.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_tax_registry_c_1.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_credit_bureau_b_1.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_other_1.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_person_1.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_deposit_1.parquet"), 1),
      read_file(os.path.join(TRAIN_DIR, "train_debitcard_1.parquet"), 1),
  ],
  "depth_2": [
      read_file(os.path.join(TRAIN_DIR, "train_credit_bureau_b_2.parquet"), 2),
  ],
}

In [17]:
def feature_eng(base, depth_0, depth_1, depth_2):
  base_df = (
    base.with_columns(
      month_decision=pl.col("date_decision").dt.month(),
      weekday_decision=pl.col("date_decision").dt.weekday(),
    )
  )

  # 全てのdfをleft join
  for i, df in enumerate(depth_0+depth_1+depth_2):
    base_df = base_df.join(df, how="left", on="case_id", suffix=f"_{i}")

  base_df = base_df.pipe(Pipeline.handle_dates).to_pandas()
  base_df = base_df.pipe(Pipeline.reduce_memory)

  return base_df

In [18]:
train_df = feature_eng(**data_store)

print(f"train data shape: {train_df.shape}")

del data_store
gc.collect()


start - end memory:- 4357.62 - 2649.80 Mb
train data shape: (1526659, 376)


266

In [19]:
pd.set_option('display.max_columns', 500)
train_df

Unnamed: 0,case_id,WEEK_NUM,target,month_decision,weekday_decision,assignmentdate_238D,assignmentdate_4527235D,assignmentdate_4955616D,birthdate_574D,contractssum_5085716L,dateofbirth_337D,dateofbirth_342D,days120_123L,days180_256L,days30_165L,days360_512L,days90_310L,description_5085714M,education_1103M,education_88M,firstquarter_103L,for3years_128L,for3years_504L,for3years_584L,formonth_118L,formonth_206L,formonth_535L,forquarter_1017L,forquarter_462L,forquarter_634L,fortoday_1092L,forweek_1077L,forweek_528L,forweek_601L,foryear_618L,foryear_818L,foryear_850L,fourthquarter_440L,maritalst_385M,maritalst_893M,numberofqueries_373L,pmtaverage_3A,pmtaverage_4527227A,pmtaverage_4955615A,pmtcount_4527229L,pmtcount_4955617L,pmtcount_693L,pmtscount_423L,pmtssum_45A,requesttype_4525192L,responsedate_1012D,responsedate_4527233D,responsedate_4917613D,riskassesment_302T,riskassesment_940T,secondquarter_766L,thirdquarter_1082L,actualdpdtolerance_344P,amtinstpaidbefduel24m_4187115A,annuity_780A,annuitynextmonth_57A,applicationcnt_361L,applications30d_658L,applicationscnt_1086L,applicationscnt_464L,applicationscnt_629L,applicationscnt_867L,avgdbddpdlast24m_3658932P,avgdbddpdlast3m_4187120P,avgdbdtollast24m_4525197P,avgdpdtolclosure24_3658938P,avginstallast24m_3658937A,avglnamtstart24m_4525187A,avgmaxdpdlast9m_3716943P,avgoutstandbalancel6m_4187114A,avgpmtlast12m_4525200A,bankacctype_710L,cardtype_51L,clientscnt12m_3712952L,clientscnt3m_3712950L,clientscnt6m_3712949L,clientscnt_100L,clientscnt_1022L,clientscnt_1071L,clientscnt_1130L,clientscnt_136L,clientscnt_157L,clientscnt_257L,clientscnt_304L,clientscnt_360L,clientscnt_493L,clientscnt_533L,clientscnt_887L,clientscnt_946L,cntincpaycont9m_3716944L,cntpmts24_3658933L,commnoinclast6m_3546845L,credamount_770A,credtype_322L,currdebt_22A,currdebtcredtyperange_828A,datefirstoffer_1144D,datelastinstal40dpd_247D,datelastunpaid_3546854D,daysoverduetolerancedd_3976961L,deferredmnthsnum_166L,disbursedcredamount_1113A,disbursementtype_67L,downpmt_116A,dtlastpmtallstes_4499206D,eir_270L,equalitydataagreement_891L,equalityempfrom_62L,firstclxcampaign_1125D,firstdatedue_489D,homephncnt_628L,inittransactionamount_650A,inittransactioncode_186L,interestrate_311L,interestrategrace_34L,isbidproduct_1095L,isbidproductrequest_292L,isdebitcard_729L,lastactivateddate_801D,lastapplicationdate_877D,lastapprcommoditycat_1041M,lastapprcommoditytypec_5251766M,lastapprcredamount_781A,lastapprdate_640D,lastcancelreason_561M,lastdelinqdate_224D,lastdependentsnum_448L,lastotherinc_902A,lastotherlnsexpense_631A,lastrejectcommoditycat_161M,lastrejectcommodtypec_5251769M,lastrejectcredamount_222A,lastrejectdate_50D,lastrejectreason_759M,lastrejectreasonclient_4145040M,lastrepayingdate_696D,lastst_736L,maininc_215A,mastercontrelectronic_519L,mastercontrexist_109L,maxannuity_159A,maxannuity_4075009A,maxdbddpdlast1m_3658939P,maxdbddpdtollast12m_3658940P,maxdbddpdtollast6m_4187119P,maxdebt4_972A,maxdpdfrom6mto36m_3546853P,maxdpdinstldate_3546855D,maxdpdinstlnum_3546846P,maxdpdlast12m_727P,maxdpdlast24m_143P,maxdpdlast3m_392P,maxdpdlast6m_474P,maxdpdlast9m_1059P,maxdpdtolerance_374P,maxinstallast24m_3658928A,maxlnamtstart6m_4525199A,maxoutstandbalancel12m_4187113A,maxpmtlast3m_4525190A,mindbddpdlast24m_3658935P,mindbdtollast24m_4525191P,mobilephncnt_593L,monthsannuity_845L,numactivecreds_622L,numactivecredschannel_414L,numactiverelcontr_750L,numcontrs3months_479L,numincomingpmts_3546848L,numinstlallpaidearly3d_817L,numinstls_657L,numinstlsallpaid_934L,numinstlswithdpd10_728L,numinstlswithdpd5_4187116L,numinstlswithoutdpd_562L,numinstmatpaidtearly2d_4499204L,numinstpaid_4499208L,numinstpaidearly3d_3546850L,numinstpaidearly3dest_4493216L,numinstpaidearly5d_1087L,numinstpaidearly5dest_4493211L,numinstpaidearly5dobd_4499205L,numinstpaidearly_338L,numinstpaidearlyest_4493214L,numinstpaidlastcontr_4325080L,numinstpaidlate1d_3546852L,numinstregularpaid_973L,numinstregularpaidest_4493210L,numinsttopaygr_769L,numinsttopaygrest_4493213L,numinstunpaidmax_3546851L,numinstunpaidmaxest_4493212L,numnotactivated_1143L,numpmtchanneldd_318L,numrejects9m_859L,opencred_647L,paytype1st_925L,paytype_783L,payvacationpostpone_4187118D,pctinstlsallpaidearl3d_427L,pctinstlsallpaidlat10d_839L,pctinstlsallpaidlate1d_3546856L,pctinstlsallpaidlate4d_3546849L,pctinstlsallpaidlate6d_3546844L,pmtnum_254L,posfpd10lastmonth_333P,posfpd30lastmonth_3976960P,posfstqpd30lastmonth_3976962P,previouscontdistrict_112M,price_1097A,sellerplacecnt_915L,sellerplacescnt_216L,sumoutstandtotal_3546847A,sumoutstandtotalest_4493215A,totaldebt_9A,totalsettled_863A,totinstallast1m_4525188A,twobodfilling_608L,typesuite_864L,validfrom_1069D,max_actualdpd_943P,max_annuity_853A,max_credacc_actualbalance_314A,max_credacc_credlmt_575A,max_credacc_maxhisbal_375A,max_credacc_minhisbal_90A,max_credamount_590A,max_currdebt_94A,max_downpmt_134A,max_mainoccupationinc_437A,max_maxdpdtolerance_577P,max_outstandingdebt_522A,max_revolvingaccount_394A,max_approvaldate_319D,max_creationdate_885D,max_dateactivated_425D,max_dtlastpmt_581D,max_dtlastpmtallstes_3545839D,max_employedfrom_700D,max_firstnonzeroinstldate_307D,max_cancelreason_3545846M,max_district_544M,max_education_1138M,max_postype_4733339M,max_profession_152M,max_rejectreason_755M,max_rejectreasonclient_4145042M,max_byoccupationinc_3656910L,max_childnum_21L,max_credacc_status_367L,max_credacc_transactions_402L,max_credtype_587L,max_familystate_726L,max_inittransactioncode_279L,max_isbidproduct_390L,max_isdebitcard_527L,max_pmtnum_8L,max_status_219L,max_tenor_203L,max_num_group1,max_amount_4527230A,max_recorddate_4527225D,max_name_4527232M,max_num_group1_3,max_amount_4917619A,max_deductiondate_4917603D,max_name_4917606M,max_num_group1_4,max_pmtamount_36A,max_processingdate_168D,max_employername_160M,max_num_group1_5,max_amount_1115A,max_credlmt_1052A,max_credlmt_228A,max_credlmt_3940954A,max_debtpastduevalue_732A,max_debtvalue_227A,max_dpd_550P,max_dpd_733P,max_dpdmax_851P,max_installmentamount_644A,max_installmentamount_833A,max_instlamount_892A,max_maxdebtpduevalodued_3940955A,max_overdueamountmax_950A,max_pmtdaysoverdue_1135P,max_residualamount_1093A,max_residualamount_127A,max_residualamount_3940956A,max_totalamount_503A,max_totalamount_881A,max_contractdate_551D,max_contractmaturitydate_151D,max_lastupdate_260D,max_classificationofcontr_1114M,max_contractst_516M,max_contracttype_653M,max_credor_3940957M,max_periodicityofpmts_997M,max_pmtmethod_731M,max_purposeofcred_722M,max_subjectrole_326M,max_subjectrole_43M,max_credquantity_1099L,max_credquantity_984L,max_dpdmaxdatemonth_804T,max_dpdmaxdateyear_742T,max_interesteffectiverate_369L,max_interestrateyearly_538L,max_numberofinstls_810L,max_overdueamountmaxdatemonth_494T,max_overdueamountmaxdateyear_432T,max_periodicityofpmts_997L,max_pmtnumpending_403L,max_num_group1_6,max_amtdebitincoming_4809443A,max_amtdebitoutgoing_4809440A,max_amtdepositbalance_4809441A,max_amtdepositincoming_4809444A,max_amtdepositoutgoing_4809442A,max_num_group1_7,max_mainoccupationinc_384A,max_birth_259D,max_birthdate_87D,max_empl_employedfrom_271D,max_contaddr_district_15M,max_contaddr_zipcode_807M,max_education_927M,max_empladdr_district_926M,max_empladdr_zipcode_114M,max_language1_981M,max_registaddr_district_1083M,max_registaddr_zipcode_184M,max_childnum_185L,max_contaddr_matchlist_1032L,max_contaddr_smempladdr_334L,max_empl_employedtotal_800L,max_empl_industry_691L,max_familystate_447L,max_gender_992L,max_housetype_905L,max_housingtype_772L,max_incometype_1044T,max_isreference_387L,max_maritalst_703L,max_personindex_1023L,max_persontype_1072L,max_persontype_792L,max_relationshiptoclient_415T,max_relationshiptoclient_642T,max_remitter_829L,max_role_1084L,max_role_993L,max_safeguarantyflag_411L,max_sex_738L,max_type_25L,max_num_group1_8,max_amount_416A,max_contractenddate_991D,max_openingdate_313D,max_num_group1_9,max_last180dayaveragebalance_704A,max_last180dayturnover_1134A,max_last30dayturnover_651A,max_openingdate_857D,max_num_group1_10,max_pmts_dpdvalue_108P,max_pmts_pmtsoverdue_635A,max_pmts_date_1107D,max_num_group1_11,max_num_group2
0,0,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1917.599976,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,30000.000000,CAL,0.000000,0.000000,,,,,0.0,30000.000000,GBA,0.0,,0.449951,,,,,0.0,,CASH,0.449951,,False,,,,,a55475b1,a55475b1,,,a55475b1,,,,,a55475b1,a55475b1,,,a55475b1,a55475b1,,,,0.0,0.0,0.000000,,,,,0.000000,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,,OTHER,OTHER,,,,,,,24.0,0.0,0.0,,a55475b1,,0.0,0.0,,,0.000000,0.000000,,BO,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10800.0,-11874,,-475.0,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,False,False,MORE_FIVE,OTHER,MARRIED,,,,SALARIED_GOVT,,,2.0,5.0,5.0,SPOUSE,SPOUSE,False,PE,,True,F,PRIMARY_MOBILE,3,,,,,,,,,,,,,,
1,1,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3134.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,19999.800781,CAL,0.000000,0.000000,,,,,0.0,19999.800781,GBA,0.0,,0.299805,,,,,0.0,,CASH,0.299805,0.0,False,,,,,a55475b1,a55475b1,,,a55475b1,,,,,a55475b1,a55475b1,,,a55475b1,a55475b1,,,,0.0,0.0,0.000000,,,,,0.000000,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,,OTHER,OTHER,,,,,,,18.0,0.0,0.0,,a55475b1,,0.0,0.0,,,0.000000,0.000000,,BO,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10000.0,-22435,,-3718.0,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,False,False,MORE_FIVE,OTHER,DIVORCED,,,,SALARIED_GOVT,,,2.0,5.0,5.0,SIBLING,SIBLING,False,PE,,True,M,PRIMARY_MOBILE,4,,,,,,,,,,,,,,
2,2,0,0,1,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4937.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,78000.000000,CAL,0.000000,0.000000,,,,,0.0,78000.000000,GBA,0.0,,0.449951,,,,,1.0,,CASH,0.449951,,False,,,,-2102.0,a55475b1,a55475b1,,,a55475b1,,,,,a55475b1,a55475b1,10000.000000,-2102.0,a55475b1,a55475b1,,D,,0.0,0.0,0.000000,,,,,0.000000,,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,2.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,,36.0,0.0,0.0,,a55475b1,,0.0,0.0,,,0.000000,0.000000,,BO,AL,,0.0,1682.400024,,0.0,,,16000.000000,,0.0,8200.0,,,,,-2102.0,,,,-3244.0,-2072.0,a55475b1,P136_108_173,P97_36_170,a55475b1,a55475b1,a55475b1,a55475b1,,0.0,,,CAL,SINGLE,CASH,False,,24.0,D,24.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14000.0,-16105,,-3244.0,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,False,False,MORE_FIVE,OTHER,MARRIED,,,,EMPLOYED,,,2.0,5.0,5.0,SPOUSE,SPOUSE,False,PE,,True,F,PRIMARY_MOBILE,4,,,,,,,,,,,,,,
3,3,0,0,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4643.600098,0.000000,0.0,1.0,0.0,2.0,0.0,1.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,40000.000000,CAL,0.000000,0.000000,,,,,0.0,40000.000000,GBA,0.0,,0.419922,True,True,,,0.0,,CASH,0.419922,0.0,False,,,,4.0,a55475b1,a55475b1,,,P94_109_143,,,,,a55475b1,a55475b1,59999.800781,4.0,P94_109_143,a55475b1,,D,,0.0,0.0,0.000000,,,,,0.000000,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,1.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,1.0,False,OTHER,OTHER,,,,,,,12.0,0.0,0.0,,a55475b1,,1.0,1.0,,,0.000000,0.000000,,BO,AL,,0.0,6140.000000,,0.0,,,59999.800781,,0.0,11000.0,,,,,4.0,,,,-233.0,35.0,P94_109_143,P131_33_167,P97_36_170,a55475b1,a55475b1,P94_109_143,a55475b1,,,,,CAL,MARRIED,CASH,False,,12.0,D,12.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10000.0,-9286,,-233.0,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,False,False,MORE_FIVE,OTHER,MARRIED,,,,EMPLOYED,,,1.0,4.0,4.0,SPOUSE,SPOUSE,False,PE,,True,F,PRIMARY_MOBILE,2,,,,,,,,,,,,,,
4,4,0,1,1,5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3390.199951,0.000000,0.0,1.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,44000.000000,CAL,0.000000,0.000000,,,,,0.0,44000.000000,GBA,0.0,,0.449951,,,,,1.0,,CASH,0.449951,,False,,,,4.0,a55475b1,a55475b1,,,P24_27_36,,,,,a55475b1,a55475b1,,,a55475b1,a55475b1,,T,,0.0,0.0,0.000000,,,,,0.000000,0.0,,,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,1.0,,0.0,0.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,False,OTHER,OTHER,,,,,,,24.0,0.0,0.0,,a55475b1,,0.0,0.0,,,0.000000,0.000000,,BO,AL,,0.0,2556.600098,,0.0,,,40000.000000,,0.0,16000.0,,,,,4.0,,,,,35.0,P24_27_36,P194_82_174,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,,,,CAL,,CASH,False,,24.0,T,24.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,24000.0,-9134,,-1481.0,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,False,False,MORE_FIVE,OTHER,MARRIED,,,,EMPLOYED,,,2.0,5.0,5.0,SIBLING,SIBLING,False,PE,,True,F,PRIMARY_MOBILE,3,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1526654,2703450,91,0,10,1,,,-998.0,,52863.589844,-22192.0,,0.0,0.0,0.0,0.0,0.0,2fc785b2,a55475b1,a55475b1,0.0,,,,,,,,,,,,,,,,,1.0,a55475b1,a55475b1,0.0,,,12155.000000,,12.0,,,,,,,14.0,,,1.0,1.0,0.0,176561.359375,3675.400146,0.000000,0.0,0.0,0.0,0.0,0.0,10.0,-23.0,-43.0,-23.0,0.0,7356.800293,,0.0,16392.496094,6750.200195,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,24.0,0.0,30000.000000,CAL,0.000000,0.000000,-2784.0,,-1481.0,8.0,0.0,30000.000000,GBA,0.0,,0.419922,,,-1610.0,-4832.0,0.0,,CASH,0.419922,,True,,,-355.0,-362.0,P12_6_178,P142_50_170,20020.0,-362.0,a55475b1,-1481.0,,,,a55475b1,a55475b1,150000.000000,-1040.0,P94_109_143,P94_109_143,,K,36000.0,0.0,0.0,75521.906250,,0.0,0.0,0.0,105019.789062,0.0,-1512.0,6.0,0.0,0.0,0.0,0.0,0.0,8.0,46718.199219,49651.402344,77533.757812,14346.319336,-144.0,-144.0,3.0,65.0,1.0,0.0,1.0,0.0,92.0,106.0,0.0,112.0,0.0,1.0,117.0,106.0,113.0,103.0,103.0,11.0,11.0,99.0,99.0,99.0,12.0,4.0,113.0,113.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False,OTHER,OTHER,,0.911621,0.026550,0.035400,0.035400,0.035400,12.0,0.0,0.0,0.0,P123_39_170,0.0,0.0,8.0,0.000000,0.000000,0.000000,428159.656250,14346.319336,FO,,,0.0,30875.000000,0.092000,0.0,0.092000,0.092000,150000.000000,0.000000,0.0,50000.0,6.0,0.000000,760714944.0,-362.0,-362.0,-355.0,3.0,3.0,-2090.0,-332.0,a55475b1,P123_39_170,a55475b1,P60_146_156,a55475b1,a55475b1,a55475b1,1.0,2.0,AC,0.0,REL,MARRIED,POS,True,True,24.0,K,24.0,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.000000,0.0,0.0,0.0,0.0,40000.0,-22193,,,P123_39_170,P40_33_175,a55475b1,a55475b1,a55475b1,P209_127_106,P123_39_170,P40_33_175,,False,False,,,,,OWNED,,RETIRED_PENSIONER,,,0.0,1.0,1.0,,,,CL,,True,F,PRIMARY_MOBILE,0,,,,,,,,,,,,,,
1526655,2703451,91,0,10,1,,,-5592.0,,324608.531250,-25536.0,,0.0,0.0,0.0,0.0,0.0,2fc785b2,a55475b1,a55475b1,1.0,,,,,,,,,,,,,,,,,0.0,a55475b1,a55475b1,0.0,,,22904.599609,,12.0,,,,,,,14.0,,,1.0,2.0,0.0,301276.468750,7088.600098,6191.600098,0.0,0.0,5.0,0.0,0.0,5.0,-18.0,-12.0,-18.0,0.0,12553.200195,,0.0,105129.312500,15780.400391,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,21.0,0.0,100000.000000,CAL,68098.398438,68098.398438,-1679.0,,-128.0,2.0,0.0,40739.539062,GBA,0.0,,0.399902,,,-1679.0,-1876.0,2.0,,CASH,0.399902,,True,,,-364.0,-371.0,a55475b1,a55475b1,0.0,-371.0,a55475b1,-128.0,,,,a55475b1,a55475b1,,,a55475b1,a55475b1,,A,,0.0,0.0,117251.601562,,0.0,2.0,2.0,202775.546875,0.0,-128.0,7.0,2.0,2.0,0.0,2.0,2.0,2.0,40499.800781,116813.398438,250031.203125,40499.804688,-92.0,-92.0,2.0,55.0,2.0,0.0,1.0,0.0,69.0,70.0,24.0,73.0,0.0,0.0,76.0,69.0,75.0,70.0,70.0,0.0,0.0,70.0,70.0,70.0,0.0,1.0,75.0,75.0,11.0,11.0,11.0,11.0,0.0,0.0,0.0,False,OTHER,OTHER,,0.945801,0.000000,0.013512,0.000000,0.000000,24.0,0.0,0.0,0.0,P162_18_172,,0.0,3.0,68098.398438,68098.398438,68098.398438,701247.312500,40499.804688,FO,,,0.0,12809.200195,90.000000,0.0,90.000000,90.000000,114000.000000,59773.714844,0.0,50000.0,1.0,68098.398438,780594496.0,-371.0,-371.0,-364.0,-7.0,8.0,,-341.0,a55475b1,P162_18_172,a55475b1,P46_145_78,a55475b1,a55475b1,a55475b1,10340.0,0.0,AC,0.0,REL,MARRIED,NDF,True,True,24.0,K,24.0,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,27500.0,27477.599609,0.0,0.0,0.0,0.0,36800.0,-25541,,,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,False,False,,,,,,,RETIRED_PENSIONER,,,0.0,1.0,1.0,,,,CL,,True,F,PRIMARY_MOBILE,1,,,,,,,,,,,,,,
1526656,2703452,91,0,10,1,,,,,102738.757812,-15768.0,,2.0,2.0,0.0,3.0,2.0,2fc785b2,a55475b1,a55475b1,0.0,,,,,,,,,,,,,,,,,1.0,a55475b1,a55475b1,3.0,,,,,,,,,,,,14.0,,,0.0,4.0,0.0,14232.400391,7788.800293,0.000000,0.0,0.0,0.0,0.0,0.0,3.0,-12.0,,-16.0,1.0,2662.400146,,,,1500.599976,CA,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,60000.000000,CAL,0.000000,0.000000,,,-569.0,4.0,0.0,60000.000000,GBA,0.0,-350.0,0.419922,,,,-720.0,0.0,,CASH,0.419922,,True,,,-409.0,-45.0,P159_130_59,P75_90_70,3998.0,-413.0,P180_60_137,-569.0,,,,a55475b1,a55475b1,,,a55475b1,a55475b1,,T,24000.0,0.0,0.0,6600.000000,,,-27.0,,17143.400391,4.0,-597.0,5.0,0.0,4.0,0.0,0.0,0.0,4.0,3243.400146,4182.000000,0.000000,,-27.0,-55.0,1.0,9.0,0.0,0.0,0.0,1.0,9.0,6.0,0.0,6.0,0.0,0.0,8.0,6.0,9.0,6.0,6.0,3.0,3.0,6.0,6.0,6.0,3.0,3.0,9.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False,OTHER,OTHER,,0.666504,0.000000,0.333252,0.111084,0.000000,11.0,0.0,0.0,0.0,P133_44_167,0.0,0.0,1.0,0.000000,0.000000,0.000000,24002.000000,,BO,,,0.0,9048.000000,,0.0,,,80000.000000,0.000000,0.0,34000.0,1.0,0.000000,,-413.0,-45.0,-409.0,-350.0,-350.0,-977.0,-14.0,a55475b1,P133_44_167,a55475b1,P67_102_161,a55475b1,a55475b1,a55475b1,,,,,COL,MARRIED,POS,True,,14.0,T,14.0,2.0,,,,,15552.0,-4.0,ad68e80f,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,30000.0,-15771,,,P133_44_167,P59_150_74,a55475b1,a55475b1,a55475b1,P209_127_106,P19_11_176,P11_15_81,,False,False,,,,,,,PRIVATE_SECTOR_EMPLOYEE,,,0.0,1.0,1.0,,,,CL,,False,M,PRIMARY_MOBILE,0,,,,,,,,,,,,,,
1526657,2703453,91,0,10,1,,,-4616.0,,212683.296875,-25808.0,,2.0,2.0,1.0,4.0,1.0,2fc785b2,6b2ae0fa,a55475b1,1.0,,,,,,,,,,,,,,,,,3.0,3439d993,a55475b1,4.0,,,15792.400391,,14.0,,,,,,,12.0,,,2.0,1.0,0.0,197371.578125,1195.400024,2827.199951,0.0,0.0,36.0,0.0,0.0,9.0,-33.0,-64.0,-34.0,0.0,8212.600586,,0.0,47943.062500,9921.200195,CA,,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,7.0,23.0,0.0,6000.000000,CAL,46806.601562,46806.601562,-2350.0,,-994.0,38.0,0.0,6000.000000,GBA,0.0,,0.419922,,,-1202.0,-2624.0,1.0,,CASH,0.419922,,True,,,-287.0,-292.0,a55475b1,a55475b1,0.0,-292.0,a55475b1,-994.0,,,,P159_130_59,P174_113_42,2198.000000,-2656.0,a55475b1,a55475b1,,A,,0.0,0.0,163202.000000,,-66.0,0.0,-33.0,126780.000000,2.0,-1878.0,13.0,0.0,0.0,0.0,0.0,0.0,34.0,88740.804688,94265.203125,81604.601562,2827.199951,-68.0,-68.0,2.0,79.0,2.0,1.0,1.0,0.0,119.0,73.0,30.0,89.0,7.0,9.0,109.0,74.0,119.0,78.0,73.0,12.0,12.0,61.0,61.0,61.0,13.0,23.0,115.0,119.0,17.0,17.0,17.0,17.0,0.0,0.0,0.0,False,OTHER,OTHER,,0.696289,0.043488,0.205322,0.108093,0.099121,6.0,0.0,0.0,0.0,P123_6_84,0.0,0.0,4.0,46806.601562,46806.601562,46806.601562,440145.312500,5654.399902,BO,,,0.0,5981.399902,179.423996,0.0,398.024017,198.024002,123800.000000,34550.855469,0.0,76000.0,33.0,46806.601562,780825920.0,-292.0,-292.0,-287.0,-293.0,4.0,,-261.0,a55475b1,P123_6_84,a55475b1,P46_145_78,a55475b1,a55475b1,a55475b1,33059.0,0.0,AC,14.0,REL,MARRIED,POS,False,True,48.0,K,48.0,8.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,13454.0,13333.400391,822.0,0.0,6.8,0.0,30000.0,-25814,,,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,a55475b1,,False,False,,,,,,,RETIRED_PENSIONER,,,0.0,1.0,1.0,,,,CL,,False,F,PRIMARY_MOBILE,1,44916.644531,-861.0,-1956.0,1.0,,,,-1956.0,1.0,,,,,


In [23]:
# @ref: https://www.kaggle.com/code/ravi20076/homecredit-starter-training-v1/notebook Processing
feature_columns = train_df.drop(columns=["case_id", "WEEK_NUM", "target"], errors="ignore").columns
feature_ix_columns = [col + "_ix" for col in feature_columns]
category_columns = train_df.select_dtypes(include="object").columns

print(f"feature_columns: {len(feature_columns)}, category_columns: {len(category_columns)}")

In [30]:
# category to int
feature_d = {}

for col in feature_columns:
    vc = train_df[col].fillna("nan_value").astype(str).value_counts()
    d = {v: i for i, v in enumerate(vc.index, start=1)}
    # TODO: 代入先を新しいdfにする
    train_df[col+"_ix"] = np.vectorize(lambda v: d.get(v, 0))(train_df[col].fillna("nan_value").astype(str)).astype(np.int32)

    feature_d[col] = d

    del train_df[col]
    gc.collect()

feature_num_vocabs = [len(feature_d[v]) + 1 for v in feature_columns]

for col, num_vocabs in zip(feature_columns, feature_num_vocabs):
    print(col, num_vocabs)

KeyError: 'month_decision'

In [None]:
# aggregate unique_feature
unique_train_df = train_df[feature_ix_columns].drop_duplicates()
unique_train_df["unique_ix"] = np.arange(unique_train_df.shape[0])

train_df = train_df.merge(unique_train_df, on=feature_ix_columns, how="left")
unique_feature = tf.convert_to_tensor(unique_train_df[feature_ix.columns].values)

In [None]:
from src.dataloader import DataLoader
from src.models import DCNV2
from src.processor import Trainer, Predictor

dataloader = DataLoader(train_data, train_labels, unique_feature)
model = DCNV2(
    feature_num_vocabs=feature_num_vocabs,
    feat_dim=8,
    out_dim=32,
    num_cross=5,
    num_linear=0,
)
# Predictor不要かも
predictor = Predictor(out_dim=2)
trainer = Trainer(model, predictor)

SyntaxError: invalid syntax (2990837518.py, line 4)

### Training

In [None]:
@tf.function(experimental_relax_shapes=True)
def forward_step(batch_inputs):
    with tf.GradientTape() as tape:
        loss, f1, acc = trainer(batch_inputs, training=True)

    gradients = tape.gradient(loss, trainer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, trainer.trainable_variables))

    return loss, f1, acc