In [1]:
import glob
from IPython.display import display
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import japanize_matplotlib
import seaborn as sns 
plt.style.use('ggplot')
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [2]:
path_in_train = "../data/raw/train.csv"
path_in_weather = "../data/raw/weather.csv"
path_in_test = "../data/raw/test.csv"

In [3]:
train_df = pd.read_csv(path_in_train)
wea_df = pd.read_csv(path_in_weather)
test_df = pd.read_csv(path_in_test)

In [4]:
train_df.head()

Unnamed: 0,kind,date,amount,mode_price,area
0,だいこん,20051107,201445,735.0,千葉
1,だいこん,20051108,189660,840.0,千葉_各地_青森
2,だいこん,20051110,218166,735.0,千葉_各地_青森
3,だいこん,20051111,182624,682.5,千葉_青森
4,だいこん,20051112,220691,682.5,千葉_青森


In [5]:
test_df.head()

Unnamed: 0,kind,date,area
0,だいこん,20220502,千葉_各地_青森
1,だいこん,20220506,千葉_各地_青森
2,だいこん,20220507,千葉_各地_青森
3,だいこん,20220509,千葉_各地_青森
4,だいこん,20220510,千葉_各地_青森


In [6]:
wea_df.head()

Unnamed: 0,date,mean_temp,max_temp,max_temp_time,min_temp,min_temp_time,sum_rain,sun_time,mean_humid,area
0,20041106,13.1,20.1,2004/11/6 12:50,8.1,2004/11/6 05:31,5.0,1.6,77.0,青森
1,20041107,9.6,12.5,2004/11/7 13:40,5.4,2004/11/7 22:17,0.0,4.1,63.0,青森
2,20041108,9.0,15.5,2004/11/8 12:51,3.2,2004/11/8 06:28,0.0,8.9,72.0,青森
3,20041109,12.2,17.7,2004/11/9 14:40,5.7,2004/11/9 02:49,1.5,1.2,85.0,青森
4,20041110,11.6,16.4,2004/11/10 14:42,6.9,2004/11/10 06:23,0.0,7.5,89.0,青森


In [7]:
print(train_df.shape)
print(test_df.shape)
print(wea_df.shape)

(97782, 5)
(320, 3)
(204320, 10)


# 日次データ処理

In [8]:
for df in (train_df, wea_df, test_df):
    df['date'] = pd.to_datetime(df["date"], format="%Y%m%d")
    
for i in ("max_temp_time", "min_temp_time"):
    wea_df.loc[:, i] = pd.to_datetime(wea_df[i], format="%Y/%m/%d %H:%M")

# concat

In [9]:
all_df = pd.concat([train_df,test_df])
# sort
all_df = all_df.sort_values(["kind","date"])

In [10]:
all_df

Unnamed: 0,kind,date,amount,mode_price,area
90690,いんげん,2006-06-01,6960.0,1680.0,千葉_各地
90691,いんげん,2006-06-02,5840.0,1680.0,千葉_各地
90692,いんげん,2006-06-03,9320.0,1942.5,千葉_各地
90693,いんげん,2006-06-05,10241.0,1627.5,千葉_茨城
90694,いんげん,2006-06-06,5887.0,1522.5,千葉_茨城
...,...,...,...,...,...
75,レタス,2022-05-26,,,各地_茨城_長野
76,レタス,2022-05-27,,,各地_茨城_長野
77,レタス,2022-05-28,,,各地_茨城_長野
78,レタス,2022-05-30,,,各地_茨城_長野


# areaを合わせる

In [11]:
all_df["area"].unique(), wea_df["area"].unique()

(array(['千葉_各地', '千葉_茨城', '千葉_各地_茨城', '千葉', '福島', '各地_福島_青森', '各地_福島',
        '福島_青森', '各地_福島_長崎', '各地_福島_茨城', '福島_長崎', '各地', '各地_福島_鹿児島',
        '各地_沖縄', '各地_沖縄_鹿児島', '各地_鹿児島', '各地_長崎_鹿児島', '各地_長崎', '千葉_各地_鹿児島',
        '各地_福島_群馬', '千葉_各地_長崎', '栃木', '各地_栃木', '栃木_茨城', '各地_栃木_茨城',
        '各地_和歌山_群馬', '和歌山_群馬', '和歌山', '各地_和歌山', '各地_群馬', '山形', '群馬',
        '山形_群馬', '各地_秋田_群馬', '各地_山形_秋田', '長野', '千葉_埼玉', '千葉_各地_埼玉',
        '各地_埼玉', '埼玉', '青森', 'トンガ_北海道', 'トンガ', 'トンガ_北海道_各地', 'トンガ_メキシコ_各地',
        'トンガ_各地', 'トンガ_メキシコ', 'メキシコ', 'メキシコ_各地', 'ニュージーランド_沖縄', 'ニュージーランド',
        'ニュージーランド_各地', 'ニュージーランド_鹿児島', 'ニュージーランド_各地_鹿児島', '各地_神奈川_茨城',
        '各地_茨城', '北海道_各地', '北海道', '北海道_各地_青森', '北海道_青森', 'ニュージーランド_各地_沖縄',
        '各地_神奈川', '神奈川', 'メキシコ_各地_鹿児島', 'メキシコ_鹿児島', '鹿児島', '神奈川_茨城',
        '北海道_各地_茨城', '北海道_茨城', 'ニュージーランド_メキシコ', 'ニュージーランド_メキシコ_各地',
        'メキシコ_北海道', 'メキシコ_北海道_各地', '各地_茨城_鹿児島', '各地_埼玉_群馬', '千葉_各地_宮崎',
        '各地_埼玉_福島', '各地_岩手_福島', '各地_埼玉_宮崎', '岩手', '各地_宮崎_高知', '各地_宮崎_群馬',

## 卸売データ
各地＝全国平均、複数県＝平均値

In [12]:
# 卸売データのエリア
area_pairs = all_df["area"].unique()

yasai_areas = set()

for area_pair in area_pairs:
    areas = area_pair.split("_")
    yasai_areas = (yasai_areas | set(areas)) # 論理和
    
    
# 天候データのエリア
wea_areas = wea_df["area"].unique()

In [13]:
# マッピングのための辞書を作成
area_map = {}
update_area_map = {
    '岩手':'盛岡','宮城':'仙台','静岡':'浜松','沖縄':'那覇','神奈川':'横浜','愛知':'名古屋','茨城':'水戸','北海道':'帯広','各地':'全国',
    '兵庫':'神戸','香川':'高松','埼玉':'熊谷','国内':'全国','山梨':'甲府','栃木':'宇都宮','群馬':'前橋','愛媛':'松山'
}

for yasai_area in yasai_areas:
    if (yasai_area in wea_areas):
        area_map[yasai_area] = yasai_area
    elif (yasai_area in update_area_map):
        area_map[yasai_area] = update_area_map[yasai_area]
    else:
        area_map[yasai_area] = "全国"
        
area_map

{'中国': '全国',
 '和歌山': '和歌山',
 'カナダ': '全国',
 '千葉': '千葉',
 '徳島': '徳島',
 '静岡': '浜松',
 '愛知': '名古屋',
 '高知': '高知',
 'メキシコ': '全国',
 '埼玉': '熊谷',
 '兵庫': '神戸',
 '香川': '高松',
 'ニュージーランド': '全国',
 '神奈川': '横浜',
 '宮崎': '宮崎',
 '長野': '長野',
 '沖縄': '那覇',
 'トンガ': '全国',
 'アメリカ': '全国',
 '熊本': '熊本',
 '青森': '青森',
 '東京': '東京',
 '福島': '福島',
 '各地': '全国',
 '茨城': '水戸',
 '福岡': '福岡',
 '宮城': '仙台',
 '山形': '山形',
 '群馬': '前橋',
 '山梨': '甲府',
 '秋田': '秋田',
 '栃木': '宇都宮',
 '岩手': '盛岡',
 '北海道': '帯広',
 '新潟': '新潟',
 '佐賀': '佐賀',
 '鹿児島': '鹿児島',
 '愛媛': '松山',
 '長崎': '長崎'}

In [14]:
# 卸売データのareaを置換
all_df["area"] = all_df["area"].apply(lambda x: "_".join([area_map[i] for i in x.split("_")]))
all_df.head()

Unnamed: 0,kind,date,amount,mode_price,area
90690,いんげん,2006-06-01,6960.0,1680.0,千葉_全国
90691,いんげん,2006-06-02,5840.0,1680.0,千葉_全国
90692,いんげん,2006-06-03,9320.0,1942.5,千葉_全国
90693,いんげん,2006-06-05,10241.0,1627.5,千葉_水戸
90694,いんげん,2006-06-06,5887.0,1522.5,千葉_水戸


## 天候データ
複数エリアに跨る場合はそれらの平均を取る

In [15]:
# datetime型の平均の取り方がわからないので削除
wea_df = wea_df.drop(columns=["max_temp_time","min_temp_time"])

In [16]:
# wea_dfに全国を追加する
agg_cols = [i for i in wea_df.columns if i not in ["area","date"]]
tmp_df = wea_df.groupby(["date"])[agg_cols].agg(["mean"]).reset_index()

new_cols = []
for col1,col2 in tmp_df.columns:
    new_cols.append(col1)
tmp_df.columns = new_cols

tmp_df["area"] = "全国"
tmp_df["date"] = wea_df[wea_df["area"]=="千葉"]["date"].values
tmp_df = tmp_df[wea_df.columns]

wea_df = pd.concat([wea_df, tmp_df])
wea_df.tail()

Unnamed: 0,date,mean_temp,max_temp,min_temp,sum_rain,sun_time,mean_humid,area
6380,2022-04-26,20.146875,23.94375,16.74375,19.71875,0.546875,85.0,全国
6381,2022-04-27,19.634375,24.26875,15.19375,5.40625,3.33125,77.21875,全国
6382,2022-04-28,16.965625,21.95,12.609375,0.0625,7.79375,61.9375,全国
6383,2022-04-29,13.8875,18.603125,9.59375,28.453125,1.03125,78.15625,全国
6384,2022-04-30,13.34375,18.515625,8.5,0.296875,10.0375,61.125,全国


In [17]:
area_pairs = all_df["area"].unique()
target_cols = [i for i in wea_df.columns if i not in("area","date")]
date = wea_df[wea_df["area"]=="千葉"]["date"]

In [18]:
area_pair_dfs = []

for area_pair in area_pairs:
    areas = area_pair.split("_")
    # 全ての値が０のDFを作成
    base_tmp_df = pd.DataFrame(np.zeros(wea_df[wea_df["area"]=="千葉"][target_cols].shape), columns=target_cols)
    for area in areas:
        tmp_df = wea_df[wea_df["area"]==area].reset_index(drop=True)[target_cols]
        base_tmp_df = base_tmp_df.add(tmp_df)
    base_tmp_df /= len(areas)
    base_tmp_df["area"] = area_pair
    base_tmp_df["date"] = date.to_list()
    area_pair_dfs.append(base_tmp_df)
    

# for area_pair in area_pairs:
#     areas = area_pair.split('_')
#     if len(areas) > 0:
#         area = areas[0]
#         base_tmp_df = wea_df[wea_df['area'] == area]
#         base_tmp_df = base_tmp_df[target_cols].reset_index(drop=True)
#         for area in areas[1:]:
#             tmp_df = wea_df[wea_df['area'] == area]
#             tmp_df = tmp_df[target_cols].reset_index(drop=True)
#             base_tmp_df = base_tmp_df.add(tmp_df)
#         base_tmp_df /= len(areas)
#         base_tmp_df['area'] = area_pair
#         area_pair_dfs.append(base_tmp_df)

In [19]:
area_pair_df = pd.concat(area_pair_dfs)
area_pair_df

Unnamed: 0,mean_temp,max_temp,min_temp,sum_rain,sun_time,mean_humid,area,date
0,16.462500,21.051562,12.754687,0.218750,6.007812,77.906250,千葉_全国,2004-11-06
1,16.168750,21.387500,12.003125,0.000000,8.017188,72.703125,千葉_全国,2004-11-07
2,15.409375,19.607813,11.539062,0.007812,3.462500,71.218750,千葉_全国,2004-11-08
3,16.243750,21.656250,11.640625,0.117188,7.773438,73.281250,千葉_全国,2004-11-09
4,16.589062,21.807812,11.450000,2.726562,6.204688,75.140625,千葉_全国,2004-11-10
...,...,...,...,...,...,...,...,...
6380,20.000000,22.400000,16.800000,14.500000,0.000000,90.500000,水戸_浜松,2022-04-26
6381,21.500000,26.050000,16.900000,14.750000,1.400000,80.000000,水戸_浜松,2022-04-27
6382,16.650000,21.000000,12.750000,0.000000,5.750000,60.000000,水戸_浜松,2022-04-28
6383,13.400000,17.900000,9.500000,33.500000,0.650000,78.500000,水戸_浜松,2022-04-29


In [20]:
wea_df = area_pair_df

#  結合

## all_df + wea_df
area,dateをキーとしてmerge

In [21]:
all_df = pd.merge(all_df, wea_df, on=['date', 'area'], how='left')
all_df

Unnamed: 0,kind,date,amount,mode_price,area,mean_temp,max_temp,min_temp,sum_rain,sun_time,mean_humid
0,いんげん,2006-06-01,6960.0,1680.0,千葉_全国,21.787500,27.742188,17.728125,0.023438,9.517188,64.625000
1,いんげん,2006-06-02,5840.0,1680.0,千葉_全国,20.820312,23.975000,17.831250,2.242188,1.482812,72.968750
2,いんげん,2006-06-03,9320.0,1942.5,千葉_全国,19.114062,22.946875,16.275000,0.007812,3.910938,72.171875
3,いんげん,2006-06-05,10241.0,1627.5,千葉_水戸,17.450000,21.050000,13.500000,0.000000,2.350000,76.000000
4,いんげん,2006-06-06,5887.0,1522.5,千葉_水戸,19.100000,24.050000,14.950000,6.250000,4.200000,77.000000
...,...,...,...,...,...,...,...,...,...,...,...
98097,レタス,2022-05-26,,,全国_水戸_長野,,,,,,
98098,レタス,2022-05-27,,,全国_水戸_長野,,,,,,
98099,レタス,2022-05-28,,,全国_水戸_長野,,,,,,
98100,レタス,2022-05-30,,,全国_水戸_長野,,,,,,


# ラグ特徴量

In [22]:
# wea_dfに５月も追加
# 5月の日付を取得
import datetime
start = datetime.datetime.strptime("2022-05-01", "%Y-%m-%d")
may_date = pd.date_range(start, periods=31)

for area in wea_df["area"].unique():
    # areaとdate意外NANのDF作る
    maywea_df = pd.DataFrame(columns=wea_df.columns,
                             data={"date":may_date,
                                   "area":area}
                            )
    # dtypesをfloat64に戻す
    cols = [i for i in maywea_df.columns if i not in ("date","area")]
    maywea_df[cols] = maywea_df[cols].astype("float64")
    # wea_dfとconcat
    wea_df = pd.concat([wea_df,maywea_df])
# area,dateでソート
wea_df = wea_df.sort_values(["area","date"])
wea_df

Unnamed: 0,mean_temp,max_temp,min_temp,sum_rain,sun_time,mean_humid,area,date
0,13.0,18.0,9.0,0.0,6.6,82.0,仙台,2004-11-06
1,13.7,18.6,8.9,0.0,9.4,60.0,仙台,2004-11-07
2,11.1,14.8,7.5,0.0,0.4,72.0,仙台,2004-11-08
3,13.1,19.3,7.2,0.0,9.0,77.0,仙台,2004-11-09
4,13.6,19.0,9.3,0.0,7.9,82.0,仙台,2004-11-10
...,...,...,...,...,...,...,...,...
26,,,,,,,鹿児島,2022-05-27
27,,,,,,,鹿児島,2022-05-28
28,,,,,,,鹿児島,2022-05-29
29,,,,,,,鹿児島,2022-05-30


In [23]:
# 単純ラグ特徴量
def add_lag_feat(all_df, wea_df, nshift):
    
    # mode_price, amount
    for value in ["mode_price", "amount"]:
        df_wide = all_df.pivot(index="date",columns="kind",values=value)
        df_wide_lag = df_wide.shift(nshift)
        df_long_lag = df_wide_lag.stack().reset_index()
        df_long_lag.columns = ["date", "kind", "{}_{}prev".format(value,nshift)]
        
        all_df = pd.merge(all_df, df_long_lag, on=['date', 'kind'], how='left')
        
    # wether
    cols = [i for i in wea_df.columns if i not in ("area","date")]
    for value in cols:
        
        df_wide = wea_df.pivot(index="date",columns="area",values=value)
        df_wide_lag = df_wide.shift(nshift)
        df_long_lag = df_wide_lag.stack().reset_index()
        df_long_lag.columns = ["date", "area", "{}_{}prev".format(value,nshift)]
        
        all_df = pd.merge(all_df, df_long_lag, on=['date', 'area'], how='left')
        
    return all_df

In [24]:
all_df = add_lag_feat(all_df,wea_df,31)
all_df

Unnamed: 0,kind,date,amount,mode_price,area,mean_temp,max_temp,min_temp,sum_rain,sun_time,mean_humid,mode_price_31prev,amount_31prev,mean_temp_31prev,max_temp_31prev,min_temp_31prev,sum_rain_31prev,sun_time_31prev,mean_humid_31prev
0,いんげん,2006-06-01,6960.0,1680.0,千葉_全国,21.787500,27.742188,17.728125,0.023438,9.517188,64.625000,,,20.501562,25.242188,16.687500,1.804688,8.170968,70.750000
1,いんげん,2006-06-02,5840.0,1680.0,千葉_全国,20.820312,23.975000,17.831250,2.242188,1.482812,72.968750,,,15.135938,20.078125,10.784375,11.578125,1.353125,76.625000
2,いんげん,2006-06-03,9320.0,1942.5,千葉_全国,19.114062,22.946875,16.275000,0.007812,3.910938,72.171875,,,13.131250,18.853125,8.171875,0.000000,12.228125,59.687500
3,いんげん,2006-06-05,10241.0,1627.5,千葉_水戸,17.450000,21.050000,13.500000,0.000000,2.350000,76.000000,,,18.700000,23.800000,12.750000,0.000000,10.950000,68.000000
4,いんげん,2006-06-06,5887.0,1522.5,千葉_水戸,19.100000,24.050000,14.950000,6.250000,4.200000,77.000000,,,19.450000,24.300000,15.350000,0.000000,9.250000,64.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98097,レタス,2022-05-26,,,全国_水戸_長野,,,,,,,1944.0,78486.0,18.853125,26.819792,11.769792,0.010417,10.411458,70.916667
98098,レタス,2022-05-27,,,全国_水戸_長野,,,,,,,2268.0,108357.0,20.082292,24.447917,16.281250,10.906250,0.448958,81.000000
98099,レタス,2022-05-28,,,全国_水戸_長野,,,,,,,2160.0,96217.0,18.744792,23.156250,13.464583,7.468750,3.043750,77.072917
98100,レタス,2022-05-30,,,全国_水戸_長野,,,,,,,1944.0,124337.0,12.062500,16.834375,7.731250,28.651042,0.777083,78.052083


In [94]:
all_df.corr()

Unnamed: 0,amount,mode_price,mean_temp,max_temp,min_temp,sum_rain,sun_time,mean_humid,mode_price_31prev,amount_31prev,mean_temp_31prev,max_temp_31prev,min_temp_31prev,sum_rain_31prev,sun_time_31prev,mean_humid_31prev
amount,1.0,0.056723,-0.035818,-0.031457,-0.03846,-0.003119,0.006664,0.018325,0.156461,0.814587,-0.029531,-0.02437,-0.033181,-0.009564,0.014052,0.011768
mode_price,0.056723,1.0,-0.011508,-0.012425,-0.011108,0.016256,0.003702,0.02791,0.814206,0.137841,0.006507,0.005752,0.006983,0.009936,0.006795,0.031872
mean_temp,-0.035818,-0.011508,1.0,0.985159,0.986274,0.142074,-0.001849,0.417897,-0.037212,-0.000867,0.772952,0.760456,0.762677,0.125383,0.016731,0.268633
max_temp,-0.031457,-0.012425,0.985159,1.0,0.948341,0.083922,0.116706,0.34213,-0.030302,-0.000126,0.744485,0.735378,0.732514,0.121352,0.018136,0.259856
min_temp,-0.03846,-0.011108,0.986274,0.948341,1.0,0.190818,-0.111029,0.48167,-0.042769,-0.001963,0.781159,0.766408,0.772579,0.123104,0.016014,0.270319
sum_rain,-0.003119,0.016256,0.142074,0.083922,0.190818,1.0,-0.423827,0.445202,0.009034,0.004672,0.119944,0.117925,0.115946,0.018749,0.013703,0.029835
sun_time,0.006664,0.003702,-0.001849,0.116706,-0.111029,-0.423827,1.0,-0.642882,0.021738,0.006308,-0.092332,-0.088978,-0.092164,-0.003466,0.004346,-0.054961
mean_humid,0.018325,0.02791,0.417897,0.34213,0.48167,0.445202,-0.642882,1.0,0.028397,0.02858,0.420966,0.415606,0.41332,0.059316,-0.0169,0.282421
mode_price_31prev,0.156461,0.814206,-0.037212,-0.030302,-0.042769,0.009034,0.021738,0.028397,1.0,0.133865,-0.047774,-0.04037,-0.052588,-0.003851,0.013051,0.025276
amount_31prev,0.814587,0.137841,-0.000867,-0.000126,-0.001963,0.004672,0.006308,0.02858,0.133865,1.0,-0.002076,-0.000313,-0.005524,0.00273,0.013992,0.011122


## ラベルエンコーディング

In [121]:
from sklearn.preprocessing import LabelEncoder

def get_labelencoding(all_df):
    cols = all_df.dtypes[all_df.dtypes=="object"].index
    for col in cols:
        all_df.loc[:, col] = all_df[col].fillna("NaN")
        le = LabelEncoder()
        all_df.loc[:, col] = le.fit_transform(all_df[col])

    return all_df

In [122]:
all_df_label = get_labelencoding(all_df)

In [125]:
all_df_label.dtypes

kind                          int64
date                 datetime64[ns]
amount                      float64
mode_price                  float64
area                          int64
mean_temp                   float64
max_temp                    float64
min_temp                    float64
sum_rain                    float64
sun_time                    float64
mean_humid                  float64
mode_price_31prev           float64
amount_31prev               float64
mean_temp_31prev            float64
max_temp_31prev             float64
min_temp_31prev             float64
sum_rain_31prev             float64
sun_time_31prev             float64
mean_humid_31prev           float64
dtype: object

# 予測モデル

In [127]:
date = datetime.datetime(2022,5,1)
train = all_df_label[all_df_label["date"]<date]
test = all_df_label[all_df_label["date"]>=date]

In [128]:
cols = [i for i in train.columns if i not in ("amount", "mode_price","date")]
train_x = train[cols]
train_y = train["mode_price"]
test = test[cols]

In [129]:
print(train_x.shape)
print(train_y.shape)
print(test.shape)

(97782, 16)
(97782,)
(320, 16)


In [130]:
import shap
import lightgbm as lgb

In [131]:
val_st = datetime.datetime(2021,5,1)
tr = train[train["date"]<val_st]
va = train[train["date"]>=val_st]
tr_x = train[cols]
tr_y = train["mode_price"]
va_x = va[cols]
va_y = va["mode_price"]

# 

#入力データセット
dtrain = lgb.Dataset(tr_x, tr_y)
dvalid = lgb.Dataset(va_x, va_y, reference=dtrain)

#学習
params = {
    "boosting_type": "gbdt",
    "objective": "regression",
    "metric": "mae",
    "learning_rate": 0.3,
    "num_leaves": 31,
    "colsample_bytree": 0.5,
    "reg_lambda": 5,
    "random_state": 71,
    "num_boost_round": 5000,
    "verbose_eval": False,
    "early_stopping_rounds": 100
}

model = lgb.train(params,
                  dtrain,
                  valid_sets=[dtrain, dvalid],
                  num_boost_round = 10000,
                  early_stopping_rounds=100,
                  verbose_eval=100)

Found `num_boost_round` in params. Will use it instead of argument
'early_stopping_rounds' argument is deprecated and will be removed in a future release of LightGBM. Pass 'early_stopping()' callback via 'callbacks' argument instead.
'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.


You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3836
[LightGBM] [Info] Number of data points in the train set: 97782, number of used features: 16
[LightGBM] [Info] Start training from score 1341.207081
Training until validation scores don't improve for 100 rounds
[100]	training's l1: 267.532	valid_1's l1: 285.714
[200]	training's l1: 246.866	valid_1's l1: 263.003
[300]	training's l1: 232.646	valid_1's l1: 244.831
[400]	training's l1: 220.699	valid_1's l1: 230.626
[500]	training's l1: 210.41	valid_1's l1: 218.242
[600]	training's l1: 201.236	valid_1's l1: 206.261
[700]	training's l1: 192.845	valid_1's l1: 196.675
[800]	training's l1: 185.183	valid_1's l1: 188.718
[900]	training's l1: 178.244	valid_1's l1: 182.174
[1000]	training's l1: 171.847	valid_1's l1: 175.755
[1100]	training's l1: 165.855	valid_1's l1: 169.92
[1200]	training's l1: 159.893	valid_1's l1: 163.103
[1300]	training's l1