In [12]:
import numpy as np
import pandas as pd
import torch
import tqdm
import os
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_boston, make_classification, make_regression
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder, StandardScaler, RobustScaler
from sklearn import metrics
from sklearn.metrics import mean_squared_error, mean_squared_log_error, mean_absolute_error, median_absolute_error

from lightgbm import LGBMRegressor

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.set_option("display.max_columns", None)

In [2]:
# Column-name groups. NOTE(review): neither list is referenced by the cells
# visible in this notebook - presumably consumed by preprocessing elsewhere.
cat_cols = [
    'Neutered/Spayed',
    'Gender',
    'MIXED_BREED_FLAG',
    'BREED',
    'neuter_dt_bool',
]

# NOTE(review): 'DW_PET_ID' looks like an identifier rather than a measurement;
# confirm it is meant to be treated as a numeric column.
num_cols = [
    'DW_PET_ID',
    'ALB', 'ALKP', 'ALT', 'AMYL',
    'BUN', 'CA', 'CHOL', 'CREA',
    'EOSINOPHIL', 'GLOB', 'GLU',
    'HCT', 'HGB', 'LYMPHOCYTE',
    'MCH', 'MCHC', 'MCV', 'MONOCYTE', 'MPV',
    'PHOS', 'PLT', 'RBC', 'RDW',
    'TBIL', 'TP', 'WBC',
]

In [9]:
# Read the pre-split feature/target CSVs; each split is loaded as a
# (features, target) pair of DataFrames.
X_train, y_train = pd.read_csv('data/X_train.csv'), pd.read_csv('data/y_train.csv')
X_valid, y_valid = pd.read_csv('data/X_valid.csv'), pd.read_csv('data/y_valid.csv')

In [13]:
# Baseline: LightGBM regressor with the L1 objective and otherwise default
# hyper-parameters, fitted on the training split.
model = LGBMRegressor(objective="regression_l1", n_jobs=-1)
model.fit(X_train, y_train)

# Predict each split once and reuse the arrays for every metric below.
train_pred = model.predict(X_train)
valid_pred = model.predict(X_valid)

print("Train MAE: ", mean_absolute_error(y_train, train_pred))
# The next two metrics are computed on the validation split.
print("MeanAE: ", mean_absolute_error(y_valid, valid_pred))
print("MedianAE: ", median_absolute_error(y_valid, valid_pred))

Train MAE:  2.0317840233479054
MeanAE:  2.0641994975445517
MedianAE:  1.4483482767917746


In [11]:
# Tuned configuration: DART boosting with the L1 objective.
# NOTE(review): the values below presumably come from a hyper-parameter
# search that is not shown in this notebook - confirm their provenance.
tuned_params = dict(
    objective="regression_l1",
    boosting_type='dart',
    learning_rate=0.13110563910426518,
    max_depth=50,
    n_estimators=1000,
    num_leaves=200,
    reg_alpha=1.5921278701590583e-05,
    reg_lambda=1.0,
    subsample_for_bin=200000,
)

# Rebinds `model`, replacing any previously fitted estimator of that name.
model = LGBMRegressor(n_jobs=-1, **tuned_params)
model.fit(X_train, y_train)

# Predict each split once and reuse the arrays for every metric below.
train_pred = model.predict(X_train)
valid_pred = model.predict(X_valid)

print("Train MAE: ", mean_absolute_error(y_train, train_pred))
# The next two metrics are computed on the validation split.
print("MeanAE: ", mean_absolute_error(y_valid, valid_pred))
print("MedianAE: ", median_absolute_error(y_valid, valid_pred))

Train MAE:  1.9297663520882544
MeanAE:  2.02428259663832
MedianAE:  1.3721584083552105


In [None]:
# Load the test-set features and preview the first five rows.
X_test = pd.read_csv("data/X_test.csv")
X_test.head(n=5)

In [None]:
# Build the submission frame: the id column from X_test plus the model's
# prediction for each row.
#
# Double brackets select a one-column DataFrame (single brackets return a
# Series, on which `sub_df['AgeEstimate'] = ...` would set one labelled
# element instead of adding a column). `.copy()` detaches it from X_test so
# the column assignment below does not trigger SettingWithCopyWarning.
sub_df = X_test[['DW_PET_VST_ID']].copy()

# `DataFrame.drop(label)` defaults to axis=0 (row labels) and would raise
# KeyError here; the id is a column, so it must be dropped via `columns=`.
sub_df['AgeEstimate'] = model.predict(X_test.drop(columns='DW_PET_VST_ID'))
sub_df.head()

In [None]:
# Write the submission file. index=False keeps pandas' positional index out
# of the CSV, so the file contains only the columns held in sub_df.
# NOTE(review): assumes the submission format does not expect an index
# column - confirm against the submission spec.
sub_df.to_csv("data/submission.csv", index=False)