In [13]:
import os
import pandas as pd
import plotly.express as px
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

In [14]:
# -----------
# DEFINITIONS
# -----------
# Model identifier; used below as the last component of the image output directory.
MODEL = "lgbm"

In [15]:
# make output directory
# exist_ok=True keeps this cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.makedirs().
path = os.path.join("images", "models", MODEL)
os.makedirs(path, exist_ok=True)

In [16]:
# load data
# Both CSV files are read with pandas defaults from the relative data/ directory.
train, test = pd.read_csv('data/train.csv'), pd.read_csv('data/test.csv')

In [17]:
# prepare submission
# Take an explicit copy of the id column: a bare test[['id']] selection is a
# derived frame, and adding the 'target' column to it later would raise
# pandas' SettingWithCopyWarning.
submission = test[['id']].copy()

In [18]:
# remove outliers
# Only rows whose target is strictly positive are kept.
has_positive_target = train['target'].gt(0)
train = train[has_positive_target]

In [19]:
# split out the target
# The row identifier is excluded from the features here. The later "drop id"
# cell only rebinds train/test, so without this exclusion 'id' would remain in
# x_train/x_test and be fed to the model as a feature.
feature_cols = [col for col in train.columns if col not in ('id', 'target')]
x_train = train[feature_cols]
y_train = train['target']
# Select the same columns, in the same order, so the scaler and model see
# identically laid-out matrices for train and test.
x_test = test[feature_cols]

In [20]:
# drop id
# drop() returns new frames, so only the names train/test are rebound here.
train = train.drop(columns='id')
test = test.drop(columns='id')

In [21]:
# scaling
# The min-max scaler is fitted on the training features only and then applied
# to both the training and the test features.
sc = MinMaxScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

In [22]:
# Fit a LightGBM regressor with default hyper-parameters (fit() returns the
# estimator itself), then predict the test features.
clf = lgb.LGBMRegressor().fit(x_train, y_train)
y_pred = clf.predict(x_test)

In [25]:
# Attach the predictions to the submission frame and to the test frame.
submission['target'] = y_pred
test['target'] = y_pred

# Tag each frame with its origin so the histogram can colour by it.
train['label'] = 'train'
test['label'] = 'test'
df = pd.concat([train, test])

# Overlaid histograms of the training target and the predicted test target,
# with a box plot drawn in the margin.
fig = px.histogram(
    df,
    x='target',
    color='label',
    marginal="box",
    barmode="overlay",
)
out_file = os.path.join(path, "lgbm_scaled.png")
fig.write_image(out_file, width=1080, height=720, scale=2)

In [24]:
# Write the submission frame to submission.csv in the working directory,
# leaving out the DataFrame index.
submission.to_csv('submission.csv', index=False)