# 1. SETTINGS

In [1]:
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)

import matplotlib.pyplot as plt
import seaborn as sns 

import lightgbm as lgb

import os
import time
import multiprocessing

from sklearn.metrics import log_loss
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix

import tsfresh
from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import impute

import warnings
warnings.filterwarnings('ignore')

import gc
gc.enable()

In [2]:
### FUNCTION 4
def remove_bands(df):
    
    ##### INDIVIDUAL VARIABLES
    
    # extract some bands
    t2 = df.loc[:, df.columns.str.endswith('_p2')].divide(3)
    t3 = df.loc[:, df.columns.str.endswith('_p3')].divide(3)
    t4 = df.loc[:, df.columns.str.endswith('_p4')].divide(3)

    # rename columns
    t2.columns = [col.replace("_p2", "_p234") for col in t2.columns]
    t3.columns = [col.replace("_p3", "_p234") for col in t3.columns]
    t4.columns = [col.replace("_p4", "_p234") for col in t4.columns]

    # average
    t234 = t2.add(t3)
    t234 = t234.add(t4)

    # remove individual bands
    df = df.loc[:, ~df.columns.str.endswith('_p2')]
    df = df.loc[:, ~df.columns.str.endswith('_p3')]
    df = df.loc[:, ~df.columns.str.endswith('_p4')]

    # merge averaged band
    df = pd.concat([df, t234], axis = 1)
    
    
    ##### PASSBAND RATIOS
    
    # extract some bands
    t2 = df.filter(like = 'p2_p0').divide(3)
    t3 = df.filter(like = 'p3_p0').divide(3)
    t4 = df.filter(like = 'p4_p0').divide(3)

    # rename columns
    t2.columns = [col.replace("p2_p0", "p234_p0") for col in t2.columns]
    t3.columns = [col.replace("p3_p0", "p234_p0") for col in t3.columns]
    t4.columns = [col.replace("p4_p0", "p234_p0") for col in t4.columns]

    # average
    t234 = t2.add(t3)
    t234 = t234.add(t4)

    # remove individual bands
    #drops = list(df.filter(like = 'p2_p0').columns) + list(df.filter(like = 'p3_p0').columns) + list(df.filter(like = 'p4_p0').columns)
    #keeps = [f for f in df.columns if f not in drops]
    #df = df[keeps]

    # merge averaged band
    df = pd.concat([df, t234], axis = 1)
        
    return df

In [3]:
### FUNCTION 5
def add_dist_ratios(df):
    
    # compute ratios
    df['dist_by_med_flux_p0'] = df['distmod'] - df['flux_median_p0']
    df['dist_by_med_flux_p1'] = df['distmod'] - df['flux_median_p1']
    df['dist_by_med_flux_p2'] = df['distmod'] - df['flux_median_p2']
    df['dist_by_med_flux_p3'] = df['distmod'] - df['flux_median_p3']
    df['dist_by_med_flux_p4'] = df['distmod'] - df['flux_median_p4']
    df['dist_by_med_flux_p5'] = df['distmod'] - df['flux_median_p5']
       
    return df

# 2. DATA PREPARATION

## TRAIN

## TEST

## MERGER AND SCALING

In [4]:
### IMPORT READY DATA
data = pd.read_csv('../input/data_v10_merged.csv')
data.shape

(3500738, 422)

In [5]:
# drop some features
oof_df = data[['object_id']]
del data['object_id'], data['hostgal_specz']

In [6]:
# impute inf & null
data.replace(to_replace = [-np.inf, np.inf], value = np.nan, inplace = True)
data_mean = data.median(axis = 0, skipna = True)
data.fillna(data_mean, inplace = True)
data = data.astype('float32')

In [7]:
# rescale
from sklearn.preprocessing import StandardScaler, MinMaxScaler
ss = MinMaxScaler()
data = ss.fit_transform(data)
data = pd.DataFrame(data)
data.shape

(3500738, 420)

# 3. AUTOENCODER

In [39]:
# libraries
import keras
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Reshape
from keras import regularizers
from keras import backend as K

In [40]:
# parameters
encoding_dim = 30
num_epochs   = 50
num_batch    = 250

In [41]:
# clear session
K.clear_session()

In [42]:
### AUTOENCODER

# dimensions
input_dim = data.shape[1]

# architecture type
autoencoder = Sequential()

# encoder layers
autoencoder.add(Dense(4 * encoding_dim, input_shape = (input_dim,), activation = 'relu'))
autoencoder.add(Dense(2 * encoding_dim, activation = 'relu'))
autoencoder.add(Dense(encoding_dim, activation = 'relu'))

# decoder layers
autoencoder.add(Dense(2 * encoding_dim, activation = 'relu'))
autoencoder.add(Dense(4 * encoding_dim, activation = 'relu'))
autoencoder.add(Dense(input_dim, activation = 'sigmoid'))

In [43]:
### ENCODER PART

# dimensions
input_img = Input(shape = (input_dim, ))

# encoder layers
encoder_layer1 = autoencoder.layers[0]
encoder_layer2 = autoencoder.layers[1]
encoder_layer3 = autoencoder.layers[2]
encoder = Model(input_img, encoder_layer3(encoder_layer2(encoder_layer1(input_img))))

In [None]:
### MODELING

# compile
autoencoder.compile(optimizer = 'adam', loss = 'binary_crossentropy')

# fit
autoencoder.fit(data, data,
                epochs     = num_epochs,
                batch_size = num_batch)

Epoch 1/50
Epoch 2/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
 309750/3500738 [=>............................] - ETA: 9:52 - loss: 0.340

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 7/50
Epoch 8/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 11/50
 595500/3500738 [====>.........................] - ETA: 7:22 - loss: 0.3407

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 14/50
Epoch 15/50
Epoch 16/50
 120250/3500738 [>.............................]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 17/50
Epoch 19/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 20/50
Epoch 21/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 358750/3500738 [==>...........................] - ETA: 7:43 - loss: 0.3407

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 23/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 26/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 635500/3500738 [====>.........................] - ETA: 6:42 - loss: 0.3407

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 29/50
Epoch 31/50


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 32/50
Epoch 33/50
 757500/3500738 [=====>........................] - ETA: 5:25 - loss: 0.3407

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 36/50
Epoch 37/50

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 392250/3500738 [==>...........................] - ETA: 6:07 - loss: 0.3406

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Epoch 40/50

In [None]:
# predict
oof_preds = encoder.predict(data)
preds = pd.DataFrame(oof_preds)
preds.columns = ['auto' + str(l) for l in list(preds.columns)]
preds.insert(loc = 0, column = 'object_id', value = oof_df.object_id.reset_index(drop = True))

In [None]:
preds.describe()

In [None]:
preds.to_csv('../input/auto_f30_b250_e50.csv', index = False)
preds.shape

# 4. CV