Thanks to [Firat Gonen](https://www.kaggle.com/frtgnn) for helping me with this model for the Ion Switching competition. This is a model we implemented in the initial stages of the competition, and it therefore has an embarrassing score! :)

# Necessary imports

In [None]:
import gc
import numpy as np
import pandas as pd
import xgboost as xgb
from math import sqrt
import seaborn as sns
import lightgbm as lgb
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error,f1_score, accuracy_score

# Load train and test data

In [None]:
# Read the train and test CSV files into DataFrames, in that order.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/ion-switch-model-ready-data-frame-to-work-locally/train_ion_switch.csv',
        '../input/ion-switch-model-ready-data-frame-to-work-locally/test_ion_switch.csv',
    )
)

# Memory Reduction
Without this step, the notebook will crash due to excessive memory usage.

In [None]:
def reduce_mem_usage(df, verbose=True):
    """Downcast the numeric columns of *df* to the smallest dtype that fits.

    Each column whose dtype is one of int16/int32/int64/float16/float32/float64
    is cast to the narrowest integer (int8..int64) or float (float16..float64)
    type whose representable range contains the column's min and max. Other
    columns are left untouched.

    Args:
        df: DataFrame to shrink. It is modified in place.
        verbose: When true, print the resulting memory usage and the
            percentage saved.

    Returns:
        The same DataFrame object that was passed in.

    Note:
        The float check only looks at the value range, not at precision, so
        a float64 column can end up as float16 and lose significant digits.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            # Bounds are inclusive: a column whose extremes sit exactly on a
            # dtype limit (e.g. -128 or 127) still fits in that dtype.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min >= limits.min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Out of float32 range, or min/max is NaN (comparisons are
                # False): fall back to the widest float type.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the ratio so a frame that occupies no memory does not
        # produce a division by zero.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [None]:
# Shrink both frames (train first, then test) before any modelling.
train, test = map(reduce_mem_usage, (train, test))

In [None]:
# Separate the regression target from the feature columns.
target_col = 'open_channels'
y = train[target_col]
train = train.drop(columns=[target_col])

# K-Fold Technique for Cross-Validation

In [None]:
# Keep the 'time' columns aside and remove them from the feature frames.
id_train = train['time']
id_test = test['time']

train = train.drop(columns='time')
test = test.drop(columns='time')

# Single source of truth for the number of CV folds: the splitter is built
# from nfolds so the two can never disagree.
nfolds = 4
folds = KFold(n_splits=nfolds, shuffle=True, random_state=4590)

In [None]:
# XGBoost booster parameters (regression on squared error, scored by RMSE).
# 'verbosity': 0 replaces the removed 'silent': 1 flag, which current XGBoost
# releases no longer recognise and warn about.
params = {
    'min_child_weight': 7,
    'colsample_bytree': 0.7,
    'max_depth': 10,
    'eta': 0.2,
    'subsample': 0.6,
    'lambda': 2,
    'nthread': -1,
    'booster': 'gbtree',
    'verbosity': 0,
    'gamma': 0,
    'alpha': 1,
    'eval_metric': 'rmse',
    'objective': 'reg:squarederror',
}

# Train the model

In [None]:
# mvalid collects out-of-fold predictions for the training rows;
# mfull accumulates the fold-averaged predictions for the test rows.
mvalid = np.zeros(len(train))
mfull  = np.zeros(len(test))

num_round = 2000

# The test matrix is the same for every fold, so build it once.
test_data = xgb.DMatrix(test)

# KFold only needs the number of samples, so pass the frame itself instead of
# materialising train.values (twice) as dense arrays.
for fold_, (trn_idx, val_idx) in enumerate(folds.split(train)):
    print('----')
    print("fold n°{}".format(fold_))

    # The fold indices are positional, so index the target by position too.
    x0, y0 = train.iloc[trn_idx], y.iloc[trn_idx]
    x1, y1 = train.iloc[val_idx], y.iloc[val_idx]

    print(y0.size, y1.size)

    # Dump the validation (y1) and training (y0) targets of this fold.
    pd.DataFrame(y1).to_csv('y_test_fold' + str(fold_) + '.csv', index=False)
    pd.DataFrame(y0).to_csv('y_test_exp_fold' + str(fold_) + '.csv', index=False)

    trn_data = xgb.DMatrix(x0, label=y0)
    val_data = xgb.DMatrix(x1, label=y1)

    clf = xgb.train(params, trn_data, num_round, evals=[(val_data, "val_data")],
                    verbose_eval=200, early_stopping_rounds=100)

    # best_iteration is a 0-based round index, so the half-open range must end
    # at best_iteration + 1 to include the best round itself. iteration_range
    # (XGBoost >= 1.4) supersedes the deprecated ntree_limit argument.
    best_range = (0, clf.best_iteration + 1)

    mvalid[val_idx] = clf.predict(val_data, iteration_range=best_range)

    # Written after every fold; rows of folds not yet processed are still 0.
    pd.DataFrame(mvalid).to_csv('xgb_preds_exp_fold' + str(fold_) + '.csv', index=False)

    mfull += clf.predict(test_data, iteration_range=best_range) / folds.n_splits

    # Running average so far; only the last fold's file holds the full mean.
    pd.DataFrame(mfull).to_csv('xgb_preds_fold' + str(fold_) + '.csv', index=False)

# Out-of-fold RMSE over all training rows (shown as the cell output).
np.sqrt(mean_squared_error(y, mvalid))

# File submission

In [None]:
sub = pd.read_csv("../input/liverpool-ion-switching/sample_submission.csv")

submission = pd.DataFrame()
submission['time'] = sub['time']
submission['open_channels'] = mfull
# The model is a regressor, so its output is rounded to the nearest integer to
# obtain class values. Rounded predictions can overshoot the labels seen in
# training (e.g. -1), so they are clipped to [y.min(), y.max()] before the
# final cast to integer.
submission['open_channels'] = (
    submission['open_channels']
    .round(decimals=0)
    .clip(lower=y.min(), upper=y.max())
    .astype(int)
)
# float_format only applies to float columns; open_channels is integer here.
submission.to_csv('submission.csv', float_format='%0.4f', index=False)

In [None]:
# Show the last five rows of the submission frame as a quick sanity check.
submission.tail(n=5)