##### Load modules

In [1]:
import numpy as np
import pandas as pd
import lightgbm as lgb
from tensorflow import keras
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
def reduce_mem_usage(df):
    """Downcast the columns of a dataframe in place to reduce memory usage.

    * ``object`` columns are converted to ``category``.
    * Signed / unsigned integer columns are cast to the narrowest integer
      type of the same signedness whose range contains the column's
      min and max (bounds inclusive).
    * Float columns are cast to ``float16`` or ``float32`` when their min
      and max fit. NOTE: only the value *range* is checked, so precision
      can be lost (``float16`` keeps roughly three significant digits).
    * Every other dtype (bool, datetime, categorical, pandas extension
      dtypes, ...) is left untouched, and no column is ever widened.

    Memory usage before and after is printed to stdout.

    Args:
        df: pandas DataFrame; it is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # Candidate target types per numpy dtype kind, narrowest first.
    candidates = {
        'i': [(t, np.iinfo(t)) for t in (np.int8, np.int16, np.int32)],
        'u': [(t, np.iinfo(t)) for t in (np.uint8, np.uint16, np.uint32)],
        'f': [(t, np.finfo(t)) for t in (np.float16, np.float32)],
    }

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain numpy int/uint/float columns are downcast; min()/max()
        # and range checks are not meaningful (or raise) for other dtypes.
        if not isinstance(col_type, np.dtype) or col_type.kind not in candidates:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        for target, limits in candidates[col_type.kind]:
            if np.dtype(target).itemsize >= col_type.itemsize:
                # Remaining candidates are not narrower than the current type.
                break
            # Comparisons are False for NaN (empty / all-NaN column) and for
            # +/-inf, so such columns keep their current dtype.
            if limits.min <= c_min and c_max <= limits.max:
                df[col] = df[col].astype(target)
                break

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard against a zero-byte frame so the percentage is always a number.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return df

##### Inference on test data

In [3]:
# Load the test features, order the rows by `row_id`, then drop that column
# so only the remaining columns are passed on.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
df = (
    pd.read_pickle('test.pickle')
    .sort_values('row_id')
    .drop(columns='row_id')
)

In [4]:
# Load the three saved LightGBM boosters from `0.model`, `1.model`, `2.model`.
models = [
    lgb.Booster(model_file=f'{model_idx}.model')
    for model_idx in range(3)
]

In [5]:
def batch_inference(models, df, n_batch):
    """Predict in row batches and average the predictions of all models.

    The rows of ``df`` are processed ``n_batch`` at a time; for every batch
    each model's ``predict`` is called and the results are averaged
    element-wise across models. A progress percentage is written to stdout
    on a single, carriage-return-refreshed line.

    Args:
        models: non-empty sequence of objects exposing ``predict(batch)``.
        df: 2-D data with a ``shape`` attribute; sliced positionally
            (via ``.iloc`` when available, plain slicing otherwise).
        n_batch: number of rows per batch; coerced with ``int()`` so
            values such as ``1e4`` are accepted.

    Returns:
        numpy array with the concatenated averaged predictions; an empty
        array when ``df`` has no rows.

    Raises:
        ValueError: if ``n_batch`` is not positive or ``models`` is empty.
    """
    n_batch = int(n_batch)
    if n_batch <= 0:
        raise ValueError('n_batch must be a positive integer')
    if len(models) == 0:
        raise ValueError('models must contain at least one model')

    n_rows = df.shape[0]
    # Explicit positional slicing for pandas objects; arrays slice directly.
    rows = getattr(df, 'iloc', df)
    result = []

    for start in range(0, n_rows, n_batch):
        progress = np.round(start / n_rows * 100, 2)
        print('\r', progress, end='')

        batch = rows[start:start + n_batch]
        pred = np.mean([model.predict(batch) for model in models], axis=0)
        result.append(pred)

    print('\r', '100.00')

    if not result:
        # np.concatenate rejects an empty list; no rows means no predictions.
        return np.empty(0)
    return np.concatenate(result)

In [6]:
# Run batched inference (10,000 rows per batch), floor negative outputs at
# zero, then apply expm1 -- presumably undoing a log1p transform applied to
# the training target; confirm against the training code.
y_pred = np.expm1(np.clip(batch_inference(models, df, 1e4), 0, None))

 100.00


In [7]:
# Rebind `df` to the submission template; the test features are no longer
# referenced under this name after this point.
df = pd.read_csv(filepath_or_buffer='sample_submission.csv')

In [8]:
# Store the predictions in the `meter_reading` column (created if absent,
# replaced otherwise). Values are assigned positionally, so this assumes the
# rows of sample_submission.csv are in the same order as the predictions
# (the test rows were sorted by `row_id`) -- TODO confirm.
df['meter_reading'] = y_pred

In [9]:
# Write the submission without the index column, formatting floats with
# four decimal places.
df.to_csv(
    path_or_buf='submission.csv',
    float_format='%.4f',
    index=False,
)