This kernel is mainly based on https://www.kaggle.com/isaienkov/lightgbm-fe-1-19, with some adjustments of my own.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file.
for folder, _, files in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in files):
        print(path)

# Any results you write to the current directory are saved as output.

In [None]:
# Load the building metadata, the training-period weather and the training
# meter readings from the competition input directory.
building_df = pd.read_csv("../input/ashrae-energy-prediction/building_metadata.csv")
weather_train = pd.read_csv("../input/ashrae-energy-prediction/weather_train.csv")
train = pd.read_csv("../input/ashrae-energy-prediction/train.csv")

# Attach the building attributes to every reading (left join keeps all readings).
train = train.merge(building_df, on="building_id", how="left")
# Attach the weather by (site_id, timestamp). An explicit left join is used so
# that the training frame is built the same way as the test frame: the default
# inner join silently dropped every reading without a matching weather row,
# whereas the test merge keeps such rows with NaN weather values.
train = train.merge(weather_train, on=["site_id", "timestamp"], how="left")
# The raw weather frame is not needed once it has been merged in.
del weather_train

In [None]:
# Parse the timestamp, then derive the calendar features from it; each one is
# stored as uint8.
train["timestamp"] = pd.to_datetime(train["timestamp"])
for part in ("quarter", "month", "weekday", "hour"):
    train[part] = getattr(train["timestamp"].dt, part).astype(np.uint8)

# Replace square_feet with its natural logarithm.
train["square_feet"] = np.log(train["square_feet"])

In [None]:
# The calendar features have been extracted; remove the raw timestamp column.
train.drop(columns=["timestamp"], inplace=True)

In [None]:
# NOTE: the wind_direction / beaufort_scale conversions that used to sit here
# are disabled; only the two columns below are downcast to uint8.
for column in ("meter", "site_id"):
    train[column] = train[column].astype(np.uint8)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder on the training values of primary_use and replace the column
# with the resulting integer codes. `le` stays in scope so the same mapping can
# be applied to other frames.
le = LabelEncoder().fit(train["primary_use"])
train["primary_use"] = le.transform(train["primary_use"])

# Columns passed to LightGBM as categorical features.
categoricals = [
    "building_id",
    "site_id",
    "meter",
    "primary_use",
    "quarter",
    "month",
    "weekday",
    "hour",
]

In [None]:
# Columns removed from the training frame before modelling.
drop_cols = [
    "sea_level_pressure",
    "wind_direction",
]

# Continuous features. "relative humidity" is only a column name here; the
# column itself has to be created before the features are selected.
numericals = [
    "square_feet",
    "air_temperature",
    "cloud_coverage",
    "dew_temperature",
    'precip_depth_1_hr',
    "wind_speed",
    "relative humidity",
    "year_built",
    "floor_count",
]

# Full, ordered model input: categorical columns first, then numerical ones.
feat_cols = [*categoricals, *numericals]

In [None]:
# Preview the first five rows of the training frame.
train.head(n=5)

In [None]:
# NOTE: despite the column name this is simply the dew-point / air temperature
# ratio expressed in percent; it is inf/NaN wherever air_temperature is 0.
# Any other frame fed to the models must use exactly the same formula.
train["relative humidity"] = (
    train["dew_temperature"].div(train["air_temperature"]).mul(100)
)

In [None]:
# Keep a row unless it is a meter-0 row whose reading is exactly 0.
train = train[(train["meter"] != 0) | (train["meter_reading"] != 0)]

# Keep a row unless it belongs to building 1099, meter 2.
train = train[(train["building_id"] != 1099) | (train["meter"] != 2)]

train.head()

In [None]:
# Take meter_reading out of the frame and use log1p(meter_reading) as the
# regression target.
target = np.log1p(train.pop("meter_reading"))

# Remove the columns that are not used as features.
train = train.drop(columns=drop_cols)

In [None]:
from sklearn.model_selection import KFold, StratifiedKFold
# Preliminary cross-validation splitter: 4 shuffled, seeded stratified folds.
# NOTE: folds, seed and kf are all re-assigned in the training cell before any
# split is drawn, so this splitter is only printed.
seed = 666
folds = 4

kf = StratifiedKFold(random_state=seed, shuffle=True, n_splits=folds)
print(kf)


In [None]:
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
from sklearn.model_selection import KFold, StratifiedKFold
from tqdm import tqdm


# LightGBM settings: gradient-boosted trees, regression objective, RMSE metric.
# Row subsampling (25 % of the rows, re-drawn every iteration) is enabled via
# subsample / subsample_freq.
params = {
            'boosting_type': 'gbdt',
            'objective': 'regression',
            'metric': {'rmse'},
            'subsample': 0.25,
            'subsample_freq': 1,
            'learning_rate': 0.4,
            'num_leaves': 20,
            'feature_fraction': 0.9,
            'lambda_l1': 1,
            'lambda_l2': 1
            }

# One model is trained per fold; the folds are stratified on building_id.
folds = 2
seed = 666

kf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)

models = []
for train_index, val_index in kf.split(train, train['building_id']):
    # Select the fold's rows first and the feature columns second. The result
    # is identical to train[feat_cols].iloc[...], but it avoids materialising
    # a full-size copy of the training frame twice per fold.
    train_X = train.iloc[train_index][feat_cols]
    val_X = train.iloc[val_index][feat_cols]
    train_y = target.iloc[train_index]
    val_y = target.iloc[val_index]
    lgb_train = lgb.Dataset(train_X, train_y, categorical_feature=categoricals)
    lgb_eval = lgb.Dataset(val_X, val_y, categorical_feature=categoricals)
    # Both sets are passed as validation sets so the log shows training and
    # validation RMSE side by side.
    gbm = lgb.train(params,
                lgb_train,
                num_boost_round=500,
                valid_sets=(lgb_train, lgb_eval),
                early_stopping_rounds=100,
                verbose_eval = 100)
    models.append(gbm)

In [None]:
import gc
# Release every large training-time object before the test data is loaded.
del train, target
del train_X, val_X, train_y, val_y
del lgb_train, lgb_eval
gc.collect()

In [None]:
# Read the test rows and attach the building attributes (left join keeps every
# test row); the metadata frame is released right afterwards.
test = pd.read_csv("../input/ashrae-energy-prediction/test.csv").merge(
    building_df, on="building_id", how="left"
)
del building_df
gc.collect()
# Encode primary_use with the encoder that was fitted on the training data.
test["primary_use"] = le.transform(test["primary_use"])

weather_test = pd.read_csv("../input/ashrae-energy-prediction/weather_test.csv")

# Attach the weather by (site_id, timestamp); rows without a match are kept.
test = test.merge(weather_test, on=["site_id", "timestamp"], how="left")
del weather_test
gc.collect()

In [None]:
# Rebuild, on the test frame, the same derived features that the models were
# trained on.
test["timestamp"] = pd.to_datetime(test["timestamp"])
for part in ("hour", "weekday", "month", "quarter"):
    test[part] = getattr(test["timestamp"].dt, part).astype(np.uint8)

test["square_feet"] = np.log(test["square_feet"])

for column in ("meter", "site_id"):
    test[column] = test[column].astype(np.uint8)

# Same dew-point / air temperature ratio (in percent) as in the training frame.
test["relative humidity"] = (
    test["dew_temperature"].div(test["air_temperature"]).mul(100)
)

# Keep only the model inputs, in the column order used for training.
test = test[feat_cols]

In [None]:
# Predict the test set in fixed-size row chunks.
# The chunk boundaries are derived from step_size alone (the original loop
# hard-coded 50000 for the chunk count next to a separate step_size and a
# manual offset counter), and the average divides by the number of trained
# models instead of the global `folds`.
step_size = 50000
res = []
for start in tqdm(range(0, test.shape[0], step_size)):
    chunk = test.iloc[start:start + step_size]
    # Average the per-fold predictions in log1p space, then undo the log1p
    # that was applied to the training target.
    fold_mean = sum(model.predict(chunk) for model in models) / len(models)
    res.append(np.expm1(fold_mean))

In [None]:
# Join the per-chunk prediction arrays into one flat array, in chunk order.
res = np.concatenate(res, axis=0)

In [None]:
# Fill the sample submission with the predictions, replacing any negative
# value by 0, and write it to the working directory.
submission = pd.read_csv('/kaggle/input/ashrae-energy-prediction/sample_submission.csv')
submission['meter_reading'] = np.where(res < 0, 0, res)
submission.to_csv('submission.csv', index=False)
submission

In [None]:
 # Split-count importances of the most recently trained fold model.
 gbm.feature_importance(importance_type="split")

In [None]:
# Show the feature names in the column order used to build the model inputs.
print(feat_cols)

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Horizontal bar chart: one bar per feature, bar length = importance in the
# most recently trained fold model.
a, b = gbm.feature_importance(), feat_cols
sns.barplot(y=b, x=a)