In [2]:
# Core
import pandas as pd
import numpy as np
import os
import cv2
import gc
import itertools
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from tqdm.notebook import tqdm
from datetime import datetime
import json,itertools
from typing import Optional
from glob import glob
import warnings
from IPython import display as ipd
warnings.filterwarnings("ignore")
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
import matplotlib as mpl
from matplotlib.patches import Rectangle
import seaborn as sns
import random
from joblib import Parallel, delayed
import os, shutil
import datetime 

# Keras
import tensorflow.keras.backend as K
import tensorflow as tf

from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Keras
from tensorflow import keras
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model
from tensorflow.keras.layers import LSTM, Flatten, TimeDistributed, Conv1D, Input, Dense, Multiply, Add, Activation, GRU
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.losses import binary_crossentropy
from keras.callbacks import Callback, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.models import load_model, save_model, Sequential
from tensorflow.data import Dataset
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers
import wandb

import requests
import calendar
import time

In [3]:
seeds=101

def seeding(SEED):
    """
    Seed every random number generator this notebook relies on.

    Seeds NumPy's global generator, Python's ``random`` module and
    TensorFlow, and exports the environment variables controlling hash
    randomisation and cuDNN determinism.

    Args:
        SEED: Integer seed applied to all generators.
    """
    np.random.seed(SEED)
    random.seed(SEED)
    # Exported for any child process; the running interpreter fixed its own
    # hash seed at start-up, so this does not change hashing in this kernel.
    os.environ["PYTHONHASHSEED"] = str(SEED)
    # TF_CUDNN_DETERMINISTIC is an on/off switch, not a seed. The previous
    # value str(SEED) (e.g. "101") is not a valid boolean, so the request for
    # deterministic cuDNN kernels was not honoured.
    os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
    tf.random.set_seed(SEED)
    print("seeding done")


seeding(seeds)

In [4]:
DATASET_DIR = "../input/water-storage-dam-dataset"

In [5]:
# Collect the CSV files in sorted order so that "last file" is well defined
# for the loading code that indexes these lists with [:-1] and [-1].
water_list = sorted(glob(os.path.join(DATASET_DIR, "water_data", "*.csv")))
# The rainfall pattern used to be DATASET_DIR + "../input/.../rf_data/*.csv",
# i.e. the dataset directory was concatenated twice, so the glob matched
# nothing. Build it from DATASET_DIR exactly like the water pattern.
rf_list = sorted(glob(os.path.join(DATASET_DIR, "rf_data", "*.csv")))

# Fail here with a clear message rather than with an IndexError further down.
if not water_list or not rf_list:
    raise FileNotFoundError(
        "No CSV files found under %s (water_data: %d, rf_data: %d)"
        % (DATASET_DIR, len(water_list), len(rf_list))
    )

In [None]:
water_df = pd.DataFrame()
rf_df = pd.DataFrame()

def interpolate_train(df=None) :
    """
    Fill gaps in the training columns, using one interpolation method per column.

    Every listed column is interpolated in both directions with its configured
    method. Columns configured as 'quadratic' or 'cubic' get a second, linear
    pass that fills whatever the first pass left missing.

    Args:
        df: Frame to fill in place. When omitted, the module-level ``tmp``
            frame is used, which is how the existing call sites invoke this
            function.

    Returns:
        The frame that was filled (the same object, modified in place).

    Raises:
        KeyError: If one of the configured columns is absent from the frame.
    """
    if df is None:
        # Backward-compatible path: callers historically relied on the global.
        df = tmp

    __methods = {
        'swl': 'linear',
        'inf':'linear',
        'sfw':'linear',
        'ecpc':'linear',
        'tototf':'linear',
        'tide_level': 'cubic',
        'fw_1018662': 'cubic',
        'fw_1019630':'cubic',
        'fw_1018683':'quadratic',
        'fw_1018680':'values',
        'wl_1018662': 'quadratic',
        'wl_1018680': 'quadratic',
        'wl_1018683':'quadratic',
        'wl_1019630':'cubic'
    }

    for col, method in __methods.items():
        df[col] = df[col].interpolate(method=method, limit_direction='both')
        # Both polynomial methods share the same linear fallback; the two
        # previously separate branches were identical.
        if method in ('quadratic', 'cubic'):
            df[col] = df[col].interpolate(method='linear', limit_direction='both')
    return df
            
def interpolate_test(df=None) :
    """
    Forward-fill gaps in the test columns.

    Each listed column has its missing values replaced by the last preceding
    observation. Values missing at the very start of a column stay missing.

    Args:
        df: Frame to fill in place. When omitted, the module-level ``tmp``
            frame is used, which is how the existing call sites invoke this
            function.

    Returns:
        The frame that was filled (the same object, modified in place).

    Raises:
        KeyError: If one of the listed columns is absent from the frame.
    """
    if df is None:
        # Backward-compatible path: callers historically relied on the global.
        df = tmp

    columns = [
        'swl',
        'inf',
        'sfw',
        'ecpc',
        'tototf',
        'tide_level',
        'fw_1018662',
        'fw_1019630',
        'fw_1018683',
        'fw_1018680',
    ]

    for col in columns:
        # Every column was configured with method='pad'. Series.interpolate
        # with 'pad' is deprecated in favour of ffill(), which fills the same
        # way. The former follow-up linear pass had nothing left to fill
        # (a forward-only pass cannot reach leading gaps) and the 'quadratic'
        # branch could never run, so both are dropped.
        df[col] = df[col].ffill()
    return df

def _add_lag_features(df, source, prefix, lags):
    """Add one ``<prefix><lag>`` column per lag, holding ``df[source]`` shifted down by that many rows."""
    for lag in lags:
        # Assign the filled Series back to the frame. The earlier form,
        # df[name].fillna(0, inplace=True), fills a temporary object when
        # pandas Copy-on-Write is active and leaves the NaNs in the frame.
        df['%s%s' % (prefix, lag)] = df[source].shift(lag).fillna(0)
    return df


def tototf_lag(df) :
    """
    Add 'tototf' lagged by 24, 30 and 36 rows.

    New columns are named 'tototf_shifted<lag>'; rows without a lagged value
    are set to 0. The frame is modified in place and returned.
    """
    return _add_lag_features(df, 'tototf', 'tototf_shifted', [24, 30, 36])


def inf_lag(df) :
    """
    Add 'inf' lagged by 36, 48 and 60 rows.

    New columns are named 'inf_shifted<lag>'; rows without a lagged value
    are set to 0. The frame is modified in place and returned.
    """
    return _add_lag_features(df, 'inf', 'inf_shifted', [36, 48, 60])


def tide_lag(df) :
    """
    Add 'tide_level' lagged by 6, 12 and 18 rows.

    New columns are named 'tide_shifted<lag>'; rows without a lagged value
    are set to 0. The frame is modified in place and returned.
    """
    return _add_lag_features(df, 'tide_level', 'tide_shifted', [6, 12, 18])

def _shifted_minus_current(df, column, periods, suffix):
    """Add ``<column><suffix>`` = (value ``periods`` rows earlier, 0 where none exists) - current value."""
    # Build the shifted Series as a local instead of a scratch column. This
    # avoids df[name].fillna(0, inplace=True), which is a no-op under pandas
    # Copy-on-Write, and no longer leaves '<column>_shifted' behind in the
    # caller's frame.
    earlier = df[column].shift(periods).fillna(0)
    df[column + suffix] = earlier - df[column]
    return df


def fw_diff(df, column) :
    """
    Add '<column>_diff': the value one row earlier minus the current value.

    Note the sign: this is previous - current, not current - previous. The
    first row uses 0 as its earlier value. The frame is modified in place
    and returned.
    """
    return _shifted_minus_current(df, column, 1, '_diff')


def fw_diff2(df, column) :
    """
    Add '<column>_diff2': the value two rows earlier minus the current value.

    The first two rows use 0 as their earlier value. The frame is modified
    in place and returned.
    """
    return _shifted_minus_current(df, column, 2, '_diff2')


def fw_diff3(df, column) :
    """
    Add '<column>_diff3': the value three rows earlier minus the current value.

    The first three rows use 0 as their earlier value. The frame is modified
    in place and returned.
    """
    return _shifted_minus_current(df, column, 3, '_diff3')

            
# Columns that receive the 1-, 2- and 3-row difference features.
DIFF_COLUMNS = ['fw_1018662', 'fw_1018680', 'fw_1018683', 'fw_1019630',
                'swl', 'tide_level', 'tototf']
# Leading rows discarded from every file. The largest lag used is 60 rows, so
# this also removes every row whose lag/diff features were zero-filled.
# NOTE(review): 144 is presumably one day of 10-minute samples - confirm.
WARMUP_ROWS = 144


def _engineer_water_features(frame):
    """
    Apply the shared feature pipeline to one already-interpolated water file.

    Remaining NaNs are set to 0, the lag features are added, then the
    difference features are added in the order diff -> diff2 -> diff3 (each
    over DIFF_COLUMNS), and finally the first WARMUP_ROWS rows are dropped.
    """
    frame = frame.fillna(0)
    frame = tototf_lag(frame)
    frame = inf_lag(frame)
    frame = tide_lag(frame)
    for diff_fn in (fw_diff, fw_diff2, fw_diff3):
        for column in DIFF_COLUMNS:
            frame = diff_fn(frame, column)
    return frame[WARMUP_ROWS:]


# --- training files: every file except the last -----------------------------
water_frames = []
for path in water_list[:-1]:
    tmp = pd.read_csv(path).replace(" ", np.nan)
    interpolate_train()  # fills the module-level `tmp` in place
    water_frames.append(_engineer_water_features(tmp))
# Concatenate once instead of growing the frame inside the loop.
if water_frames:
    water_df = pd.concat(water_frames)

rf_frames = [
    pd.read_csv(path).replace(" ", np.nan)[WARMUP_ROWS:]
    for path in rf_list[:-1]
]
if rf_frames:
    rf_df = pd.concat(rf_frames)

# --- test file: the last file of each list ----------------------------------
tmp = pd.read_csv(water_list[-1]).replace(" ", np.nan)
interpolate_test()  # fills the module-level `tmp` in place
water_df_test = _engineer_water_features(tmp)

# The test rainfall file previously skipped the " " -> NaN normalisation that
# the training rainfall files receive; apply it here as well.
rf_df_test = pd.read_csv(rf_list[-1]).replace(" ", np.nan)[WARMUP_ROWS:]

In [None]:
# Join water measurements and rainfall readings on the shared timestamp.
# The outer join keeps timestamps that occur in only one of the two sources.
merge_key = 'ymdhm'
df = pd.merge(water_df, rf_df, how='outer', on=merge_key)
df_test = pd.merge(water_df_test, rf_df_test, how='outer', on=merge_key)

In [None]:
df_test.isna().sum()

In [None]:
df.isna().sum()

In [None]:
df.info()

In [None]:
df_test.info()

In [None]:
df.shape

In [None]:
df_test.shape

In [None]:
df.describe()

In [None]:
# Reference frame for the interpolation benchmark: the second water file with
# its gaps filled linearly. The cell displays the per-column missing counts
# that remain afterwards.
train_temp = pd.read_csv(water_list[1]).interpolate(method='linear')
train_temp.isna().sum()

In [None]:
def make_train_nan(col, n):
    """
    Add '<col>_nan' to the module-level ``train_temp`` frame.

    The new column keeps every n-th value of ``train_temp[col]`` (positions
    0, n, 2n, ...) and holds NaN at every other position.

    Args:
        col: Name of the source column in ``train_temp``.
        n: Spacing between the positions that keep their value.
    """
    thinned = [
        value if position % n == 0 else np.nan
        for position, value in enumerate(train_temp[col])
    ]
    train_temp[col + '_nan'] = thinned
# Columns for which artificial gaps are created.
# NOTE(review): 'fw_1018680' is absent from this list although the other fw_*
# columns are present - confirm that this is intentional.
cols = [
    'swl', 'inf', 'sfw', 'ecpc', 'tototf', 'tide_level',
    'fw_1018662', 'fw_1018683', 'fw_1019630',
    'wl_1018662', 'wl_1018680', 'wl_1018683', 'wl_1019630',
]

# Keep one value out of every three and blank the rest.
keep_every = 3
for col in cols:
    make_train_nan(col, keep_every)

In [None]:
def compare_interpolate_methods(col, methods, metric):
    """
    Score interpolation methods on the artificially thinned '<col>_nan' column.

    For each method, ``train_temp['<col>_nan']`` is interpolated; if any value
    is still missing, a linear pass is applied on top. The filled series is
    then compared with ``train_temp[col]`` using ``metric``.

    Args:
        col: Base column name in the module-level ``train_temp`` frame.
        methods: Iterable of method names accepted by ``Series.interpolate``.
        metric: Callable ``metric(reference, filled)`` returning the error.

    Returns:
        dict mapping each method name to its error.
    """
    thinned_name = col + '_nan'
    scores = {}
    for method in methods:
        filled = train_temp[thinned_name].interpolate(method=method)
        if filled.isna().any():
            # The chosen method left gaps; close them with a linear pass.
            filled = filled.interpolate(method='linear')
        scores[str(method)] = metric(train_temp[col], filled)
    return scores
# Mean squared error of each candidate method, per column. The resulting
# frame has one column per data column and one row per method.
methods = ['pad','linear','quadratic','cubic']
all_cols_error_dict = {
    '{}'.format(col): compare_interpolate_methods(col, methods, mean_squared_error)
    for col in cols
}

all_cols_error_df = pd.DataFrame(all_cols_error_dict)

In [None]:
all_cols_error_df.transpose()

In [None]:
df.isna().sum()

In [None]:
df_test.isna().sum()

In [None]:
print('train data : ', df.shape)
print('test data : ', df_test.shape)

In [None]:
# Distribution of each measured series, one figure per column. This replaces
# ten copy-pasted cells; the axis created by plt.subplots is now passed to
# seaborn explicitly and every figure carries the column name as its title.
HIST_COLUMNS = [
    'swl', 'inf', 'sfw', 'ecpc', 'tototf', 'tide_level',
    'wl_1018662', 'wl_1018680', 'wl_1018683', 'wl_1019630',
]

for column in HIST_COLUMNS:
    f, ax = plt.subplots(figsize=(20,8))
    sns.histplot(df[column], ax=ax)
    ax.set_title(column)
    plt.show()

In [None]:
# Correlation of every numeric column with each of the four water-level
# columns. corr_df has one row per water-level column and one column per feature.
TARGET_COLUMNS = ['wl_1018662', 'wl_1018680', 'wl_1018683', 'wl_1019630']

# Compute the correlation matrix once (it used to be recomputed per target)
# and restrict it to numeric columns first: since pandas 2.0 DataFrame.corr()
# no longer drops non-numeric columns silently, and df carries the string
# 'ymdhm' column.
corr_matrix = df.select_dtypes(include='number').corr()

corr = [corr_matrix[target] for target in TARGET_COLUMNS]
corr_df = pd.concat(corr, axis=1).T

In [None]:
# Heatmap of the correlations: features on the rows, water-level columns on
# the columns (corr_df is transposed for display).
f, ax = plt.subplots(figsize=(20,8))
diverging = sns.diverging_palette(240,10,as_cmap=True)
sns.heatmap(corr_df.T, cmap=diverging, ax=ax)
plt.show()

In [None]:
df.describe()

In [None]:
df_test.describe()

In [None]:
df.describe()

In [None]:
# Report the frame sizes, then continue on copies so the merged frames
# df / df_test stay untouched by the feature engineering.
for label, frame in (('train data : ', df), ('test data : ', df_test)):
    print(label, frame.shape)

train_df, test_df = df.copy(), df_test.copy()

In [None]:
# Calendar features for the training frame, derived from one parsed timestamp.
train_stamp = pd.to_datetime(train_df['ymdhm'])

train_df['dayofyear'] = train_stamp.dt.dayofyear
train_df['weekday'] = train_stamp.dt.weekday
# 'ymdhm' is stored back as a normalised 'YYYY-MM-DD HH:MM:SS' string.
train_df['ymdhm'] = train_stamp.dt.strftime('%Y-%m-%d %H:%M:%S')
train_df['date'] = train_stamp.dt.strftime('%Y-%m-%d')
train_df['hour'] = train_stamp.dt.hour.astype(int)

# Sine / cosine encoding of the hour.
# NOTE(review): the divisor is 23, so hours 0 and 23 get the same encoding;
# a 24-hour cycle would divide by 24. Left unchanged here because the test
# frame is encoded with the same constant - change both together.
train_df['hour_te'] = np.sin(2*np.pi*train_df['hour']/23)
train_df['hour_te1'] = np.cos(2*np.pi*train_df['hour']/23)

train_df['year'] = train_stamp.dt.year.astype(int)
train_df['month'] = train_stamp.dt.month.astype(int)
train_df['day'] = train_stamp.dt.day.astype(int)

In [None]:
# Attach hourly mean precipitation probability and precipitation form.
# NOTE(review): rain_df and method_df are not created in any cell visible in
# this notebook; on a fresh kernel this cell raises NameError unless they are
# loaded elsewhere - confirm.
# The inner joins drop every training row whose hour has no match.
hour_keys = ['year','month','day', 'hour']

rain_df_temp = rain_df.groupby(hour_keys)['PrecipitationProb'].mean().reset_index()
train_df = train_df.merge(rain_df_temp, on=hour_keys, how='inner')

method_df_temp = method_df.groupby(hour_keys)['PrecipitationForm'].mean().reset_index()
train_df = train_df.merge(method_df_temp, on=hour_keys, how='inner')

In [None]:
# Calendar features for the test frame, derived from one parsed timestamp.
test_stamp = pd.to_datetime(test_df['ymdhm'])

test_df['dayofyear'] = test_stamp.dt.dayofyear
test_df['weekday'] = test_stamp.dt.weekday
# 'ymdhm' is stored back as a normalised 'YYYY-MM-DD HH:MM:SS' string.
test_df['ymdhm'] = test_stamp.dt.strftime('%Y-%m-%d %H:%M:%S')
test_df['date'] = test_stamp.dt.strftime('%Y-%m-%d')
test_df['hour'] = test_stamp.dt.hour.astype(int)

# Sine / cosine encoding of the hour.
# NOTE(review): the divisor is 23, so hours 0 and 23 get the same encoding;
# a 24-hour cycle would divide by 24. Left unchanged here because the training
# frame is encoded with the same constant - change both together.
test_df['hour_te'] = np.sin(2*np.pi*test_df['hour']/23)
test_df['hour_te1'] = np.cos(2*np.pi*test_df['hour']/23)

test_df['year'] = test_stamp.dt.year.astype(int)
test_df['month'] = test_stamp.dt.month.astype(int)
test_df['day'] = test_stamp.dt.day.astype(int)

In [None]:
# Attach hourly mean precipitation probability and precipitation form.
# NOTE(review): rain_df_test and method_df_test are not created in any cell
# visible in this notebook; on a fresh kernel this cell raises NameError unless
# they are loaded elsewhere - confirm.
# The inner joins drop every test row whose hour has no match.
hour_keys = ['year','month','day', 'hour']

rain_df_temp = rain_df_test.groupby(hour_keys)['PrecipitationProb'].mean().reset_index()
test_df = test_df.merge(rain_df_temp, on=hour_keys, how='inner')

method_df_temp = method_df_test.groupby(hour_keys)['PrecipitationForm'].mean().reset_index()
test_df = test_df.merge(method_df_temp, on=hour_keys, how='inner')

In [None]:
# Monthly rainfall totals per station, added as '<station>_month_sum'.
# Train and test totals are computed separately, each from its own frame.
month_keys = ['year','month']

for station in ('rf_10184100', 'rf_10184110', 'rf_10184140'):
    total_col = station + '_month_sum'
    train_rf_temp = (
        train_df.groupby(month_keys)[station].sum()
        .reset_index()
        .rename(columns = {station : total_col})
    )
    test_rf_temp = (
        test_df.groupby(month_keys)[station].sum()
        .reset_index()
        .rename(columns = {station : total_col})
    )
    train_df = train_df.merge(train_rf_temp, on=month_keys, how='outer')
    test_df = test_df.merge(test_rf_temp, on=month_keys, how='outer')

In [None]:
# Daily rainfall totals per station, added as '<station>_day_sum'.
# Train and test totals are computed separately, each from its own frame.
day_keys = ['year','month', 'day']

for station in ('rf_10184100', 'rf_10184110', 'rf_10184140'):
    total_col = station + '_day_sum'
    train_rf_temp = (
        train_df.groupby(day_keys)[station].sum()
        .reset_index()
        .rename(columns = {station : total_col})
    )
    test_rf_temp = (
        test_df.groupby(day_keys)[station].sum()
        .reset_index()
        .rename(columns = {station : total_col})
    )
    train_df = train_df.merge(train_rf_temp, on=day_keys, how='outer')
    test_df = test_df.merge(test_rf_temp, on=day_keys, how='outer')

In [None]:
# Daily means of 'inf' and 'tototf', added as 'inf_day_mean' and
# 'tototf_day_mean'. Train and test means come from their own frames.
day_keys = ['year','month','day']

# -- inf ---------------------------------------------------------------------
train_inf_temp = train_df.groupby(day_keys)['inf'].mean().reset_index()
train_inf_temp = train_inf_temp.rename(columns = {'inf' : 'inf_day_mean'})
test_inf_temp = test_df.groupby(day_keys)['inf'].mean().reset_index()
test_inf_temp = test_inf_temp.rename(columns = {'inf' : 'inf_day_mean'})
train_df = train_df.merge(train_inf_temp, on=day_keys, how='outer')
test_df = test_df.merge(test_inf_temp, on=day_keys, how='outer')

# -- tototf ------------------------------------------------------------------
train_tototf_temp = train_df.groupby(day_keys)['tototf'].mean().reset_index()
train_tototf_temp = train_tototf_temp.rename(columns = {'tototf' : 'tototf_day_mean'})
test_tototf_temp = test_df.groupby(day_keys)['tototf'].mean().reset_index()
test_tototf_temp = test_tototf_temp.rename(columns = {'tototf' : 'tototf_day_mean'})
train_df = train_df.merge(train_tototf_temp, on=day_keys, how='outer')
test_df = test_df.merge(test_tototf_temp, on=day_keys, how='outer')

In [None]:
# Hourly means of 'tototf' and 'inf', added as '<column>_hour_mean'.
# Train and test means come from their own frames.
hour_keys = ['year','month','day', 'hour']

for source in ('tototf', 'inf'):
    mean_col = source + '_hour_mean'
    train_tototf_temp = (
        train_df.groupby(hour_keys)[source].mean()
        .reset_index()
        .rename(columns = {source : mean_col})
    )
    test_tototf_temp = (
        test_df.groupby(hour_keys)[source].mean()
        .reset_index()
        .rename(columns = {source : mean_col})
    )
    train_df = train_df.merge(train_tototf_temp, on=hour_keys, how='outer')
    test_df = test_df.merge(test_tototf_temp, on=hour_keys, how='outer')

In [None]:
# Hourly mean of each fw_* column, added as '<column>_hour_mean'.
# Train and test means come from their own frames.
hour_keys = ['year','month','day', 'hour']

for gauge in ('fw_1018662', 'fw_1018680', 'fw_1018683', 'fw_1019630'):
    mean_col = gauge + '_hour_mean'
    train_fw_temp = (
        train_df.groupby(hour_keys)[gauge].mean()
        .reset_index()
        .rename(columns = {gauge : mean_col})
    )
    test_fw_temp = (
        test_df.groupby(hour_keys)[gauge].mean()
        .reset_index()
        .rename(columns = {gauge : mean_col})
    )
    train_df = train_df.merge(train_fw_temp, on=hour_keys, how='outer')
    test_df = test_df.merge(test_fw_temp, on=hour_keys, how='outer')

In [None]:
# Row-wise mean and standard deviation across the four fw_* columns, plus the
# ratio of 'tototf' to that mean.
meas_fw_cols = ['fw_1018662', 'fw_1018680', 'fw_1018683', 'fw_1019630']

for frame in (train_df, test_df):
    readings = frame[meas_fw_cols]
    frame['meas_fw_avg'] = np.mean(readings, axis=1)
    frame['meas_fw_std'] = np.std(readings, axis=1)
    # NOTE(review): rows where meas_fw_avg is 0 produce inf or NaN here -
    # confirm that downstream code tolerates that.
    frame['tototf/meas_fw_avg'] = frame['tototf'] / frame['meas_fw_avg']

In [None]:
train_df.head()

In [None]:
test_df.head()

In [None]:
# Persist the engineered frames to the working directory. The row index is
# written as the first, unnamed CSV column (to_csv default).
for frame, target in ((train_df, 'train_data.csv'), (test_df, 'test_data.csv')):
    frame.to_csv(target)