In [38]:
import torch 
import numpy as np 
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn import metrics

In [2]:
def standardization(X):
    std=np.apply_along_axis(np.nanstd,0,X)
    mean=np.apply_along_axis(np.nanmean,0,X)
    z=(X-mean)/std
    return [z,mean,std]

In [3]:
class MyDataset(Dataset):
    """ 
    자료 로드 부분 입력
    - forward
      - deltas
      - masks
      - values
    
    - backward
      - deltas
      - masks
      - values
      
    """ 
    def __init__(self):
        super(MyDataset,self).__init__()
        
        df=pd.read_csv('../NA_30per_QC2_gamak1 (1).csv',parse_dates=True,na_values='NaN')
        df['date']=pd.to_datetime(df['date'])

        #full date generate
        df=df.set_index('date')

        #year
        df=df[:'2019-01-01']
        df=df['2018-01-01':]
        temp=standardization(df)
        df=temp[0]
        self.mean_sd=temp[1],temp[2]
        #dt_range = pd.date_range(min(df.index), max(df.index), freq='1H')
        #na_date=set(dt_range)-set(df.index)
        #df=pd.concat([df ,pd.DataFrame(columns=df.columns,index=na_date)],axis=0)
        df=df.sort_index()
        self.df=df
        df_temp=df.copy()
        df_temp=df_temp.fillna(method='ffill')
        self.df_forwards=df_temp
        
        #forward
        dic=dict()
        for var in df.columns.values.tolist():
            print(var)
            temp=list()
            temp.append(0)
            for i in range(1,df.shape[0]):
                temp_df=df[[var]][i:i+1]
                if temp_df.notnull().values!=True:
                    temp.append(i-temp[-1])
                else:
                    temp.append(temp[-1])
            dic.update({var:temp})
        
        self.deltas=np.array(pd.DataFrame(dic))
        #eval generate
        df_temp=df.copy()
        for seed in range(1,df.shape[1]):
            np.random.seed(seed)
            temp_idx=np.random.choice(df.shape[0],int(np.ceil(df.shape[0]*.1)))
            df_temp[df_temp.columns[seed]][temp_idx]=np.nan
        self.eval=df_temp
        
        #backward
        df=df[::-1]
        self.b_df=df
        df_temp=df.copy()
        df_temp=df_temp.fillna(method='ffill')
        self.b_df_forwards=df_temp
        dic=dict()
        for var in df.columns.values.tolist():
            print(var)
            temp=list()
            temp.append(0)
            for i in range(1,df.shape[0]):
                temp_df=df[[var]][i:i+1]
                if temp_df.notnull().values!=True:
                    temp.append(-(i-temp[-1]))
                else:
                    temp.append(temp[-1])
            dic.update({var:temp})
        self.back_deltas=np.array(pd.DataFrame(dic))

        #eval generate
        df_temp=df.copy()
        for seed in range(1,df.shape[1]):
            np.random.seed(seed)
            temp_idx=np.random.choice(df.shape[0],int(np.ceil(df.shape[0]*.1)))
            df_temp[df_temp.columns[seed]][temp_idx]=np.nan
        self.b_eval=df_temp

        
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        if idx>=50:
            
            data=dict()
            data['forward']={'values':np.array(self.df)[(idx-50):(idx)],'masks':np.array(self.df.notnull()+0)[(idx-50):(idx)],
                             'deltas':self.deltas[idx:(idx+50)],'forwards':np.array(self.df_forwards)[(idx-50):(idx)],
                             'evals':np.array(self.eval)[(idx-50):(idx)],'eval_masks':np.array(self.eval.notnull()+0)[(idx-50):(idx)]}

            data['backward']={'values':np.array(self.b_df)[(idx-50):(idx)],'masks':np.array(self.b_df.notnull()+0)[(idx-50):(idx)],
                              'deltas':self.back_deltas[(idx-50):(idx)],'forwards':np.array(self.b_df_forwards)[(idx-50):(idx)],
                             'evals':np.array(self.b_eval)[(idx-50):(idx)],'eval_masks':np.array(self.b_eval.notnull()+0)[(idx-50):(idx)]}
            return data



In [39]:
def collate_fn(recs):
    if recs[0]!=None:
        if all(list(map(lambda x: x!=None,recs))):
            forward  = list(map(lambda x: x['forward'], recs))
            backward = list(map(lambda x: x['backward'], recs))
            if all(list(map(lambda x:len(x['deltas'])==50,forward))):
                def to_tensor_dict(recs):
                    values     =torch.FloatTensor(list(map(lambda r:r['values'],recs)))
                    masks      =torch.FloatTensor(list(map(lambda r:r['masks'], recs)))
                    deltas     =torch.FloatTensor(list(map(lambda r:r['deltas'],recs)))
                    evals      =torch.FloatTensor(list(map(lambda r:r['evals'], recs)))
                    eval_masks =torch.FloatTensor(list(map(lambda r:r['eval_masks'], recs)))

                    return {'values':values, 'masks': masks, 'deltas': deltas, 'evals': evals, 'eval_masks': eval_masks} 

                ret_dict = {'forward': to_tensor_dict(forward), 'backward': to_tensor_dict(backward)}
                return ret_dict

In [5]:
Mydata=MyDataset()

depth_top
depth_middle
depth_bottom
wtemp_top
wtemp_middle
wtemp_bottom
sal_top
sal_middle
sal_bottom
do_top
do_middle
do_bottom
density_top
density_middle
density_bottom
dense_diff0
dense_diff1
depth_top
depth_middle
depth_bottom
wtemp_top
wtemp_middle
wtemp_bottom
sal_top
sal_middle
sal_bottom
do_top
do_middle
do_bottom
density_top
density_middle
density_bottom
dense_diff0
dense_diff1


In [6]:
from torch.autograd import Variable
def to_var(var):
    if torch.is_tensor(var):
        var = Variable(var)
        if torch.cuda.is_available():
            var = var.cuda()
        return var
    if isinstance(var, int) or isinstance(var, float) or isinstance(var, str):
        return var
    if isinstance(var, dict):
        for key in var:
            var[key] = to_var(var[key])
        return var
    if isinstance(var, list):
        var = map(lambda x: to_var(x), var)
        return var

In [33]:
import os 
#os.chdir('/home/ducj2/data/do_bottom/models')
os.chdir('../')
import models
import utils
import sys
sys.path.append('./models')
from models import brits_i

In [34]:
data_iter=torch.utils.data.DataLoader(Mydata,batch_size=32,collate_fn=collate_fn,shuffle=False)

In [48]:
def evaluate(model, val_iter):
    model.eval()

    labels = []
    preds = []

    evals = []
    imputations = []

    save_impute = []
    save_label = []

    for idx, data in enumerate(val_iter):
        if data!=None:
            data = to_var(data)
            ret = model.run_on_batch(data, None)

            # save the imputation results which is used to test the improvement of traditional methods with imputed values
            save_impute.append(ret['imputations'].data.cpu().numpy())

            pred = ret['predictions'].data.cpu().numpy()

            eval_masks = ret['eval_masks'].data.cpu().numpy()
            eval_ = ret['evals'].data.cpu().numpy()
            imputation = ret['imputations'].data.cpu().numpy()

            evals += eval_[np.where(eval_masks == 1)].tolist()
            imputations += imputation[np.where(eval_masks == 1)].tolist()

            # collect test label & prediction
            pred = pred
            preds += pred.tolist()
    preds = np.asarray(preds)
#    print('AUC {}'.format(metrics.roc_auc_score(labels, preds)))

    evals = np.asarray(evals)
    imputations = np.asarray(imputations)

    print('MAE', np.abs(evals - imputations).mean())
    print('MRE', np.abs(evals - imputations).sum() / np.abs(evals).sum())
    save_impute = np.concatenate(save_impute, axis=0)
    np.save('./result_2018/{}_data'.format('brits_i'), save_impute)


In [None]:
#model = getattr(models, args.model).Model(108, 0.3, 1)
model= brits_i.Model()
if torch.cuda.is_available():
    model = model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
data_iter=torch.utils.data.DataLoader(Mydata,batch_size=32,collate_fn=collate_fn,shuffle=True)
for epoch in range(1000):
    model.train()
    run_loss = 0.0
    for idx, data in enumerate(data_iter):
        if data!=None:
            data = to_var(data)
            ret = model.run_on_batch(data, optimizer)
            run_loss += ret['loss'].item()
            print ('\r Progress epoch {}, {:.2f}%, average loss {}'.format(epoch, (idx + 1) * 100.0 / len(data_iter), run_loss / (idx + 1.0)))
    data_iter=torch.utils.data.DataLoader(Mydata,batch_size=32,collate_fn=collate_fn,shuffle=False)
    evaluate(model, data_iter)


 Progress epoch 0, 0.66%, average loss 75.28227233886719
 Progress epoch 0, 3.31%, average loss 30.494963073730467
 Progress epoch 0, 3.97%, average loss 38.569803873697914
 Progress epoch 0, 5.30%, average loss 38.2337064743042
 Progress epoch 0, 5.96%, average loss 42.35043165418837
 Progress epoch 0, 7.28%, average loss 41.345606023615055
 Progress epoch 0, 8.61%, average loss 41.007459787222054
 Progress epoch 0, 9.27%, average loss 43.628321511404856
 Progress epoch 0, 10.60%, average loss 42.94490671157837
 Progress epoch 0, 11.26%, average loss 44.84088718189913
 Progress epoch 0, 12.58%, average loss 44.13326986212479
 Progress epoch 0, 13.25%, average loss 45.76909637451172
 Progress epoch 0, 15.89%, average loss 41.09439945220947
 Progress epoch 0, 18.54%, average loss 37.86007499694824
 Progress epoch 0, 19.21%, average loss 39.21824751229122
 Progress epoch 0, 20.53%, average loss 39.117344025642645
 Progress epoch 0, 21.19%, average loss 40.08032560348511
 Progress epoch 0

In [53]:
np.load('/home/ai03/Desktop/nas/cj/jupyter/do_bottom/result_2018/brits_i_data.npy')

(2599, 50, 17)