In [12]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import os
import seaborn as sns
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import json
from torch.utils.data import DataLoader

In [25]:
import torch
from torch.utils.data import Dataset
import pandas as pd
import os
import numpy as np

class CaseDataset(Dataset):
    """Torch dataset yielding one (raw, validated) sensor signal pair per day.

    The case directory ``path`` must contain ``case_summary.json``. From it
    the dataset reads:

    * ``['precipitations']['file']`` -- CSV loaded into ``self.precip_raw``;
    * ``['sensors'][sensor_idx]['raw'][year_idx]`` and
      ``['sensors'][sensor_idx]['val'][year_idx]`` -- file stems to which
      ``'_processed.csv'`` is appended to locate the raw and validated series.

    Both sensor CSVs are expected to carry a ``value`` column plus the merge
    keys ``date, year, month, day, hour, minute``.

    Args:
        path: Case directory.
        mesure: Grouping granularity. Only ``'day'`` is implemented; any other
            value triggers a ``UserWarning`` and ``'day'`` is used instead.
        sensor_idx: Index into ``case_summary['sensors']``.
        year_idx: Index into the selected sensor's ``'raw'`` / ``'val'`` lists.

    Attributes set during construction:
        case_summary: Parsed ``case_summary.json``.
        precipitation_info, precip_raw: Precipitation metadata and table.
        data: Merged frame with columns ``raw``, ``val``, ``raw-val``,
            ``is_error``, ``date`` (datetime) and ``date_only``.
        index: Unique ``date_only`` values in order of first appearance;
            one dataset item per entry.
    """

    def __init__(self,
                 path,
                 mesure='day',
                 sensor_idx=0,
                 year_idx=0):
        self.path = path
        with open(os.path.join(path, 'case_summary.json'), 'r') as f:
            self.case_summary = json.load(f)
        if mesure != 'day':
            # Previously the argument was dropped silently; keep the same
            # fallback but tell the caller that it happened.
            import warnings
            warnings.warn(
                "CaseDataset: mesure=%r is not implemented; using 'day'." % (mesure,),
                stacklevel=2,
            )
        self.mesure = 'day'  # only one implemented.
        self.sensor_idx = sensor_idx
        self.year_idx = year_idx
        self.load_precipitation()
        self.build_data()

    def load_precipitation(self):
        """Load the precipitation CSV named in the case summary.

        Sets ``self.precipitation_info`` (the ``'precipitations'`` entry of
        the summary) and ``self.precip_raw`` (the CSV, first column as index).
        """
        self.precipitation_info = self.case_summary['precipitations']
        self.precip_raw = pd.read_csv(
            os.path.join(self.path, self.precipitation_info['file']),
            index_col=[0],
        )

    def build_data(self):
        """Read, merge and index the raw / validated series of the sensor.

        The raw series is left-joined with the validated one, so every raw
        sample is kept and ``val`` is NaN where no validated sample matches.
        ``raw-val`` is the difference of the two and ``is_error`` flags rows
        where that difference is not exactly zero (NaN differences compare
        as non-zero and are therefore flagged too).
        """
        sensor = self.case_summary['sensors'][self.sensor_idx]
        sensor_raw = sensor['raw'][self.year_idx] + '_processed.csv'
        sensor_label = sensor['val'][self.year_idx] + '_processed.csv'

        # Original author's note: the 'data_info' / 'station_info' entries of
        # the summary "might not be in correct order", so they are not used.

        df_raw = pd.read_csv(os.path.join(self.path, sensor_raw), index_col=[0])
        df_val = pd.read_csv(os.path.join(self.path, sensor_label), index_col=[0])

        df_raw = df_raw.rename(columns={'value': 'raw'})
        df_val = df_val.rename(columns={'value': 'val'})

        df_merge = pd.merge(df_raw,
                            df_val,
                            on=['date', 'year', 'month', 'day', 'hour', 'minute'],
                            how='left')
        # Column arithmetic instead of row-wise apply: same values, computed
        # in one vectorised pass.
        df_merge['raw-val'] = df_merge['raw'] - df_merge['val']
        df_merge['is_error'] = df_merge['raw-val'] != 0

        if self.mesure == 'day':
            df_merge['date'] = pd.to_datetime(df_merge['date'])
            df_merge['date_only'] = df_merge['date'].dt.date
            self.index = df_merge['date_only'].unique()
            # Positional row numbers of each day, computed once so that
            # fetching an item does not rescan the whole frame.
            self._rows_by_day = df_merge.groupby('date_only', sort=False).indices
        self.data = df_merge

    def __len__(self):
        """Number of distinct days in the merged data."""
        return len(self.index)

    def _day_frame(self, day):
        """Return the rows of ``self.data`` whose ``date_only`` equals ``day``."""
        rows_by_day = getattr(self, '_rows_by_day', None)
        rows = None if rows_by_day is None else rows_by_day.get(day)
        if rows is None:
            # Day absent from the precomputed lookup (e.g. a missing date):
            # fall back to the plain boolean-mask selection.
            return self.data[self.data['date_only'] == day]
        return self.data.iloc[rows]

    def create_signals(self, idx):
        """Return ``(inputs, labels)`` numpy arrays for day number ``idx``.

        Both arrays have shape ``(1, n)`` where ``n`` is the number of rows
        recorded on that day; ``inputs`` comes from column ``raw`` and
        ``labels`` from column ``val``.
        """
        this_day = self._day_frame(self.index[idx])
        inputs = np.expand_dims(this_day['raw'].to_numpy(), 0)
        labels = np.expand_dims(this_day['val'].to_numpy(), 0)
        return inputs, labels

    def __getitem__(self, idx):
        """Return day ``idx`` as a pair of float32 tensors of shape ``(1, n)``."""
        inputs, labels = self.create_signals(idx)
        inputs = torch.from_numpy(inputs).float()
        labels = torch.from_numpy(labels).float()

        return inputs, labels

In [29]:
# Case selection and data locations.
# DATA_ROOT defaults to the location used when this notebook was written and
# can be redirected by setting the HACKQC_DATA_ROOT environment variable, so
# the notebook can run on another machine without editing this cell.
DATA_ROOT = os.environ.get('HACKQC_DATA_ROOT', '/home/theaiunicorn/datasets/hackQC2022')

cas = '1'  # case identifier; selects the 'cas_<id>' directories below
path = f'{DATA_ROOT}/cas_{cas}'
result_path = f'{DATA_ROOT}/results/cas_{cas}'

In [30]:
# Build the per-day dataset for the selected case directory
# (default sensor and year indices).
dataset = CaseDataset(path=path)

In [31]:
# First day's (inputs, labels) pair, fetched through normal subscription
# rather than by calling the dunder method directly.
x, y = dataset[0]