In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch
import itertools

from pathlib import Path
from torch.utils.data import Dataset

class AssasDatafile:

    def __init__(self, filepath):
        
        self.data = pd.read_csv(filepath, sep=' ')
        self.filepath = filepath
        print("read file: %s" % (self.filepath))
        self.run = int(filepath.split('_')[-1].replace('.txt',''))
        
        self.timedata = self.data[self.data.columns[0]]
        self.data.drop(self.data.columns[0], axis=1, inplace=True)
        self.data.drop(self.data.columns[-1], axis=1, inplace=True)

        column_parts = AssasDatafile.split_column(self.data)
        
        self.meshes = set(map(lambda x: x[2], column_parts))
        self.variable = set(map(lambda x: x[0], column_parts))
        self.channel = set(map(lambda x: x[1], column_parts))

    @staticmethod
    def split_column(data):
        column_parts = []
        for column in data.columns:
            parts = column.split('_')
            if len(parts) == 3:
                column_parts.append(parts)
            if len(parts) == 4:
                parts = [parts[0]+"_"+parts[1],parts[2],parts[3]]
                column_parts.append(parts)
            if len(parts) == 5:
                parts = [parts[0]+"_"+parts[1]+"_"+parts[2],parts[3],parts[4]]
                column_parts.append(parts)
        
        return column_parts 
    
    def get_data(self):
        return self.data
    
    def get_run(self):
        return self.run
    
    def get_meta_data(self):
        return self.channel, self.variable, self.meshes
    
    def plot_data_mesh(self, mesh):
        
        name = "%s_%s_mesh%d" % (list(self.variable)[0], list(self.channel)[0], mesh)
        plt.figure(figsize=(12,12))
        plt.plot(self.timedata.to_numpy(), self.data[name].to_numpy(), '-^', label=("%s" % name))
        plt.grid(axis='both')
        plt.ylabel('variable %s, channel %s, delta %1.1f s' % (list(self.variable)[0], list(self.channel)[0], self.timedata[1]))
        plt.xlabel('T [s?]')
        plt.legend(title='mesh%d' % (mesh))
        plt.show()
    
    def plot_data_all(self):
        
        plt.figure(figsize=(12,12))
        
        for i in range(1, 16):
            name = "%s_%s_mesh%d" % (list(self.variable)[0], list(self.channel)[0], i)
            plt.plot(self.timedata.to_numpy(), self.data[name].to_numpy(), '-^', label=("mesh%d" % i))

        plt.grid(axis='both')
        plt.ylabel('variable %s, channel %s, delta %1.1f s' % (list(self.variable)[0], list(self.channel)[0], self.timedata[1]))
        plt.xlabel('T [s?]')
        plt.legend(title='mesh1-15')
        plt.show()

    def plot_data(self):
        
        nrows=4
        ncols=4

        fig, axes = plt.subplots(nrows, ncols, sharex=True, figsize=(18,18))
        
        fig.suptitle('variable: %s, run: %d channel: %s, delta %f' 
                     % (list(self.variable)[0], self.run, list(self.channel)[0], self.timedata[1]), fontsize=20)

        counter = 1
        for i in range(0, nrows):
            for ii in range(0, ncols):
                name = "%s_%s_mesh%d" % (list(self.variable)[0], list(self.channel)[0], counter)
                if counter < 16: 
                    axes[i, ii].plot(self.timedata.to_numpy(), self.data[name].to_numpy())
                    axes[i, ii].title.set_text('mesh%d' % (counter))
                counter += 1

        plt.show()

class AssasDataset(Dataset):

    def __init__(self, root, device, history=10, forecast=1, standardize=False, dtype=torch.float32):
        
        self.history = history
        self.forecast = forecast

        self.filenames, self.data = AssasDataset.build_data(root)
        self.data_dict = AssasDataset.build_dict(self.data)
    
    @staticmethod
    def build_data(root):

        filenames = []
        data = []
        for path in Path(root).rglob('*.txt'):
            
            assas_datafile = AssasDatafile(str(path))
            data.append((assas_datafile, list(assas_datafile.variable)[0], list(assas_datafile.channel)[0], assas_datafile.run))
            filenames.append(str(path))

        return filenames, data

    @staticmethod
    def plot_data_dict(data_dict, run, variable, channel):
        
        nrows=4
        ncols=4

        fig, axes = plt.subplots(nrows, ncols, sharex=True, figsize=(18,18))
        
        fig.suptitle('variable: %s, run: %d channel: %s' 
                     % (variable, run, channel), fontsize=20)

        counter = 1
        for i in range(0, nrows):
            for ii in range(0, ncols):
                if counter < 16:
                    meshname = "mesh%d" % (counter)
                    axes[i, ii].plot(data_dict[run][variable][channel]['timedata'], data_dict[run][variable][channel][meshname], '-o')
                    axes[i, ii].title.set_text('mesh%d' % (counter))
                counter += 1

        plt.show()

    @staticmethod
    def build_dict(data):
        
        data_dict = {}
        
        variables = list(set([t[1] for t in data]))
        channels = list(set([t[2] for t in data]))
        runs = list(set([t[3] for t in data]))
        meshes = range(1, 16)

        for run in runs:
            data_dict[run] = {}
            for variable in variables:
                data_dict[run][variable] = {}
                for channel in channels:
                    data_dict[run][variable][channel] = {}
                    data_dict[run][variable][channel]['timedata'] = []
                    for i in meshes:
                        data_dict[run][variable][channel][i] = []                        

        for i in data:
            dataframe = i[0].get_data()
            data_dict[i[3]][i[1]][i[2]]["timedata"] = i[0].timedata.to_numpy()
            for ii in meshes:
                name = "%s_%s_mesh%d" % (i[1], i[2], ii)
                print(name)
                meshname = "mesh%d" % (ii)
                data_dict[i[3]][i[1]][i[2]][meshname] = dataframe[name].to_numpy()

        return data_dict

    def get_data(self):
        return self.data

In [2]:
ad = AssasDataset("./data", "cpu")
ad.data_dict[123]["Debris"]["Ch1"]["mesh1"]

read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_25.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_215.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_284.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_92.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_207.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_46.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_160.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_196.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_91.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_229.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_11.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_225.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_68.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat_temp_183.txt
read file: data/th_data/CHANNEL2_sat_temp/CHANNEL2_sat

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

In [8]:
df = ad.data[0][0].get_data()
display(df)
df[:50]["T_sat_Ch2_mesh1"]

Unnamed: 0,T_sat_Ch2_mesh1,T_sat_Ch2_mesh2,T_sat_Ch2_mesh3,T_sat_Ch2_mesh4,T_sat_Ch2_mesh5,T_sat_Ch2_mesh6,T_sat_Ch2_mesh7,T_sat_Ch2_mesh8,T_sat_Ch2_mesh9,T_sat_Ch2_mesh10,T_sat_Ch2_mesh11,T_sat_Ch2_mesh12,T_sat_Ch2_mesh13,T_sat_Ch2_mesh14,T_sat_Ch2_mesh15
0,620.157983,620.116168,620.025699,619.927223,619.886881,619.789683,619.691770,619.590672,619.549999,619.450075,619.349375,619.245325,619.204204,619.098778,619.034650
1,566.575686,566.557472,566.532271,566.507070,566.485604,566.461215,566.437093,566.413004,566.392979,566.369449,566.346100,566.322694,566.303777,566.280260,566.263699
2,563.859905,563.841358,563.818414,563.797997,563.779478,563.762332,563.746442,563.731625,563.717743,563.704459,563.691932,563.680001,563.669131,563.657280,563.648051
3,560.433522,560.413951,560.389791,560.370019,560.354054,560.340351,560.328639,560.318444,560.309135,560.300599,560.293007,560.286267,560.280225,560.272513,560.266687
4,551.154259,551.135013,551.110867,551.090123,551.071674,551.054938,551.039763,551.025954,551.013175,551.000979,550.989897,550.978928,550.970741,550.968032,550.966974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,398.448085,398.447871,398.447635,398.447424,398.447217,398.447013,398.446809,398.446607,398.446402,398.446199,398.445979,398.445777,398.445562,398.445338,398.445189
327,398.671688,398.671483,398.671256,398.671049,398.670843,398.670638,398.670434,398.670230,398.670024,398.669825,398.669610,398.669409,398.669204,398.668990,398.668844
328,398.455347,398.455150,398.454929,398.454723,398.454518,398.454314,398.454110,398.453906,398.453699,398.453500,398.453286,398.453087,398.452883,398.452671,398.452526
329,398.430970,398.430778,398.430564,398.430364,398.430165,398.429967,398.429769,398.429571,398.429370,398.429176,398.428968,398.428772,398.428571,398.428361,398.428217


0     620.157983
1     566.575686
2     563.859905
3     560.433522
4     551.154259
5     529.115454
6     481.926328
7     454.161658
8     453.825279
9     454.665376
10    459.065351
11    460.587880
12    461.092254
13    461.212693
14    461.141843
15    460.951204
16    460.644855
17    459.396090
18    456.466184
19    453.969199
20    451.994979
21    450.296728
22    448.816254
23    447.482397
24    446.132254
25    444.814000
26    443.593932
27    442.461614
28    441.393806
29    440.356001
30    439.343474
31    438.376643
32    437.486948
33    436.622304
34    435.806961
35    435.000704
36    434.031810
37    433.058933
38    432.366663
39    431.825836
40    431.121506
41    430.304676
42    428.538962
43    429.216387
44    426.298627
45    422.693098
46    420.066563
47    418.233466
48    417.361309
49    417.384391
Name: T_sat_Ch2_mesh1, dtype: float64

In [18]:

import numpy as np
from numpy import array

import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

class CNN_ForecastNet(nn.Module):
    
    def __init__(self):
    
        super(CNN_ForecastNet,self).__init__()
    
        self.conv1d = nn.Conv1d(3,64,kernel_size=1)
        self.relu = nn.ReLU(inplace=True)
        self.fc1 = nn.Linear(64*2,50)
        self.fc2 = nn.Linear(50,1)
        
    def forward(self,x):

        x = self.conv1d(x)
        x = self.relu(x)
        x = x.view(-1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        
        return x

class VariableDataset(Dataset):
    
    def __init__(self, feature, target):
        
        self.feature = feature
        self.target = target
    
    def __len__(self):

        return len(self.feature)
    
    def __getitem__(self, idx):
        
        item = self.feature[idx]
        label = self.target[idx]
        
        return item,label

def split_sequence(sequence, n_steps):
    
    x, y = list(), list()
    
    for i in range(len(sequence)):
        
        end_ix = i + n_steps
        
        if end_ix > len(sequence)-1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        x.append(seq_x)
        y.append(seq_y)
    
    return array(x), array(y)

value = "T_sat_Ch2_mesh1"

train_set = df[:200][value]
valid_set = df[220:320][value]

print("train_set ", train_set)
print("valid_set ", valid_set)

print('Proportion of train_set : {:.2f}%'.format(len(train_set)/len(df[:][value])))
print('Proportion of valid_set : {:.2f}%'.format(len(valid_set)/len(df[:][value])))

raw_seq = [10,20,30,40,50,60,70,80,90]
n_steps = 3

train_x,train_y = split_sequence(train_set.values,n_steps)
alid_x,valid_y = split_sequence(valid_set.values,n_steps)





train_set  0      620.157983
1      566.575686
2      563.859905
3      560.433522
4      551.154259
          ...    
195    398.057004
196    398.208633
197    398.457986
198    398.196892
199    398.717694
Name: T_sat_Ch2_mesh1, Length: 200, dtype: float64
valid_set  220    397.327260
221    397.716809
222    397.495082
223    397.353790
224    397.138098
          ...    
315    398.236510
316    398.837622
317    398.717151
318    398.688598
319    398.744481
Name: T_sat_Ch2_mesh1, Length: 100, dtype: float64
Proportion of train_set : 0.60%
Proportion of valid_set : 0.30%
