In [1]:
import numpy as np
import os
import pickle
from glob import glob
import torch
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import openpyxl

In [2]:
class DataLoader:
    """Scan a directory of ``.pkl`` files and index them by label and by car.

    Every file matching ``data_path + '/*.pkl'`` is read once with
    ``torch.load``. Element ``[1]`` of the loaded object is used as a metadata
    mapping and must provide the ``'car'`` and ``'label'`` entries.

    Attributes populated by the constructor:
        data_pkl_files: every path returned by the glob.
        ind_pkl_files: paths whose label equals ``'00'`` (presumably the
            in-distribution counterpart of ``ood`` -- confirm).
        ood_pkl_files: paths with any other label (out of distribution).
        car_num_list: the car id of every file, one entry per file.
        ind_car_num_list / ood_car_num_list: sets of car ids seen with a
            ``'00'`` label / with any other label. A car whose files carry
            mixed labels ends up in both sets.
        all_car_dict: car id -> list of all paths belonging to that car,
            regardless of label.

    ``column_path`` is only stored here; this class never reads it.
    """

    def __init__(self, data_path, column_path):
        self.data_path = data_path
        self.column_path = column_path
        self.data_pkl_files = glob(self.data_path + '/*.pkl')

        self.ind_pkl_files = []
        self.ood_pkl_files = []
        self.car_num_list = []

        self.ood_car_num_list = set()
        self.ind_car_num_list = set()

        self.all_car_dict = {}

        for pkl_path in tqdm(self.data_pkl_files):
            # Only the metadata mapping (element 1) is needed for indexing.
            metadata = torch.load(pkl_path)[1]
            car_id = metadata['car']

            # Label '00' goes to the "ind" collections, everything else to "ood".
            is_ind = metadata['label'] == '00'
            path_bucket = self.ind_pkl_files if is_ind else self.ood_pkl_files
            car_bucket = self.ind_car_num_list if is_ind else self.ood_car_num_list
            path_bucket.append(pkl_path)
            car_bucket.add(car_id)

            self.car_num_list.append(car_id)
            self.all_car_dict.setdefault(car_id, []).append(pkl_path)


class dataExtractor(DataLoader):
    """Turn the ``.pkl`` files indexed by ``DataLoader`` into pandas DataFrames.

    Each file is read with ``torch.load`` and indexed as a pair: element ``[0]``
    is passed to ``pd.DataFrame`` as the tabular data and element ``[1]`` is a
    dict of per-file metadata (assumed to be scalar values -- TODO confirm).
    """

    def __init__(self, data_path, column_path):
        """Index ``data_path`` via the parent class and remember ``column_path``."""
        super().__init__(data_path, column_path)

    def extract_values(self, car_num):
        """Return one DataFrame with the rows of every file recorded for a car.

        All files listed in ``self.all_car_dict[car_num]`` are used, whatever
        their label. For each file the metadata dict becomes leading columns,
        followed by the data columns named after the object stored at
        ``self.column_path``.

        Args:
            car_num: key into ``self.all_car_dict``.

        Returns:
            The per-file frames stacked along axis 0 (each file keeps its own
            row index), or an empty DataFrame when the car has no files.

        Raises:
            KeyError: if ``car_num`` is not a key of ``self.all_car_dict``.
        """
        # NOTE(security): torch.load unpickles data; only use it on trusted files.
        parameters = torch.load(self.column_path)

        file_frames = []
        # Files are loaded one at a time so only a single raw file is held in
        # memory alongside the frames built so far.
        for pkl_path in self.all_car_dict[car_num]:
            loaded = torch.load(pkl_path)
            metadata = loaded[1]

            car_values = pd.DataFrame(loaded[0], columns=parameters)
            car_spec_values = pd.DataFrame(list(metadata.values())).transpose()
            # Name the metadata columns after THIS file's keys. Using the keys
            # of the first file would mislabel (or fail on) files whose
            # metadata differs in key order or count.
            car_spec_values.columns = list(metadata.keys())

            whole_data = pd.concat([car_spec_values, car_values], axis=1)
            # The metadata occupies only the first row after the column-wise
            # concat; forward-fill copies it onto every data row. This also
            # fills NaNs inside the data columns, as the original code did.
            whole_data = whole_data.ffill()

            file_frames.append(whole_data)

        if not file_frames:
            # pd.concat raises ValueError on an empty list.
            return pd.DataFrame()
        # Concatenate once: concatenating inside the loop re-copies all rows
        # accumulated so far on every iteration.
        return pd.concat(file_frames, axis=0)

    def dataframe_creation(self, car_list):
        """Stack ``extract_values(car)`` for every car in ``car_list``.

        Args:
            car_list: iterable of car ids; rows appear in iteration order.

        Returns:
            The per-car frames stacked along axis 0, or an empty DataFrame
            when ``car_list`` is empty.
        """
        car_frames = [self.extract_values(car) for car in tqdm(car_list)]
        if not car_frames:
            return pd.DataFrame()
        return pd.concat(car_frames, axis=0)




In [3]:
# Working directory of the process at the time this cell runs; the dataset
# paths defined in the notebook are built by appending to this string.
current_dir = os.getcwd()

In [4]:
# For each battery brand: the directory holding its .pkl data files and the
# pickle that stores the column names for those files.
# Brands 1 and 2 read from a "test" folder, brand 3 from a "data" folder.
data_path_1 = f'{current_dir}/data/battery_brand1/test'
column_path_1 = f'{current_dir}/data/battery_brand1/column.pkl'

data_path_2 = f'{current_dir}/data/battery_brand2/test'
column_path_2 = f'{current_dir}/data/battery_brand2/column.pkl'

data_path_3 = f'{current_dir}/data/battery_brand3/data'
column_path_3 = f'{current_dir}/data/battery_brand3/column.pkl'

In [5]:
# One extractor per battery brand. Constructing an extractor loads every .pkl
# file in its data directory once to index it by label and by car.
battery_band_1 = dataExtractor(data_path=data_path_1, column_path=column_path_1)

battery_band_2 = dataExtractor(data_path=data_path_2, column_path=column_path_2)

battery_band_3 = dataExtractor(data_path=data_path_3, column_path=column_path_3)


100%|██████████| 60100/60100 [00:18<00:00, 3304.79it/s]
100%|██████████| 74862/74862 [00:20<00:00, 3630.64it/s]
100%|██████████| 29598/29598 [00:08<00:00, 3473.89it/s]


In [6]:
# Per brand, build one frame from the cars seen with label '00' (ind_*) and
# one from the cars seen with any other label (ood_*).
ind_1 = battery_band_1.dataframe_creation(
    car_list=battery_band_1.ind_car_num_list,
)
ood_1 = battery_band_1.dataframe_creation(
    car_list=battery_band_1.ood_car_num_list,
)

ind_2 = battery_band_2.dataframe_creation(
    car_list=battery_band_2.ind_car_num_list,
)
ood_2 = battery_band_2.dataframe_creation(
    car_list=battery_band_2.ood_car_num_list,
)

ind_3 = battery_band_3.dataframe_creation(
    car_list=battery_band_3.ind_car_num_list,
)
ood_3 = battery_band_3.dataframe_creation(
    car_list=battery_band_3.ood_car_num_list,
)

100%|██████████| 17/17 [12:01<00:00, 42.46s/it]
100%|██████████| 15/15 [00:17<00:00,  1.15s/it]
100%|██████████| 11/11 [10:18<00:00, 56.27s/it]
100%|██████████| 7/7 [25:01<00:00, 214.49s/it]
100%|██████████| 91/91 [00:54<00:00,  1.66it/s]
100%|██████████| 9/9 [00:02<00:00,  3.83it/s]


In [7]:
# Stack the three brands row-wise, keeping the "ind" and "ood" groups separate.
all_ind = pd.concat(objs=[ind_1, ind_2, ind_3], axis='index')
all_ood = pd.concat(objs=[ood_1, ood_2, ood_3], axis='index')

In [15]:
# Copy of all_ind without fully duplicated rows (first occurrence is kept).
p = all_ind.drop_duplicates(keep='first')

In [16]:
# Copy of all_ood without fully duplicated rows (first occurrence is kept).
q = all_ood.drop_duplicates(keep='first')

In [21]:
# combined_data keeps every row; combined_data_wD stacks the two frames that
# were de-duplicated separately (rows shared between p and q are not removed).
combined_data = pd.concat(objs=[all_ind, all_ood], axis='index')
combined_data_wD = pd.concat(objs=[p, q], axis='index')

In [23]:
# Write both combined frames to the J: drive without the row index.
# Raw strings are used because '\c' is not a valid escape sequence: a plain
# literal emits DeprecationWarning/SyntaxWarning and is slated to become an
# error. The resulting path is unchanged.
combined_data.to_csv(r'J:\combined_data.csv', index=False)
combined_data_wD.to_csv(r'J:\combined_data_wD.csv', index=False)

In [11]:
# Save each per-brand frame next to the notebook, omitting the row index.
ind_1.to_csv(path_or_buf='ind_1.csv', index=False)
ood_1.to_csv(path_or_buf='ood_1.csv', index=False)

ind_2.to_csv(path_or_buf='ind_2.csv', index=False)
ood_2.to_csv(path_or_buf='ood_2.csv', index=False)

ind_3.to_csv(path_or_buf='ind_3.csv', index=False)
ood_3.to_csv(path_or_buf='ood_3.csv', index=False)

In [24]:
# Display the combined frame built from the de-duplicated p and q frames.
combined_data_wD

Unnamed: 0,label,car,charge_segment,mileage,volt,current,soc,max_single_volt,min_single_volt,max_temp,min_temp,timestamp
0,00,193,104,978.426495,-2.654998,-1.850000,45.503125,3.639583,3.629818,23.0,21.0,0.0
1,00,193,104,978.426495,-2.553234,-20.350000,45.534375,3.648698,3.638932,23.0,21.0,10.0
2,00,193,104,978.426495,-2.451471,-38.850000,45.565625,3.657812,3.648047,23.0,21.0,20.0
3,00,193,104,978.426495,-2.333055,-58.958333,45.613542,3.668750,3.658789,23.0,21.0,30.0
4,00,193,104,978.426495,-2.064770,-93.541667,45.811458,3.696094,3.684180,23.0,21.0,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...
123,10,475,9,3501.798720,45.660300,-24.200000,95.000000,4.181900,4.169100,31.0,28.0,2511.0
124,10,475,9,3501.798720,45.660300,-24.220000,95.000000,4.181100,4.170100,31.0,28.0,2521.0
125,10,475,9,3501.798720,45.660300,-24.390000,95.000000,4.182000,4.170900,31.0,28.0,2531.0
126,10,475,9,3501.798720,45.660300,-24.290000,95.000000,4.181900,4.170200,31.0,28.0,2541.0
