# Degradation Preprocessing Data

Titan Hartono (titan.hartono@helmholtz-berlin.de)
Data collected and cleaned from: Paolo Graniero, Hans Koebler

ver 20221122

## 1. Import libraries and load the dataset

In [1]:
# Import all the packages needed for the notebook to run

# %matplotlib inline
import matplotlib.pyplot as plt
import sys
import os
# import rdkit
import numpy as np
import pandas as pd
from pandas import DataFrame, read_csv
from IPython.display import display_html
import seaborn as sns
import json

### Unpacking the whole folder

In [2]:
###### FUNCTIONS ######

def extract_var(df_repeated):
    '''
    df_repeated: the whole dataset you're going to split, already have repeated rows/ 'flattened'
    '''
    
    # Define variables
    totalNumPixel = 6
    columns_Temperature = ['Temperature_t','Temperature']
    columns_Irradiation = ['Irradiation_t','Irradiation']
    columns_MPPT = ['MPPT_t','MPPT_Irr','MPPT_I','MPPT_mV','MPPT_J','MPPT_V','MPPT_EFF','MPPT_dur_s','MPPT_dur_h']
    columns_IVdataFor = ['IV_t', 'IV_I_sc','IV_V_oc','IV_I_MPP','IV_V_MPP','IV_FF','IV_n','JVScan']
    columns_IVdataRev = ['IV_t', 'IV_I_sc','IV_V_oc','IV_I_MPP','IV_V_MPP','IV_FF','IV_n','JVScan']
    columns_JVScan = ['VmV','ImA','V','J']
    
    for device in range(len(df)): # Going through device within the dataset

        for pixel in range(totalNumPixel): # Going through pixel within the dataset
            
            print('We are on device: ',device,' and pixel: ',pixel)
            
            # Picking specific device
            Temperature = pd.DataFrame.from_dict(df_repeated['Temperature'][(device)*(totalNumPixel)+pixel])
            Irradiation = pd.DataFrame.from_dict(df_repeated['Irradiation'][(device)*(totalNumPixel)+pixel])
            IVdataFor = pd.DataFrame.from_dict(df_repeated['IVdataFor'][(device)*(totalNumPixel)+pixel])
            IVdataRev = pd.DataFrame.from_dict(df_repeated['IVdataRev'][(device)*(totalNumPixel)+pixel])
            MPPTdata = pd.DataFrame.from_dict(df_repeated['MPPTdata'][(device)*(totalNumPixel)+pixel])

            # Picking specific pixel of device
            # Not applicable for  because there's only 1 temperature sensor for entire device
            MPPTdata_pixel = pd.DataFrame.from_dict(pd.DataFrame.from_dict(MPPTdata.T[pixel]))
            IVdataFor_pixel = pd.DataFrame.from_dict(pd.DataFrame.from_dict(IVdataFor.T[pixel]))
            IVdataRev_pixel = pd.DataFrame.from_dict(pd.DataFrame.from_dict(IVdataRev.T[pixel]))
            
            # Dropping NaN entries
            MPPTdata_pixel = MPPTdata_pixel.dropna() # Dropping NaN entries
            IVdataFor_pixel = IVdataFor_pixel.dropna() # Dropping NaN entries
            IVdataRev_pixel = IVdataRev_pixel.dropna() # Dropping NaN entries
            
            # Creating empty dataframe
            MPPTdata_i = pd.DataFrame(columns = columns_MPPT) # Creating empty dataframe 
            IVdataFor_i = pd.DataFrame(columns = columns_IVdataFor) # Creating empty dataframe 
            IVdataRev_i = pd.DataFrame(columns = columns_IVdataRev) # Creating empty dataframe 

            # Going through each time series of the pixel
            
            # MPPTdata
            for i in range(len(MPPTdata_pixel.index)):
                MPPTdata_i = MPPTdata_i.append(pd.DataFrame(MPPTdata_pixel[pixel][i].values(),index=columns_MPPT).T)
            
            # IVdataFor
            for i in range(len(IVdataFor_pixel.index)):
                IVdataFor_i = IVdataFor_i.append(pd.DataFrame(IVdataFor_pixel[pixel][i].values(),index=columns_IVdataFor).T)
                
                # Reindexing IVdataFor_i
                IVdataFor_i.index = range(IVdataFor_i.shape[0])
                
                for k in range(len(IVdataFor_i['JVScan'])):
                    IVdataFor_i['JVScan'][k] = pd.DataFrame.from_dict(IVdataFor_i['JVScan'][k])
            
            # IVdataRev
            for i in range(len(IVdataRev_pixel.index)): 
                IVdataRev_i = IVdataRev_i.append(pd.DataFrame(IVdataRev_pixel[pixel][i].values(),index=columns_IVdataRev).T)
                
                # Reindexing IVdataRev_i
                IVdataRev_i.index = range(IVdataRev_i.shape[0])
                
                for l in range(len(IVdataRev_i['JVScan'])):
                    IVdataRev_i['JVScan'][l] = pd.DataFrame.from_dict(IVdataRev_i['JVScan'][l])
        
            # Reindexing for MPPT
            MPPTdata_i.index = range(MPPTdata_i.shape[0])

            # Storing in nested dataframe
            df_repeated.at[(device)*(totalNumPixel)+pixel, 'Temperature'] = Temperature # without i because only 1 sensor
            df_repeated.at[(device)*(totalNumPixel)+pixel, 'Irradiation'] = Irradiation # without i because only 1 sensor
            df_repeated.at[(device)*(totalNumPixel)+pixel, 'MPPTdata'] = MPPTdata_i
            df_repeated.at[(device)*(totalNumPixel)+pixel, 'IVdataFor'] = IVdataFor_i
            df_repeated.at[(device)*(totalNumPixel)+pixel, 'IVdataRev'] = IVdataRev_i
    
    return df_repeated

In [6]:
# Directory of json files, and directory for output .pkl files
path_json = './dataset/20221101_newdata_tobeprocessed/2/'
path_pkl = './dataset/pkl_2/'
files_and_directories = os.listdir(path_json)

# Count
count = 0

# Going through each file in the directory
for filename in files_and_directories: 
    
    totalNumPixel = 6
    count +=1
    print ('Count: ',count,', filename: ', filename)
    
    # Load the json file
    only_filename = os.path.splitext(os.path.basename(filename))[0] # filename only
    df = pd.read_json(path_json+filename, encoding= 'unicode_escape')
    
    # Adding the pixel column and filename column
    df_repeated = df.loc[df.index.repeat(totalNumPixel)].reset_index(drop=True)
    df_repeated['Pixel'] = pd.DataFrame([0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
                                        0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
                                        0,1,2,3,4,5,0,1,2,3,4,5])
    df_repeated['Filename'] = only_filename

    # Rearrange position of the columns
    df_repeated.insert(1,'Pixel', df_repeated.pop('Pixel'))
    df_repeated.insert(2,'Filename', df_repeated.pop('Filename'))

    # Extract the dataset and flatten it
    df_extracted = extract_var(df_repeated)

    # Save it as pickle file
    df_extracted.to_pickle(path_pkl+only_filename+'.pkl')

Count:  1 , filename:  JSON_Ghefar-3.txt
We are on device:  0  and pixel:  0
We are on device:  0  and pixel:  1
We are on device:  0  and pixel:  2
We are on device:  0  and pixel:  3
We are on device:  0  and pixel:  4
We are on device:  0  and pixel:  5
We are on device:  1  and pixel:  0
We are on device:  1  and pixel:  1
We are on device:  1  and pixel:  2
We are on device:  1  and pixel:  3
We are on device:  1  and pixel:  4
We are on device:  1  and pixel:  5
We are on device:  2  and pixel:  0
We are on device:  2  and pixel:  1
We are on device:  2  and pixel:  2
We are on device:  2  and pixel:  3
We are on device:  2  and pixel:  4
We are on device:  2  and pixel:  5
We are on device:  3  and pixel:  0
We are on device:  3  and pixel:  1
We are on device:  3  and pixel:  2
We are on device:  3  and pixel:  3
We are on device:  3  and pixel:  4
We are on device:  3  and pixel:  5
We are on device:  4  and pixel:  0
We are on device:  4  and pixel:  1
We are on device:  4  a

We are on device:  5  and pixel:  0
We are on device:  5  and pixel:  1
We are on device:  5  and pixel:  2
We are on device:  5  and pixel:  3
We are on device:  5  and pixel:  4
We are on device:  5  and pixel:  5
We are on device:  6  and pixel:  0
We are on device:  6  and pixel:  1
We are on device:  6  and pixel:  2
We are on device:  6  and pixel:  3
We are on device:  6  and pixel:  4
We are on device:  6  and pixel:  5
We are on device:  7  and pixel:  0
We are on device:  7  and pixel:  1
We are on device:  7  and pixel:  2
We are on device:  7  and pixel:  3
We are on device:  7  and pixel:  4
We are on device:  7  and pixel:  5
Count:  6 , filename:  JSON_Ghefar-8.txt
We are on device:  0  and pixel:  0
We are on device:  0  and pixel:  1
We are on device:  0  and pixel:  2
We are on device:  0  and pixel:  3
We are on device:  0  and pixel:  4
We are on device:  0  and pixel:  5
We are on device:  1  and pixel:  0
We are on device:  1  and pixel:  1
We are on device:  1  a

In [103]:
# Check the results
df_extracted = pd.read_pickle(path_pkl+'JSON_Fengjiu_2'+'.pkl')
df_extracted

Unnamed: 0,SampleNumber,Pixel,Filename,Temperature,Irradiation,MPPTdata,IVdataFor,IVdataRev,Area,Atmosphere,...,FrontContact,CSL1,CSL2,ABS,CSL3,CSL4,BackContact,Box_used,Pixelfilter,Bestpix
0,1,0,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 0, 0]"
1,1,1,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 0, 0]"
2,1,2,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 0, 0]"
3,1,3,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 0, 0]"
4,1,4,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 0, 0]"
5,1,5,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 0, 0]"
6,2,0,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 1, 0]"
7,2,1,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 1, 0]"
8,2,2,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 1, 0]"
9,2,3,JSON_Fengjiu_2,Temperature_t Temperature 0 06/...,Irradiation_t Irradiation 0 06/...,MPPT_t MPPT_Irr MPPT_I...,IV_t IV_I_sc IV_V_oc I...,IV_t IV_I_sc IV_V_oc I...,0.18,N2,...,ITO,PEDOT:PSS,,,FACsPI,BCP,,,"[1, 1, 1, 1, 1, 1]","[0, 0, 0, 0, 1, 0]"
