In [13]:
import os
import re
import shutil
import pandas as pd
import zipfile

from em import COLUMNS_6P, compute_all_lq

### Loads all the SRI files into pandas DataFrames and saves CSV datasets for the different modelling strategies


In [14]:
def line_split(line):
    """Tokenise ``line`` on whitespace while keeping double-quoted spans together.

    A token is either a run of non-whitespace characters whose first character
    is not a double quote, or a double-quoted span (the quotes are kept in the
    returned token).

    Parameters
    ----------
    line : str
        Text to tokenise.

    Returns
    -------
    list of str
        The tokens in order of appearance; empty when nothing matches.
    """
    token_pattern = r'[^"\s]\S*|".+?"'
    # The pattern has no capture groups, so group(0) is the whole token.
    return [match.group(0) for match in re.finditer(token_pattern, line)]

def load(zip_file, folder, manifest_fname, names):
    """Read a sample manifest and its per-sample ``.sri`` files from an open archive.

    The manifest is parsed as a space-separated table without header whose
    first column becomes the index.  For every manifest row the archive member
    ``<folder>/<int(index)>.sri`` is read.  Each data line of that member must
    hold one number per entry of ``COLUMNS_6P``; the values are stored in the
    columns ``"<column>_<k>"``, where ``k`` is the position of the line among
    the data lines of the file.

    Lines starting with ``#`` or ``!`` and blank lines are ignored.  A
    non-blank line with an unexpected number of values is reported with
    ``print`` and still advances ``k`` (its values are dropped).

    Parameters
    ----------
    zip_file : zipfile.ZipFile
        Archive opened for reading.
    folder : str
        Archive folder that contains the ``.sri`` members.
    manifest_fname : str
        Archive member holding the manifest table.
    names : list of str
        Column names for the manifest; the first one names the index column.

    Returns
    -------
    tuple
        ``(frame, freq_idx)``: the manifest columns joined (on the index) with
        the values read from the ``.sri`` files, and the number of data lines
        counted in the last ``.sri`` file read (0 when the manifest has no rows).

    Raises
    ------
    KeyError
        If the manifest or an ``.sri`` member is missing from the archive.
    ValueError
        If a data line contains a token that is not a number, or if the
        ``.sri`` files do not all provide the same number of data lines.
    """
    # Close the manifest handle as soon as it has been parsed.
    with zip_file.open(manifest_fname) as manifest:
        df = pd.read_csv(manifest,
                         sep=" ", index_col=0,
                         names=names, engine="python")

    points = {'ID': []}
    freq_idx = 0  # defined even when the manifest has no rows
    for index in df.index:
        with zip_file.open(folder + "/" + str(int(index)) + ".sri") as sri_file:
            freq_idx = 0
            points['ID'].append(index)
            for raw_line in sri_file:
                line = raw_line.decode().strip()
                # Blank lines (e.g. a trailing empty line) are not data: they
                # must not consume a frequency index, otherwise the column
                # suffixes shift and the returned count is too large.
                if not line or line.startswith('#') or line.startswith('!'):
                    continue
                values = [float(token) for token in line.split()]

                if len(values) == len(COLUMNS_6P):
                    for column, value in zip(COLUMNS_6P, values):
                        points.setdefault(column + "_" + str(freq_idx), []).append(value)
                else:
                    print("Invalid header!")
                freq_idx = freq_idx + 1

    return (pd.concat((df, pd.DataFrame(points).set_index("ID")), axis=1), freq_idx)




In [15]:
# Manifest column names; names_all extends them with the two SRF columns
# that are added to the frames later in the notebook.
names_inputs = ['ID', 'Np', 'Ns', 'Dinp', 'Dins', 'Wp', 'Ws']
names_all = names_inputs + ['SRFp', 'SRFs']

# Folder that receives every generated dataset.
data_folder = '../data/transf_65nm/'

# Key labels: (np, ns, subspace)
#     "balun" : dinp < dins + 2*ns*ws + 4*(ns-1)
#     "close" : dinp > dins + 2*ns*ws + 4*(ns-1) + 2
data_files = {
    (1, 1, ""): "../data/transf_65nm_1_1T.zip",
    (1, 1, "_balun"): "../data/transf_65nm_1_1T_balun.zip",
    (1, 2, ""): "../data/transf_65nm_1_2T.zip",
    (2, 1, ""): "../data/transf_65nm_2_1T.zip",
}

# Start from an empty output folder: anything left from a previous run is removed.
if os.path.exists(data_folder):
    shutil.rmtree(data_folder)
os.mkdir(data_folder)

In [16]:
# Value stored in the SRF columns when no sign change is detected.
SRF_DEFAULT = 200e9


def _assign_srf(frame, lq_by_freq, lq_column, target):
    """Write into ``frame[target]`` the frequency at which an LQ column turns negative.

    For every row, the value of ``lq_by_freq[0][row, lq_column]`` is taken as
    reference.  When it is positive, the first frequency position ``i`` whose
    value is negative is searched and ``target`` is set to the mean of
    ``freq_<i>`` and ``freq_<i-1>`` of that row.  Rows without such a position
    keep their current ``target`` value.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding the ``freq_<i>`` columns; modified in place.
    lq_by_freq : list
        One entry per frequency position, each indexed as ``[row, column]``.
    lq_column : int
        Column of the LQ entries to inspect.
    target : str
        Name of the column of ``frame`` that receives the result.
    """
    for r, index in enumerate(frame.index):
        # NOTE(review): the reference is the value at the first frequency point
        # and is never advanced to the preceding point, so a row that starts
        # non-positive is never assigned a value -- confirm this is intended.
        l_ref = lq_by_freq[0][r, lq_column]
        if not l_ref > 0:
            continue
        for i in range(len(lq_by_freq)):
            if lq_by_freq[i][r, lq_column] < 0:
                frame.loc[index, target] = (frame.loc[index, 'freq_' + str(i)]
                                            + frame.loc[index, 'freq_' + str(i - 1)]) / 2
                break


dataset = {}

for file_key, filename in data_files.items():
    trans_sub_space = f"{file_key[0]}_{file_key[1]}T{file_key[2]}"

    # The archive is only needed while loading; close it deterministically.
    with zipfile.ZipFile(filename, mode="r") as zf_data:
        transf_TEST, freq_points = load(zf_data, f"transf_65nm_{trans_sub_space}/transf_test",
                    manifest_fname=f"transf_65nm_{trans_sub_space}/input_samples_test.in",
                    names=names_inputs)

        # freq_points keeps the count reported for the training set.
        transf_TRAIN, freq_points = load(zf_data, f"transf_65nm_{trans_sub_space}/transf_training",
                    manifest_fname=f"transf_65nm_{trans_sub_space}/input_samples_training.in",
                    names=names_inputs)

    for frame in (transf_TEST, transf_TRAIN):
        frame['SRFp'] = [SRF_DEFAULT] * len(frame)
        frame['SRFs'] = [SRF_DEFAULT] * len(frame)

    # One compute_all_lq result per frequency position: the first column of the
    # frequency block is passed as first argument, the remaining ones as second.
    lq_test = []
    lq_train = []
    for f in range(freq_points):
        c = [col + "_" + str(f) for col in COLUMNS_6P]

        lq_test.append(compute_all_lq(transf_TEST[c[0]].values, transf_TEST[c[1:]].values))
        lq_train.append(compute_all_lq(transf_TRAIN[c[0]].values, transf_TRAIN[c[1:]].values))

    # LQ column 0 drives SRFp and LQ column 2 drives SRFs
    # (presumably primary / secondary inductance -- verify against compute_all_lq).
    for frame, lq in ((transf_TEST, lq_test), (transf_TRAIN, lq_train)):
        _assign_srf(frame, lq, 0, 'SRFp')
        _assign_srf(frame, lq, 2, 'SRFs')

    dataset[file_key] = (transf_TRAIN, transf_TEST)






In [17]:
# One archive per sub-space, holding one CSV per frequency position.
# dfs_train[f] / dfs_test[f] collect, for frequency position f, one frame per
# entry of data_files (in that order); they are reused by the combined export.
dfs_train = [[] for _ in range(freq_points)]
dfs_test = [[] for _ in range(freq_points)]
save_data = True

for file_key in data_files.keys():
    transf_nturn_TRAIN, transf_nturn_TEST = dataset[file_key]
    trans_sub_space = f"{file_key[0]}_{file_key[1]}T{file_key[2]}"

    for f in range(freq_points):
        freq_cols = [col + "_" + str(f) for col in COLUMNS_6P]
        c = names_all[1:] + freq_cols

        # Drop the "_<f>" suffix so every per-frequency frame has the same columns.
        rm = dict(zip(freq_cols, COLUMNS_6P))

        transf_TRAIN_2 = transf_nturn_TRAIN[c].rename(columns=rm)
        transf_TEST_2 = transf_nturn_TEST[c].rename(columns=rm)

        dfs_train[f].append(transf_TRAIN_2)
        dfs_test[f].append(transf_TEST_2)

    if save_data:
        # Archives are only created when data is actually saved, and the with
        # blocks close them even if writing fails part-way.
        with zipfile.ZipFile(data_folder + f'test_dataset_{trans_sub_space}.csv.zip',
                             mode="w", compression=zipfile.ZIP_DEFLATED) as zf_test:
            for f in range(freq_points):
                # The last element is the frame appended above for this sub-space.
                zf_test.writestr("test_dataset_" + str(f) + ".csv", dfs_test[f][-1].to_csv())

        with zipfile.ZipFile(data_folder + f'train_dataset_{trans_sub_space}.csv.zip',
                             mode="w", compression=zipfile.ZIP_DEFLATED) as zf_train:
            for f in range(freq_points):
                zf_train.writestr("training_dataset_" + str(f) + ".csv", dfs_train[f][-1].to_csv())


In [9]:
# One file per frequency: all sub-spaces stacked, keyed 1..N in data_files order.
save_data = True

df_allT_train = [None] * freq_points
df_allT_test = [None] * freq_points

for f in range(freq_points):
    # The keys are derived from the number of frames actually collected; a
    # hard-coded range that does not match that number is at best silently
    # truncated by pandas.
    df_allT_train[f] = pd.concat(dfs_train[f], keys=range(1, len(dfs_train[f]) + 1))
    df_allT_test[f] = pd.concat(dfs_test[f], keys=range(1, len(dfs_test[f]) + 1))

if save_data:
    # The with blocks close the archives even if writing fails part-way.
    with zipfile.ZipFile(data_folder + 'train_dataset_allT.csv.zip',
                         mode="w", compression=zipfile.ZIP_DEFLATED) as zf_train:
        for f, frame in enumerate(df_allT_train):
            zf_train.writestr("training_dataset_" + str(f) + ".csv", frame.to_csv())

    with zipfile.ZipFile(data_folder + 'test_dataset_allT.csv.zip',
                         mode="w", compression=zipfile.ZIP_DEFLATED) as zf_test:
        for f, frame in enumerate(df_allT_test):
            zf_test.writestr("test_dataset_" + str(f) + ".csv", frame.to_csv())


# One file with everything: the outer key is the frequency position.
# These two files are written regardless of save_data.
df_allFT = pd.concat(df_allT_train, keys=range(freq_points))
df_allFT.to_csv(data_folder + 'train_dataset_allTF.csv.zip')


df_allFT = pd.concat(df_allT_test, keys=range(freq_points))
df_allFT.to_csv(data_folder + 'test_dataset_allTF.csv.zip')

###### Copyright (C) 2022 Instituto de Telecomunicações & IMSE CSIC