In [1]:
import pandas as pd
import re

In [2]:
# Column layout of pressure.dat: three leading fields followed by the
# columns labelled '1' through '21'.
names = ['program_num', 't_from_t0', 'times'] + [str(column) for column in range(1, 22)]

# pressure.dat is whitespace-separated and has no header row.
df = pd.read_csv("./pressure.dat", sep=r"\s+", header=None, names=names)

# data.csv carries its own 'index' column, which becomes the frame index.
original_df = pd.read_csv("./data.csv", index_col='index')

# Rewrite every program identifier that matches the pattern to
# "<first group>.<second group>"; text that does not match is left as-is.
df['program_num'] = df['program_num'].map(
    lambda raw: re.sub(r'(\d{8}).0(\d{2}).(\d{3})', r'\1.\2', raw)
)

In [3]:
# Columns holding the individual pressure values (everything after the
# three leading fields).
pressure_cols = names[3:]

# Validate BEFORE combining: read_csv pads rows that have too few fields with
# NaN, so the combined lists below always contain len(pressure_cols) entries
# and a length check on its own can never detect a short row.
incomplete_rows = df[pressure_cols].isna().any(axis=1)
assert not incomplete_rows.any(), (
    "{} row(s) have fewer than {} pressure values".format(
        int(incomplete_rows.sum()), len(pressure_cols)
    )
)

# Combine the pressure columns into a single list-valued column
df['pressure'] = df[pressure_cols].values.tolist()

# remove the old pressure columns
pruned = names[:3]
pruned.append('pressure')
df = df[pruned]

# Check to make sure every combined list has the expected number of entries
assert (df['pressure'].apply(len) == len(pressure_cols)).all()

In [7]:
# Left-join the pressure readings onto the original rows using the time as
# the key, discard rows that found no pressure match, and renumber the index.
df = (
    original_df
    .merge(df.loc[:, ['times', 't_from_t0', 'pressure']], how='left', on='times')
    .dropna(subset=['pressure'])
    .reset_index(drop=True)
)

# Write the combined table to disk without the positional index.
df.to_csv("./data_full.csv", index=False)

In [6]:
# Evaluate df as the cell's last expression so the notebook renders it.
df

Unnamed: 0,program_num,port,times,heat_load_std,I_tor_A,W_dia,NPC1,NPC2,NPC3,NPC4,NPC5,PC1,PC2,I_A,int_heat_load,IA,t_from_t0,pressure
0,20180829.33,51,1535546109353961216,0.000000,-0.138504,0.254092,11891.0,11892.0,11892.0,11888.0,11889.0,8698.0,8700.0,0.731478,0.000000e+00,0.731478,-5.000000e-09,"[0.36749241542, 0.332632583959, 0.251078380018..."
1,20180829.33,51,1535546109363961344,64395.117188,8.706935,3.348157,11891.0,11892.0,11892.0,11888.0,11889.0,8698.0,8700.0,0.731478,1.543830e+09,0.731478,1.000012e-02,"[1.057414498159, 0.947555931423, 0.69486309612..."
2,20180829.33,51,1535546109373961216,60390.097656,5.281524,9.431624,11891.0,11892.0,11892.0,11888.0,11889.0,8698.0,8700.0,0.731478,-9.594422e+08,0.731478,1.999999e-02,"[1.724720217136, 1.53251256347, 1.109048975099..."
3,20180829.33,51,1535546109383961088,58781.707031,8.644573,16.379575,11891.0,11892.0,11892.0,11888.0,11889.0,8698.0,8700.0,0.731478,-1.510263e+09,0.731478,2.999987e-02,"[3.901976201601, 3.497563945234, 2.66421468398..."
4,20180829.33,51,1535546109393961216,56715.195312,17.597111,24.080982,11891.0,11892.0,11892.0,11888.0,11889.0,8698.0,8700.0,0.731478,6.951950e+08,0.731478,4.000000e-02,"[6.333443439186, 5.59794190167, 4.211142136067..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25071,20180927.46,51,1538059511369848832,159275.171875,1031.253204,525.249958,13049.0,13050.0,13051.0,13046.0,13047.0,500.0,501.0,0.038317,2.495682e+10,0.038317,1.960000e+00,"[31.072163571344, 30.541903340408, 29.17606508..."
25072,20180927.46,51,1538059511379848704,159000.484375,999.544978,524.031799,13049.0,13050.0,13051.0,13046.0,13047.0,500.0,501.0,0.038317,2.533332e+10,0.038317,1.970000e+00,"[34.462495844777, 34.154980022006, 32.85169953..."
25073,20180927.46,51,1538059511389848832,160588.906250,1023.430155,525.366556,13049.0,13050.0,13051.0,13046.0,13047.0,500.0,501.0,0.038317,2.317622e+10,0.038317,1.980000e+00,"[34.486286974168, 34.168923303897, 32.86225606..."
25074,20180927.46,51,1538059511399848960,160238.593750,1033.178553,525.804150,13049.0,13050.0,13051.0,13046.0,13047.0,500.0,501.0,0.038317,2.203804e+10,0.038317,1.990000e+00,"[34.748794172202, 34.427403392385, 33.09671192..."


In [None]:
# Clean the data by interpolating missing values within each program
def _fill_per_program(df: pd.DataFrame, column: str, position: int) -> pd.Series:
    """Pick one element from each list in ``column`` and fill gaps per program.

    Entries with an empty list become NaN. Within each ``program_num`` group
    the NaNs are filled by linear interpolation and any leading NaNs are then
    back-filled. The returned Series is indexed like ``df``.
    """
    picked = (
        df[column]
        .apply(lambda x: x[position] if len(x) >= 1 else None)
        # Force a float dtype so a column made up only of empty lists yields
        # NaNs rather than an object-dtype Series that cannot be interpolated.
        .astype(float)
    )
    return picked.groupby(df["program_num"], sort=False).transform(
        lambda s: s.interpolate(method="linear", axis=0).bfill()
    )


def interpolate_data(df: pd.DataFrame) -> pd.DataFrame:
    """Interpolates missing data in the dataframe.

    Expects the columns ``I_A``, ``program_num``, ``iota``, ``vol`` and
    ``phi_edge``; the last three hold list-like values, possibly empty.

    The returned frame is a new object (the input is not modified) in which:

    * ``I_A`` is dropped;
    * ``simulated`` is True where the ``iota`` entry is non-empty;
    * ``iota_edge`` holds the LAST element of each ``iota`` entry;
    * ``vol`` is replaced by the FIRST element of each ``vol`` entry;
    * ``phi_edge`` is replaced by the FIRST element of each ``phi_edge`` entry.

    Gaps in the three derived columns are filled separately for every
    ``program_num`` (linear interpolation, then back-fill).

    NOTE(review): ``phi_edge`` used to be computed from ``vol`` (an apparent
    copy-paste slip); it is now read from the ``phi_edge`` column, still
    taking the first element -- confirm that index is the intended one.
    """
    result = df.drop(columns="I_A")

    result["simulated"] = result["iota"].apply(lambda x: len(x) >= 1)

    # Compute every derived column from the untouched list columns before
    # overwriting any of them.
    iota_edge = _fill_per_program(result, "iota", -1)
    vol = _fill_per_program(result, "vol", 0)
    phi_edge = _fill_per_program(result, "phi_edge", 0)

    result["iota_edge"] = iota_edge
    result["vol"] = vol
    result["phi_edge"] = phi_edge

    return result