In [25]:
import pandas as pd
import json
from matplotlib import pyplot as plt

In [26]:
json_file_path = 'new_bosch_data.json'

In [27]:
with open(json_file_path, 'r') as j:
     contents = json.loads(j.read())

In [28]:
data = contents['rawDataBody']
df = pd.json_normalize(data, record_path =['dataBlock'])
# 
df = df.rename(columns={df.columns[0]: 'sensor_index',  
                        df.columns[1]: 'sensor_id',
                        df.columns[2]: 'time_since_power_on',
                        df.columns[3]: 'real_time_clock',
                        df.columns[4]: 'temperature',
                        df.columns[5]: 'pressure',
                        df.columns[6]: 'relative_humidity',
                        df.columns[7]: 'resistance_gas_sensor',
                        df.columns[8]: 'heater_profile_step_index',
                        df.columns[9]: 'scanning_enabled',
                        df.columns[10]: 'label_tab',
                        df.columns[11]: 'error_code'
})
df['real_time_clock'] = pd.to_datetime(df['real_time_clock'], unit='s', origin='unix') 
df.head()

Unnamed: 0,sensor_index,sensor_id,time_since_power_on,real_time_clock,temperature,pressure,relative_humidity,resistance_gas_sensor,heater_profile_step_index,scanning_enabled,label_tab,error_code
0,0,1903378739,3207,2021-05-06 01:12:12,30.099798,1016.898499,42.087475,2721594.75,0,1,0,0
1,1,1903366707,3220,2021-05-06 01:12:12,29.919815,1016.76239,43.269604,2127792.25,0,1,0,0
2,2,1903378227,3224,2021-05-06 01:12:12,30.371464,1017.123901,41.93903,2144502.5,0,1,0,0
3,3,1903405359,3227,2021-05-06 01:12:12,30.074945,1017.31427,42.065491,2446103.25,0,1,0,0
4,4,1903388711,3231,2021-05-06 01:12:12,30.209864,1017.001526,40.711624,868901.125,0,1,0,0


In [29]:
num_sensor = df['sensor_index'].nunique()
print(num_sensor)
num_steps = df['heater_profile_step_index'].nunique()
print(num_steps)

8
5


In [30]:
df_sensor_list = []
for i in range(num_sensor):
    df_temp = df[df['sensor_index'] == i]
    df_sensor_list.append(df_temp)

num_repeats_list = []
for i in range(num_sensor):
    num_repeats_list.append(len(df_sensor_list[i][df_sensor_list[i]['heater_profile_step_index']==i]))
print(num_repeats_list)
print(list(set(num_repeats_list)))
num_repeats = max(num_repeats_list)

[253, 253, 253, 253, 253, 0, 0, 0]
[0, 253]


In [31]:
def parse_df(df):
    df_sensor_list = []

    for i in range(num_sensor):
        df_exp_list = []
        exp_unique_id = 0

        df_sensor = df[df['sensor_index']==i].set_index('real_time_clock')
        df_sensor = df_sensor.add_suffix(f'_sensor_{i}')

        for i in range(0, df_sensor.shape[0], num_steps):
            df_temp = df_sensor[i:i+num_steps]
            df_temp['exp_unique_id'] = exp_unique_id
            exp_unique_id += 1
            df_exp_list.append(df_temp)
        
        df_merge = pd.concat(df_exp_list, axis=0)
        df_sensor_list.append(df_merge)
    
    return df_sensor_list


In [32]:
df_sensor_list = parse_df(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [33]:
df_dict = {}
for i in range(num_sensor):
    df_dict[f'sensor_{i}'] = df_sensor_list[i].reset_index()


In [34]:
df_dict['sensor_0']

Unnamed: 0,real_time_clock,sensor_index_sensor_0,sensor_id_sensor_0,time_since_power_on_sensor_0,temperature_sensor_0,pressure_sensor_0,relative_humidity_sensor_0,resistance_gas_sensor_sensor_0,heater_profile_step_index_sensor_0,scanning_enabled_sensor_0,label_tab_sensor_0,error_code_sensor_0,exp_unique_id
0,2021-05-06 01:12:12,0,1903378739,3207,30.099798,1016.898499,42.087475,2.721595e+06,0,1,0,0,0
1,2021-05-06 01:12:13,0,1903378739,4187,31.566181,1016.925476,41.819595,1.262950e+05,1,1,0,0,0
2,2021-05-06 01:12:14,0,1903378739,5586,32.590897,1016.964600,40.720627,6.296114e+04,2,1,0,0,0
3,2021-05-06 01:12:17,0,1903378739,8386,33.131020,1016.918396,38.084511,1.214421e+05,3,1,0,0,0
4,2021-05-06 01:12:18,0,1903378739,9519,32.050774,1016.958496,37.163151,5.416198e+06,4,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1259,2021-05-06 01:42:08,0,1903378739,1798936,43.111298,1016.934692,18.453411,3.903276e+07,4,0,0,0,251
1260,2021-05-06 01:42:09,0,1903378739,1799728,42.828583,1016.894409,18.551609,3.618774e+07,0,1,0,0,252
1261,2021-05-06 01:42:09,0,1903378739,1800708,43.626244,1016.907104,18.760057,1.890607e+06,1,1,0,0,252
1262,2021-05-06 01:42:11,0,1903378739,1802108,44.317890,1016.883911,18.809307,3.150769e+05,2,1,0,0,252


In [35]:
df_temp_list = []
for i in range(num_sensor):
    column_name = 'sensor_' + str(i)
    df_temp = df_dict[column_name]
    if i == 0:
        df_temp = df_temp
    else:
        column_names = ['heater_profile_step_index_sensor_'+str(i),'scanning_enabled_sensor_'+str(i),
                        'label_tab_sensor_'+str(i),'error_code_sensor_'+str(i),'exp_unique_id',
                        'real_time_clock','sensor_index_sensor_'+str(i),'sensor_id_sensor_'+str(i), 
                        'time_since_power_on_sensor_'+str(i)]
        df_temp.drop(column_names, axis=1, inplace=True)
        df_temp.dropna()
        
    # 
    df_temp_list.append(df_temp)
# 
df_all = pd.concat(df_temp_list, axis=1)
df_all.reset_index(inplace=True)
df_all.dropna()

Unnamed: 0,index,real_time_clock,sensor_index_sensor_0,sensor_id_sensor_0,time_since_power_on_sensor_0,temperature_sensor_0,pressure_sensor_0,relative_humidity_sensor_0,resistance_gas_sensor_sensor_0,heater_profile_step_index_sensor_0,...,relative_humidity_sensor_5,resistance_gas_sensor_sensor_5,temperature_sensor_6,pressure_sensor_6,relative_humidity_sensor_6,resistance_gas_sensor_sensor_6,temperature_sensor_7,pressure_sensor_7,relative_humidity_sensor_7,resistance_gas_sensor_sensor_7
0,0,2021-05-06 01:12:12,0.0,1.903379e+09,3207.0,30.099798,1016.898499,42.087475,2.721595e+06,0.0,...,43.085331,3.674366e+06,29.968113,1017.148804,41.818417,1.049180e+06,30.034039,1017.266785,42.907742,7.109568e+06
1,1,2021-05-06 01:12:13,0.0,1.903379e+09,4187.0,31.566181,1016.925476,41.819595,1.262950e+05,1.0,...,42.782749,1.600500e+05,31.611074,1017.204773,41.542645,5.756690e+04,31.657578,1017.319336,42.577545,3.488927e+05
2,2,2021-05-06 01:12:14,0.0,1.903379e+09,5586.0,32.590897,1016.964600,40.720627,6.296114e+04,2.0,...,41.571884,7.987520e+04,32.843937,1017.202881,40.378387,2.968460e+04,32.892296,1017.339111,41.434841,9.767264e+04
3,3,2021-05-06 01:12:17,0.0,1.903379e+09,8386.0,33.131020,1016.918396,38.084511,1.214421e+05,3.0,...,38.632622,1.488805e+05,33.532455,1017.213196,37.532791,5.649967e+04,33.654850,1017.348083,38.509724,1.767956e+05
4,4,2021-05-06 01:12:18,0.0,1.903379e+09,9519.0,32.050774,1016.958496,37.163151,5.416198e+06,4.0,...,37.528839,8.047152e+06,32.364212,1017.245972,36.486595,1.918501e+06,32.460518,1017.345825,37.436543,1.113044e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1259,1259,2021-05-06 01:42:08,0.0,1.903379e+09,1798936.0,43.111298,1016.934692,18.453411,3.903276e+07,4.0,...,19.323378,3.466596e+07,41.469479,1017.078735,19.241982,1.736053e+07,41.609013,1017.226318,20.130537,4.563788e+07
1260,1260,2021-05-06 01:42:09,0.0,1.903379e+09,1799728.0,42.828583,1016.894409,18.551609,3.618774e+07,0.0,...,19.397617,3.218861e+07,41.126431,1017.077026,19.319780,1.614187e+07,41.280731,1017.213562,20.193075,4.265278e+07
1261,1261,2021-05-06 01:42:09,0.0,1.903379e+09,1800708.0,43.626244,1016.907104,18.760057,1.890607e+06,1.0,...,19.608330,1.683172e+06,41.921909,1017.059753,19.534370,9.063952e+05,42.063557,1017.226624,20.436197,2.375870e+06
1262,1262,2021-05-06 01:42:11,0.0,1.903379e+09,1802108.0,44.317890,1016.883911,18.809307,3.150769e+05,2.0,...,19.689693,2.962106e+05,42.699993,1017.100586,19.601068,1.658031e+05,42.836285,1017.210999,20.520655,3.368421e+05


In [36]:
columns_to_delete = ['sensor_index_sensor_0','sensor_id_sensor_0','heater_profile_step_index_sensor_0',
                    'scanning_enabled_sensor_0','label_tab_sensor_0','error_code_sensor_0',
                    'time_since_power_on_sensor_0']
df_all.drop(columns_to_delete, axis=1, inplace=True)
df_all.dropna()

Unnamed: 0,index,real_time_clock,temperature_sensor_0,pressure_sensor_0,relative_humidity_sensor_0,resistance_gas_sensor_sensor_0,exp_unique_id,temperature_sensor_1,pressure_sensor_1,relative_humidity_sensor_1,...,relative_humidity_sensor_5,resistance_gas_sensor_sensor_5,temperature_sensor_6,pressure_sensor_6,relative_humidity_sensor_6,resistance_gas_sensor_sensor_6,temperature_sensor_7,pressure_sensor_7,relative_humidity_sensor_7,resistance_gas_sensor_sensor_7
0,0,2021-05-06 01:12:12,30.099798,1016.898499,42.087475,2.721595e+06,0.0,29.919815,1016.762390,43.269604,...,43.085331,3.674366e+06,29.968113,1017.148804,41.818417,1.049180e+06,30.034039,1017.266785,42.907742,7.109568e+06
1,1,2021-05-06 01:12:13,31.566181,1016.925476,41.819595,1.262950e+05,0.0,31.497604,1016.764893,42.967094,...,42.782749,1.600500e+05,31.611074,1017.204773,41.542645,5.756690e+04,31.657578,1017.319336,42.577545,3.488927e+05
2,2,2021-05-06 01:12:14,32.590897,1016.964600,40.720627,6.296114e+04,0.0,32.737137,1016.797424,41.771393,...,41.571884,7.987520e+04,32.843937,1017.202881,40.378387,2.968460e+04,32.892296,1017.339111,41.434841,9.767264e+04
3,3,2021-05-06 01:12:17,33.131020,1016.918396,38.084511,1.214421e+05,0.0,33.466724,1016.797729,38.814636,...,38.632622,1.488805e+05,33.532455,1017.213196,37.532791,5.649967e+04,33.654850,1017.348083,38.509724,1.767956e+05
4,4,2021-05-06 01:12:18,32.050774,1016.958496,37.163151,5.416198e+06,0.0,32.348362,1016.799561,37.724083,...,37.528839,8.047152e+06,32.364212,1017.245972,36.486595,1.918501e+06,32.460518,1017.345825,37.436543,1.113044e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1259,1259,2021-05-06 01:42:08,43.111298,1016.934692,18.453411,3.903276e+07,251.0,43.502319,1016.749146,18.371756,...,19.323378,3.466596e+07,41.469479,1017.078735,19.241982,1.736053e+07,41.609013,1017.226318,20.130537,4.563788e+07
1260,1260,2021-05-06 01:42:09,42.828583,1016.894409,18.551609,3.618774e+07,252.0,43.174095,1016.729431,18.450209,...,19.397617,3.218861e+07,41.126431,1017.077026,19.319780,1.614187e+07,41.280731,1017.213562,20.193075,4.265278e+07
1261,1261,2021-05-06 01:42:09,43.626244,1016.907104,18.760057,1.890607e+06,252.0,43.918915,1016.687500,18.684637,...,19.608330,1.683172e+06,41.921909,1017.059753,19.534370,9.063952e+05,42.063557,1017.226624,20.436197,2.375870e+06
1262,1262,2021-05-06 01:42:11,44.317890,1016.883911,18.809307,3.150769e+05,252.0,44.691517,1016.703979,18.754587,...,19.689693,2.962106e+05,42.699993,1017.100586,19.601068,1.658031e+05,42.836285,1017.210999,20.520655,3.368421e+05


In [37]:
first_column = df_all.pop('exp_unique_id')
df_all.insert(0, 'exp_unique_id', first_column)
df_all.drop('index', axis=1, inplace=True)
df_all.reset_index(inplace=True,drop=True)
df_all.dropna()


Unnamed: 0,exp_unique_id,real_time_clock,temperature_sensor_0,pressure_sensor_0,relative_humidity_sensor_0,resistance_gas_sensor_sensor_0,temperature_sensor_1,pressure_sensor_1,relative_humidity_sensor_1,resistance_gas_sensor_sensor_1,...,relative_humidity_sensor_5,resistance_gas_sensor_sensor_5,temperature_sensor_6,pressure_sensor_6,relative_humidity_sensor_6,resistance_gas_sensor_sensor_6,temperature_sensor_7,pressure_sensor_7,relative_humidity_sensor_7,resistance_gas_sensor_sensor_7
0,0.0,2021-05-06 01:12:12,30.099798,1016.898499,42.087475,2.721595e+06,29.919815,1016.762390,43.269604,2.127792e+06,...,43.085331,3.674366e+06,29.968113,1017.148804,41.818417,1.049180e+06,30.034039,1017.266785,42.907742,7.109568e+06
1,0.0,2021-05-06 01:12:13,31.566181,1016.925476,41.819595,1.262950e+05,31.497604,1016.764893,42.967094,1.124780e+05,...,42.782749,1.600500e+05,31.611074,1017.204773,41.542645,5.756690e+04,31.657578,1017.319336,42.577545,3.488927e+05
2,0.0,2021-05-06 01:12:14,32.590897,1016.964600,40.720627,6.296114e+04,32.737137,1016.797424,41.771393,5.687625e+04,...,41.571884,7.987520e+04,32.843937,1017.202881,40.378387,2.968460e+04,32.892296,1017.339111,41.434841,9.767264e+04
3,0.0,2021-05-06 01:12:17,33.131020,1016.918396,38.084511,1.214421e+05,33.466724,1016.797729,38.814636,1.088898e+05,...,38.632622,1.488805e+05,33.532455,1017.213196,37.532791,5.649967e+04,33.654850,1017.348083,38.509724,1.767956e+05
4,0.0,2021-05-06 01:12:18,32.050774,1016.958496,37.163151,5.416198e+06,32.348362,1016.799561,37.724083,5.832680e+06,...,37.528839,8.047152e+06,32.364212,1017.245972,36.486595,1.918501e+06,32.460518,1017.345825,37.436543,1.113044e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1259,251.0,2021-05-06 01:42:08,43.111298,1016.934692,18.453411,3.903276e+07,43.502319,1016.749146,18.371756,2.628274e+07,...,19.323378,3.466596e+07,41.469479,1017.078735,19.241982,1.736053e+07,41.609013,1017.226318,20.130537,4.563788e+07
1260,252.0,2021-05-06 01:42:09,42.828583,1016.894409,18.551609,3.618774e+07,43.174095,1016.729431,18.450209,2.463759e+07,...,19.397617,3.218861e+07,41.126431,1017.077026,19.319780,1.614187e+07,41.280731,1017.213562,20.193075,4.265278e+07
1261,252.0,2021-05-06 01:42:09,43.626244,1016.907104,18.760057,1.890607e+06,43.918915,1016.687500,18.684637,1.354050e+06,...,19.608330,1.683172e+06,41.921909,1017.059753,19.534370,9.063952e+05,42.063557,1017.226624,20.436197,2.375870e+06
1262,252.0,2021-05-06 01:42:11,44.317890,1016.883911,18.809307,3.150769e+05,44.691517,1016.703979,18.754587,2.194599e+05,...,19.689693,2.962106e+05,42.699993,1017.100586,19.601068,1.658031e+05,42.836285,1017.210999,20.520655,3.368421e+05


In [38]:
df_all

Unnamed: 0,exp_unique_id,real_time_clock,temperature_sensor_0,pressure_sensor_0,relative_humidity_sensor_0,resistance_gas_sensor_sensor_0,temperature_sensor_1,pressure_sensor_1,relative_humidity_sensor_1,resistance_gas_sensor_sensor_1,...,relative_humidity_sensor_5,resistance_gas_sensor_sensor_5,temperature_sensor_6,pressure_sensor_6,relative_humidity_sensor_6,resistance_gas_sensor_sensor_6,temperature_sensor_7,pressure_sensor_7,relative_humidity_sensor_7,resistance_gas_sensor_sensor_7
0,0.0,2021-05-06 01:12:12,30.099798,1016.898499,42.087475,2.721595e+06,29.919815,1016.762390,43.269604,2.127792e+06,...,43.085331,3.674366e+06,29.968113,1017.148804,41.818417,1.049180e+06,30.034039,1017.266785,42.907742,7.109568e+06
1,0.0,2021-05-06 01:12:13,31.566181,1016.925476,41.819595,1.262950e+05,31.497604,1016.764893,42.967094,1.124780e+05,...,42.782749,1.600500e+05,31.611074,1017.204773,41.542645,5.756690e+04,31.657578,1017.319336,42.577545,3.488927e+05
2,0.0,2021-05-06 01:12:14,32.590897,1016.964600,40.720627,6.296114e+04,32.737137,1016.797424,41.771393,5.687625e+04,...,41.571884,7.987520e+04,32.843937,1017.202881,40.378387,2.968460e+04,32.892296,1017.339111,41.434841,9.767264e+04
3,0.0,2021-05-06 01:12:17,33.131020,1016.918396,38.084511,1.214421e+05,33.466724,1016.797729,38.814636,1.088898e+05,...,38.632622,1.488805e+05,33.532455,1017.213196,37.532791,5.649967e+04,33.654850,1017.348083,38.509724,1.767956e+05
4,0.0,2021-05-06 01:12:18,32.050774,1016.958496,37.163151,5.416198e+06,32.348362,1016.799561,37.724083,5.832680e+06,...,37.528839,8.047152e+06,32.364212,1017.245972,36.486595,1.918501e+06,32.460518,1017.345825,37.436543,1.113044e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1264,,NaT,,,,,,,,,...,,,,,,,,,,
1265,,NaT,,,,,,,,,...,,,,,,,,,,
1266,,NaT,,,,,,,,,...,,,,,,,,,,
1267,,NaT,,,,,,,,,...,,,,,,,,,,


In [39]:
df_all.to_csv('structure_data.csv')

# Training test