In [1]:
import pandas as pd
import json
from matplotlib import pyplot as plt

In [2]:
json_file_path = 'bosch_data.json'

In [3]:
with open(json_file_path, 'r') as j:
     contents = json.loads(j.read())

In [4]:
data = contents['rawDataBody']
df = pd.json_normalize(data, record_path =['dataBlock'])
# 
df = df.rename(columns={df.columns[0]: 'sensor_index',  
                        df.columns[1]: 'sensor_id',
                        df.columns[2]: 'time_since_power_on',
                        df.columns[3]: 'real_time_clock',
                        df.columns[4]: 'temperature',
                        df.columns[5]: 'pressure',
                        df.columns[6]: 'relative_humidity',
                        df.columns[7]: 'resistance_gas_sensor',
                        df.columns[8]: 'heater_profile_step_index',
                        df.columns[9]: 'scanning_enabled',
                        df.columns[10]: 'label_tab',
                        df.columns[11]: 'error_code'
})
df['real_time_clock'] = pd.to_datetime(df['real_time_clock'], unit='s', origin='unix') 
df.head()

Unnamed: 0,sensor_index,sensor_id,time_since_power_on,real_time_clock,temperature,pressure,relative_humidity,resistance_gas_sensor,heater_profile_step_index,scanning_enabled,label_tab,error_code
0,0,1903378739,3290,2021-05-02 06:07:15,28.108484,1015.324036,44.888802,6599.298828,0,1,0,0
1,1,1903366707,3307,2021-05-02 06:07:15,27.895237,1015.259094,45.912663,5684.846191,0,1,0,0
2,2,1903378227,3311,2021-05-02 06:07:15,28.323999,1015.64032,45.144131,6175.22168,0,1,0,0
3,3,1903405359,3315,2021-05-02 06:07:15,28.041586,1015.897949,44.812817,5802.357422,0,1,0,0
4,4,1903388711,3319,2021-05-02 06:07:15,28.284918,1015.644653,43.40202,5684.846191,0,1,0,0


In [5]:
num_sensor = df['sensor_index'].nunique()
print(num_sensor)
num_steps = df['heater_profile_step_index'].nunique()
print(num_steps)

8
10


In [6]:
def parse_df(df):
    df_sensor_list = []

    for i in range(num_sensor):
        df_exp_list = []
        exp_unique_id = 0

        df_sensor = df[df['sensor_index']==i].set_index('real_time_clock')
        df_sensor = df_sensor.add_suffix(f'_sensor_{i}')

        for i in range(0, df_sensor.shape[0], num_steps):
            df_temp = df_sensor[i:i+num_steps]
            df_temp['exp_unique_id'] = exp_unique_id
            exp_unique_id += 1
            df_exp_list.append(df_temp)
        
        df_merge = pd.concat(df_exp_list, axis=0)
        df_sensor_list.append(df_merge)
    
    return df_sensor_list


In [7]:
df_sensor_list = parse_df(df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


In [14]:
df_dict = {}
for i in range(num_sensor):
    df_dict[f'sensor_{i}'] = df_sensor_list[i].reset_index()
    


In [15]:
df_dict['sensor_0']

Unnamed: 0,real_time_clock,sensor_index_sensor_0,sensor_id_sensor_0,time_since_power_on_sensor_0,temperature_sensor_0,pressure_sensor_0,relative_humidity_sensor_0,resistance_gas_sensor_sensor_0,heater_profile_step_index_sensor_0,scanning_enabled_sensor_0,label_tab_sensor_0,error_code_sensor_0,exp_unique_id
0,2021-05-02 06:07:15,0,1903378739,3290,28.108484,1015.324036,44.888802,6.599299e+03,0,1,0,0,0
1,2021-05-02 06:07:16,0,1903378739,3569,28.012579,1015.395508,44.785908,6.717397e+04,1,1,0,0,0
2,2021-05-02 06:07:17,0,1903378739,4969,28.262440,1015.385681,44.020065,6.727990e+04,2,1,0,0,0
3,2021-05-02 06:07:21,0,1903378739,9169,28.613251,1015.371216,41.526806,6.463014e+04,3,1,0,0,0
4,2021-05-02 06:07:22,0,1903378739,9869,29.148304,1015.372314,41.253151,1.623130e+04,4,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10203,2021-05-02 09:11:15,0,1903378739,11041725,35.697926,1013.757996,23.853655,3.050314e+07,3,1,0,0,1020
10204,2021-05-02 09:11:16,0,1903378739,11042425,36.366795,1013.803589,23.995747,1.534657e+06,4,1,0,0,1020
10205,2021-05-02 09:11:16,0,1903378739,11043124,36.561146,1013.810547,24.021280,1.331600e+06,5,1,0,0,1020
10206,2021-05-02 09:11:17,0,1903378739,11043825,36.631817,1013.819214,24.005022,1.202584e+06,6,1,0,0,1020


In [12]:
pd.concat([df_dict['sensor_0'], df_dict['sensor_2'], df_dict['sensor_4'], df_dict['sensor_6']], axis=1)

Unnamed: 0,real_time_clock,sensor_index_sensor_0,sensor_id_sensor_0,time_since_power_on_sensor_0,temperature_sensor_0,pressure_sensor_0,relative_humidity_sensor_0,resistance_gas_sensor_sensor_0,heater_profile_step_index_sensor_0,scanning_enabled_sensor_0,...,time_since_power_on_sensor_6,temperature_sensor_6,pressure_sensor_6,relative_humidity_sensor_6,resistance_gas_sensor_sensor_6,heater_profile_step_index_sensor_6,scanning_enabled_sensor_6,label_tab_sensor_6,error_code_sensor_6,exp_unique_id
0,2021-05-02 06:07:15,0.0,1.903379e+09,3290.0,28.108484,1015.324036,44.888802,6599.298828,0.0,1.0,...,3326.0,28.228247,1015.742737,43.992817,5684.846191,0.0,1.0,0.0,0.0,0.0
1,2021-05-02 06:07:16,0.0,1.903379e+09,3569.0,28.012579,1015.395508,44.785908,67173.968750,1.0,1.0,...,3606.0,28.218307,1015.792664,43.931282,29418.525391,1.0,1.0,0.0,0.0,0.0
2,2021-05-02 06:07:17,0.0,1.903379e+09,4969.0,28.262440,1015.385681,44.020065,67279.898438,2.0,1.0,...,5006.0,28.588648,1015.787109,43.216057,30103.480469,2.0,1.0,0.0,0.0,0.0
3,2021-05-02 06:07:21,0.0,1.903379e+09,9169.0,28.613251,1015.371216,41.526806,64630.144531,3.0,1.0,...,9206.0,29.053436,1015.782898,40.572002,29337.611328,3.0,1.0,0.0,0.0,0.0
4,2021-05-02 06:07:22,0.0,1.903379e+09,9869.0,29.148304,1015.372314,41.253151,16231.295898,4.0,1.0,...,9906.0,29.605225,1015.724915,40.278606,7766.990234,4.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10242,NaT,,,,,,,,,,...,,,,,,,,,,
10243,NaT,,,,,,,,,,...,,,,,,,,,,
10244,NaT,,,,,,,,,,...,,,,,,,,,,
10245,NaT,,,,,,,,,,...,,,,,,,,,,


In [16]:
pd.concat([df_dict['sensor_1'], df_dict['sensor_3'], df_dict['sensor_5'], df_dict['sensor_7']], axis=1)

Unnamed: 0,real_time_clock,sensor_index_sensor_1,sensor_id_sensor_1,time_since_power_on_sensor_1,temperature_sensor_1,pressure_sensor_1,relative_humidity_sensor_1,resistance_gas_sensor_sensor_1,heater_profile_step_index_sensor_1,scanning_enabled_sensor_1,...,time_since_power_on_sensor_7,temperature_sensor_7,pressure_sensor_7,relative_humidity_sensor_7,resistance_gas_sensor_sensor_7,heater_profile_step_index_sensor_7,scanning_enabled_sensor_7,label_tab_sensor_7,error_code_sensor_7,exp_unique_id
0,2021-05-02 06:07:15,1,1903366707,3307,27.895237,1015.259094,45.912663,5684.846191,0,1,...,3330,28.236313,1015.705627,44.802956,8.097166e+03,0,1,0,0,0
1,2021-05-02 06:07:16,1,1903366707,3587,27.892712,1015.321411,45.839741,49430.390625,1,1,...,3610,28.216116,1015.788757,44.697643,9.756098e+04,1,1,0,0,0
2,2021-05-02 06:07:17,1,1903366707,4987,28.253698,1015.290833,45.056477,49545.191406,2,1,...,5010,28.609995,1015.802612,44.026154,9.443010e+04,2,1,0,0,0
3,2021-05-02 06:07:21,1,1903366707,9187,28.740910,1015.289734,42.244095,46715.328125,3,1,...,9210,29.122547,1015.782715,41.398540,8.704523e+04,3,1,0,0,0
4,2021-05-02 06:07:22,1,1903366707,9887,29.265987,1015.299683,41.943020,12112.036133,4,1,...,9910,29.672977,1015.791687,41.093056,2.122016e+04,4,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3445,2021-05-02 09:10:50,1,1903366707,11016919,36.872391,1013.661560,24.682991,769924.812500,5,1,...,11019387,39.679745,1014.146301,21.523655,1.438202e+06,5,1,0,0,344
3446,2021-05-02 09:10:51,1,1903366707,11017619,36.973377,1013.661865,24.664001,687940.875000,6,1,...,11020086,39.793381,1014.121155,21.494440,1.311140e+06,6,1,0,0,344
3447,2021-05-02 09:10:52,1,1903366707,11018180,37.425285,1013.655151,24.623985,146704.875000,7,1,...,11020786,40.283268,1014.110291,21.454405,2.252530e+05,7,1,0,0,344
3448,2021-05-02 09:10:52,1,1903366707,11018880,37.647457,1013.646667,24.539194,174624.828125,8,1,...,11021486,40.515587,1014.149353,21.385635,2.629012e+05,8,1,0,0,344


In [None]:
# df1 = df_sensor_list[1].reset_index()
# df2 = df_sensor_list[2].reset_index()
# df3 = df_sensor_list[3].reset_index()
# df4 = df_sensor_list[4].reset_index()