# Welcome
___
This is the notebook used in part 4 of lab 5, for converting data from the JSON objects given in the simulation into a pandas dataframe that is used for data analysis.

The simulation writes all of its data to a single text file containing many JSON objects. We need to read in each JSON object, determine whether it contains the data we are interested in, and, if it does, store that data in a DataFrame.

In [2]:
import json
import pandas as pd
import numpy as np

We first open the log file and parse the JSON object on each line:

In [3]:
# Raw simulator log; each non-blank line is expected to hold one JSON document.
log_path = r"African Savanna - v0.41 (Windows)\SimResults\log_221129_192514.txt"

json_dicts = []
blank_count = 0      # lines that are empty once whitespace is removed
malformed_count = 0  # non-empty lines that are not valid JSON

with open(log_path) as f:
    for line in f:
        # NOTE: this removes *every* space, including spaces inside string
        # values. The parsing cell further down compares message data against
        # the space-free text 'Nodata.', which presumably is the stripped form
        # of the simulator's original text -- keep the two in sync.
        cleaned = line.strip().replace(' ', '')

        if not cleaned:
            blank_count += 1
            continue

        try:
            json_dicts.append(json.loads(cleaned))
        except json.JSONDecodeError:
            # Count genuinely broken lines separately instead of silently
            # lumping them in with the blank ones.
            malformed_count += 1

print("Completed.")
print(f"Skipped parsing for {blank_count} blank json entries")
if malformed_count:
    print(f"Skipped {malformed_count} non-blank lines that were not valid JSON")

Completed.
Skipped parsing for 1320 blank json entries


We can verify that the JSON data has been parsed and appended to the list by printing the first two entries and checking that they look correct:

In [4]:
# Peek at the first two parsed entries to sanity-check the load step.
print(json_dicts[:2])

[[{'deviceId': '6386981d996cc6001debfec6_5', 'timestamp': 68.5, 'sensors': [{'id': 1669765557628, 'type': 31, 'input': {'pulseOxygen': [36, 30, 24.24908, 0.4948791]}}]}], {'status': 'OK', 'result': [{'id': '6386981d996cc6001debfec6_5', 'result': [{'timestamp': 65.43766, '1669765154374': {'id': 1669765154374, 'type': 2, 'name': 'Arduino', 'info': {'current': None, 'voltage': 0.0}, 'pin': 3}}, {'timestamp': 65.43766, '1669765154374': {'id': 1669765154374, 'type': 2, 'name': 'Arduino', 'info': {'current': 0.0, 'voltage': 0.0}, 'pin': 22}}, {'timestamp': 65.43766, '1669765154374': {'id': 1669765154374, 'type': 2, 'name': 'Arduino', 'info': {'current': 0.0, 'voltage': 0.0}, 'pin': 21}}, {'timestamp': 65.43766, '1669765154374': {'id': 1669765154374, 'type': 2, 'name': 'Arduino', 'info': {'current': 0.0, 'voltage': 5.0}, 'pin': 24}}, {'timestamp': 65.43766, '1669765543730': {'id': 1669765543730, 'type': 10, 'name': 'Resistor', 'info': {'current': 0.0, 'voltage': 0.0}}}, {'timestamp': 65.43766

The list of parsed JSON objects looks correct, so we can proceed with converting the data from the JSON objects into pandas DataFrames.

What information do we need from the sim results json objects?

        - device id / device type
        - animal id    
        - Location information (X, Y coordinates)
        - Pulse oximeter information (oxygen level, heart rate)
        - Temperature information (temp)
        - Sound information (sound volume level)

In [54]:
# Placeholder that can appear in a message's 'data' field instead of a reading.
NO_DATA_SENTINEL = 'Nodata.'


def collect_sensor_rows(parsed_entries):
    """Split parsed log entries into one list of rows per sensor type.

    Only dict entries whose 'status' is "OK" are used; everything else
    (for example the list-shaped entries) is skipped. Within each reading,
    the component stored next to 'timestamp' is inspected, and it contributes
    a row only if its 'info' carries a JSON-encoded 'message' with a usable
    'data' value.

    Args:
        parsed_entries: iterable of objects produced by json.loads on the log.

    Returns:
        Tuple (sound_rows, pox_rows, temp_rows, gps_rows). Rows are lists:
            sound: [timestamp, animal_num, sound]
            pox:   [timestamp, animal_num, oxygen, pulse]
            temp:  [timestamp, animal_num, temperature]
            gps:   [timestamp, animal_num, gpsX, gpsY]
        timestamp is rounded to one decimal; animal_num is the text after the
        last '_' of the device id (kept as a string).
    """
    sound_rows, pox_rows, temp_rows, gps_rows = [], [], [], []

    for entry in parsed_entries:
        # Skip anything that is not a status dict reporting "OK".
        if not isinstance(entry, dict) or entry.get('status') != "OK":
            continue

        for device in entry.get('result', []):
            animal_num = device['id'].split('_')[-1]  # to get the animal number

            for instance in device['result']:
                timestamp = round(instance['timestamp'], 1)

                # Select the component by key rather than by position so the
                # lookup does not depend on 'timestamp' being the first key.
                component = next(
                    (value for key, value in instance.items() if key != 'timestamp'),
                    None,
                )
                if not isinstance(component, dict):
                    continue

                info = component.get('info')
                if not isinstance(info, dict) or 'message' not in info:
                    continue

                # Decode the embedded message once and reuse it below.
                message = json.loads(info['message'])
                if not isinstance(message, dict):
                    continue

                device_type = message.get('device')
                data = message.get('data')

                # A missing/placeholder reading is skipped for every sensor
                # type, not only POX, so float() never sees the sentinel.
                if data is None or data == NO_DATA_SENTINEL:
                    continue

                if device_type == 'POX':
                    pox_rows.append(
                        [timestamp, animal_num, float(data['oxygen']), float(data['pulse'])]
                    )
                elif device_type == 'sound':
                    sound_rows.append([timestamp, animal_num, float(data)])
                elif device_type == 'temperature':
                    temp_rows.append([timestamp, animal_num, float(data)])
                elif device_type == 'GPS':
                    gps_rows.append(
                        [timestamp, animal_num, float(data[0]), float(data[1])]
                    )

    return sound_rows, pox_rows, temp_rows, gps_rows


# create lists of pertinent datapoints for every type of data collected
sound_data_list, pox_data_list, temp_data_list, gps_data_list = collect_sensor_rows(json_dicts)

# Build one DataFrame per sensor type; all of them share the same two
# leading key columns followed by the sensor-specific measurement columns.
key_columns = ['timestamp', 'animal-id']

sounds_df = pd.DataFrame(data=sound_data_list, columns=key_columns + ['sound'])
pox_df = pd.DataFrame(data=pox_data_list, columns=key_columns + ['oxygen', 'pulse'])
temp_df = pd.DataFrame(data=temp_data_list, columns=key_columns + ['temperature'])
gps_df = pd.DataFrame(data=gps_data_list, columns=key_columns + ['gpsX', 'gpsY'])

# Persist each sensor table to its own CSV file (row index omitted).
for csv_name, frame in (
    ('sound_data.csv', sounds_df),
    ('pox_data.csv', pox_df),
    ('temp_data.csv', temp_df),
    ('gps_data.csv', gps_df),
):
    frame.to_csv(csv_name, index=False)


Unnamed: 0,timestamp,animal-id,sound
0,71.5,5,0.0
1,71.6,5,0.0
2,71.7,5,0.0
3,71.8,5,0.0
4,71.9,5,0.0
...,...,...,...
11535,384.3,28,0.0
11536,384.4,28,0.0
11537,384.5,28,0.0
11538,384.6,28,0.0
