In [1]:
# Date stamps as YYYYMMDD strings; several dates appear more than once.
# Not referenced by any other code visible in this notebook -- presumably the
# acquisition dates of the raw text files; TODO confirm.
txt_dates = ["20121106", "20121106", "20130808", "20130808", "20130814", "20130814", "20130904", "20130904", "20130916", "20130916", "20130917", "20130917", "20130926", "20130926", "20131031", "20131101", "20120907", "20120907", "20120907", "20120912", "20120920", "20120923", "20120923", "20120927", "20120927", "20121006", "20130801", "20130801", "20130816", "20130816", "20130820", "20130821", "20130821", "20130825", "20130825", "20130830", "20130830", "20130905", "20130905", "20130908", "20130908", "20130916", "20130917", "20130917", "20130919", "20130920", "20130920", "20130926", "20130926", "20140827", "20140827", "20140829", "20140829", "20140903", "20140903", "20140906", "20140906", "20140912", "20140912", "20140915", "20140915", "20140917", "20140917", "20140919", "20140919", "20140923", "20140923", "20140929", "20140929", "20140930", "20140930"]

In [9]:
import numpy as np
from copy import deepcopy
import json

# variable declarations

# Template for the flight-track packet. FlightTrackCzmlWriter deep-copies it
# in __init__ and then fills in the time, position and orientation fields, so
# this module-level dict itself is never mutated by the writer.
model = {
    "id": "Flight Track",
    "name": "ER2",
    # Placeholder; set_time() overwrites it with "<first timestamp>/<last timestamp>".
    "availability": "{}/{}",
    "model": {
        "gltf": "https://s3.amazonaws.com/visage-czml/iphex_HIWRAP/img/er2.gltf",
        "scale": 100.0,
        "minimumPixelSize": 32,
        "maximumScale": 150.0
    },
    "position": {
        # Replaced by a flat list of [time, longitude, latitude, altitude]
        # quadruples (see set_time / set_position); set_time also adds an
        # "epoch" key next to it.
        "cartographicDegrees": []
    },
    "path": {
        "material": {
            "solidColor": {
                "color": {
                    "rgba": [0, 255, 128, 255]
                }
            }
        },
        "width": 1,
        "resolution": 5
    },
    # Each entry receives an "epoch" key and a "number" list of flat
    # [time, value] pairs (see set_time / set_orientation).
    "properties": {
        "roll": {},
        "pitch": {},
        "heading": {}
    }
}

# First packet of the emitted document; get_string() serialises it ahead of
# the flight-track packet.
czml_head = {
    "id": "document",
    "name": "wall czml",
    "version": "1.0"
}

# class declaration

class FlightTrackCzmlWriter:
    """Fills a copy of the module-level ``model`` template with one flight track.

    Time, position and orientation samples are stored as flat interleaved
    lists (``[t0, lon0, lat0, alt0, t1, ...]`` for the position and
    ``[t0, v0, t1, v1, ...]`` for roll/pitch/heading). All values are stored
    as plain Python numbers so that ``get_string`` can always serialise them.
    """

    def __init__(self, length):
        """Allocate zero-filled sample lists for ``length`` samples.

        Args:
            length (int): number of samples the track will hold.
        """
        self.model = deepcopy(model)
        self.length = length
        self.model['position']['cartographicDegrees'] = [0] * 4 * length
        for name in ('roll', 'pitch', 'heading'):
            self.model['properties'][name]['number'] = [0] * 2 * length

    @staticmethod
    def _as_plain_list(values):
        """Return ``values`` as a list of built-in Python scalars.

        NumPy integers (and non-float64 floats) are not accepted by
        ``json.dumps``, so everything is normalised before being stored.
        """
        if not isinstance(values, np.ndarray):
            values = np.asarray(list(values))
        return values.tolist()

    def set_time(self, time_window, time_steps):
        """Store the availability window, the epoch and the per-sample offsets.

        Args:
            time_window (sequence): two timestamps strings, first and last.
            time_steps (iterable): per-sample time offsets relative to the
                first timestamp; must contain ``length`` items.
        """
        epoch = str(time_window[0])
        end = str(time_window[1])
        steps = self._as_plain_list(time_steps)
        self.model['availability'] = "{}/{}".format(epoch, end)
        self.model['position']['epoch'] = epoch
        self.model['position']['cartographicDegrees'][0::4] = steps
        for name in ('roll', 'pitch', 'heading'):
            self.model['properties'][name]['epoch'] = epoch
            self.model['properties'][name]['number'][0::2] = steps

    def set_position(self, longitude, latitude, altitude):
        """Write longitude, latitude and altitude into slots 1, 2 and 3 of each quadruple."""
        degrees = self.model['position']['cartographicDegrees']
        degrees[1::4] = self._as_plain_list(longitude)
        degrees[2::4] = self._as_plain_list(latitude)
        degrees[3::4] = self._as_plain_list(altitude)

    def set_orientation(self, roll, pitch, heading):
        """Write roll, pitch and heading values next to their time offsets."""
        properties = self.model['properties']
        properties['roll']['number'][1::2] = self._as_plain_list(roll)
        properties['pitch']['number'][1::2] = self._as_plain_list(pitch)
        properties['heading']['number'][1::2] = self._as_plain_list(heading)

    def set_with_df(self, df):
        """Populate the track from a table with the columns
        'timestamp', 'lon', 'lat', 'height_msl', 'roll', 'pitch' and 'track'.
        """
        self.set_time(*self.get_time_info(df['timestamp']))
        self.set_position(df['lon'], df['lat'], df['height_msl'])
        self.set_orientation(df['roll'], df['pitch'], df['track'])

    def get_time_info(self, time):
        """Derive the availability window and second offsets from timestamps.

        Args:
            time: datetime-like sequence (NumPy array, pandas Series, ...).

        Returns:
            tuple: ``(time_window, time_steps)`` where ``time_window`` is a
            two-element array of ISO-8601 strings suffixed with 'Z' and
            ``time_steps`` is an integer array of whole seconds elapsed since
            the first timestamp.
        """
        # Work on a plain NumPy array at second resolution: positional
        # indexing with -1 is label-based on a pandas Series, and the offsets
        # must come out in seconds regardless of the input's resolution.
        stamps = np.asarray(time).astype('datetime64[s]')
        # np.string_ was removed in NumPy 2.0 and np.core is private;
        # datetime_as_string yields the same ISO text on every NumPy version.
        time_window = np.char.add(
            np.datetime_as_string(stamps[[0, -1]], unit='s'), 'Z')
        time_steps = (stamps - stamps[0]).astype(int)
        return time_window, time_steps

    def get_string(self):
        """Return the document (header packet plus track packet) as a JSON string."""
        return json.dumps([czml_head, self.model])

# class declaration

class FlightTrackReader:
    """
    Reads the Level L1 instrument (navigation) data from comma-separated text.

    After ``read_csv`` the instance exposes ``time_window``, ``time_steps``,
    ``latitude``, ``longitude``, ``altitude``, ``heading``, ``pitch``,
    ``roll`` and ``length``.
    """

    def __init__(self, rni, n_columns=33):
        """
        Initiate the object. Needs (rni) row_name_index_map (a hash)

        Args:
            rni (dictionary): row_name_index_map, formed by the column name as
                key and its position in the L1 data as value. Must contain
                "time", "latitude", "longitude", "altitude", "heading",
                "pitch" and "roll".
            n_columns (int, optional): number of columns in the input file.
                Defaults to 33 (flight nav dataset).
        """
        # Every column is ignored (read as NaN) unless it is needed below.
        self.converters = {column: self.ignore for column in range(n_columns)}
        self.converters[rni["time"]] = self.string_to_date
        for field in ("latitude", "longitude", "altitude",
                      "heading", "pitch", "roll"):
            self.converters[rni[field]] = self.string_to_float
        self.row_name_index_map = rni  # future ref.

    def read_csv(self, infile, step=5):
        """
        Read the level L1 data from text file.

        Rows with an unparsable latitude, longitude, altitude, heading, pitch
        or roll are dropped, only the first row of each timestamp is kept, and
        every ``step``-th remaining row is retained.

        Args:
            infile (generator): data that is read from the file (s3)
            step (int, optional): keep every ``step``-th valid row. Defaults to 5.

        Raises:
            ValueError: if no row survives the filtering.
        """
        rni = self.row_name_index_map
        # ndmin=2 keeps a single-row file two-dimensional so that column
        # slicing below works.
        data = np.loadtxt(infile, delimiter=',', converters=self.converters,
                          ndmin=2)
        time = data[:, rni["time"]]
        latitude = data[:, rni["latitude"]]
        longitude = data[:, rni["longitude"]]
        altitude = data[:, rni["altitude"]]
        # Angles are converted from degrees to radians; heading is
        # additionally shifted by -pi/2.
        heading = data[:, rni["heading"]] * np.pi / 180. - np.pi / 2.
        pitch = data[:, rni["pitch"]] * np.pi / 180.
        roll = data[:, rni["roll"]] * np.pi / 180.

        mask = np.ones(time.shape, dtype=bool)
        for values in (latitude, longitude, altitude, heading, pitch, roll):
            mask &= ~np.isnan(values)

        # Keep only the first row of each timestamp.
        # NOTE(review): uniqueness is decided before invalid rows are removed,
        # so a timestamp whose first row is invalid is dropped entirely.
        _, first_idx = np.unique(time, return_index=True)
        first_seen = np.zeros(time.shape, dtype=bool)
        first_seen[first_idx] = True
        mask &= first_seen

        if not mask.any():
            raise ValueError("no valid navigation rows found in input")

        time = time[mask].astype('datetime64[s]')
        # np.string_ was removed in NumPy 2.0 and np.core is private;
        # datetime_as_string yields the same ISO text on every NumPy version.
        self.time_window = np.char.add(
            np.datetime_as_string(time[[0, -1]], unit='s'), 'Z')
        self.time_steps = (time - time[0]).astype(int).tolist()[::step]
        self.latitude = latitude[mask][::step]
        self.longitude = longitude[mask][::step]
        self.altitude = altitude[mask][::step]
        self.heading = heading[mask][::step]
        self.pitch = pitch[mask][::step]
        self.roll = roll[mask][::step]
        self.length = len(self.time_steps)

    # Below are functions needed for column converters during numpy loadtxt.
    # The parameter name ``str`` shadows the builtin but is kept for
    # backward compatibility.

    def string_to_float(self, str):
        """Parse a field as float; unparsable fields become NaN."""
        try:
            return float(str)
        except (TypeError, ValueError):
            return np.nan

    def string_to_date(self, str):
        """Parse a timestamp field and return it as integer seconds since the Unix epoch."""
        time = np.datetime64(str, 's')
        return time.astype(np.int64)

    def ignore(self, value):
        """Converter for unused columns: always NaN."""
        return np.nan


In [14]:
"""
nav_to_czml takes in nav data from various aircraft (er-2 and dc-8) of the olympex campaign.
The generated czml can be used to plot the flight track in Cesium.
"""

import boto3
import os

from nav_reader_writer import FlightTrackCzmlWriter 
from nav_reader_writer import FlightTrackReader

def data_pre_process(bucket_name="ghrc-fcx-field-campaigns-szg",
                     field_campaign="Olympex",
                     input_data_dir="instrument-raw-data",
                     output_data_dir="instrument-processed-data",
                     instrument_name="nav",
                     row_name_index_map=None,
                     file_prefix="hs3_navgh"):
    """
    gets raw file path to s3 defined path.
    converts it to czml.
    puts converted file to s3 defined path.

    Args:
        bucket_name (str, optional): source bucket. Defaults to "ghrc-fcx-field-campaigns-szg".
        field_campaign (str, optional): name of field campaign. Case sensitive. Defaults to "Olympex".
        input_data_dir (str, optional): folder name where raw data sits. Case sensitive. Defaults to "instrument-raw-data".
        output_data_dir (str, optional): folder name where converted data will be stored. Case sensitive. Defaults to "instrument-processed-data".
        instrument_name (str, optional): instrument from which data is collected. Defaults to "nav".
        row_name_index_map (hash): Hash formed by the column name as key and its position in the L1 data as value. Needed to know position of data column to take during read.
        file_prefix (str, optional): leading part of the raw file names to convert. Defaults to "hs3_navgh".
    """
    # Avoid a shared mutable default argument; an empty map behaves exactly
    # like the previous ``{}`` default.
    if row_name_index_map is None:
        row_name_index_map = {}

    key_prefix = f"{field_campaign}/{input_data_dir}/{instrument_name}/{file_prefix}"
    s3bucket = boto3.resource('s3').Bucket(bucket_name)
    # Reverse order: the output key drops the "-HHMM" part of the file name,
    # so files of the same date share one output key and the one processed
    # last (the earliest) is the one that remains.
    # TODO: when multiple datafiles available for a single day, first merge them. Then create czml with the merged file.
    keys = sorted((obj.key for obj in s3bucket.objects.filter(Prefix=key_prefix)),
                  reverse=True)

    s3_client = boto3.client('s3')
    for infile in keys:
        s3_file = s3_client.get_object(Bucket=bucket_name, Key=infile)
        reader = FlightTrackReader(row_name_index_map)
        reader.read_csv(s3_file['Body'].iter_lines())

        writer = FlightTrackCzmlWriter(reader.length)
        writer.set_time(reader.time_window, reader.time_steps)
        writer.set_position(reader.longitude, reader.latitude, reader.altitude)
        writer.set_orientation(reader.roll, reader.pitch, reader.heading)

        output_name = os.path.splitext(os.path.basename(infile))[0]
        output_name_wo_time = output_name.split("-")[0]
        outfile = f"{field_campaign}/{output_data_dir}/{instrument_name}/{output_name_wo_time}.czml"
        s3_client.put_object(Body=writer.get_string(), Bucket=bucket_name, Key=outfile)
        print(infile+" conversion done.")

def globalHawk():
    """Convert the "Hs3" campaign nav files using this dataset's column layout.

    The bucket name is hard-coded here; reading it from the RAW_DATA_BUCKET
    environment variable is not enabled.
    """
    # Column positions of the fields to read; modify according to the data
    # manual and the data availability.
    column_positions = dict(
        time=1,
        latitude=2,
        longitude=3,
        altitude=5,
        heading=13,
        pitch=16,
        roll=17,
    )
    data_pre_process(
        bucket_name="ghrc-fcx-field-campaigns-szg",
        field_campaign="Hs3",
        input_data_dir="instrument-raw-data",
        output_data_dir="instrument-processed-data",
        instrument_name="nav",
        row_name_index_map=column_positions,
    )

# Cell entry point: runs the "Hs3" nav-to-CZML conversion defined above.
globalHawk()

Hs3/instrument-raw-data/nav/hs3_navgh_N871NA_IWG1_20130808-1804.txt conversion done.
Hs3/instrument-raw-data/nav/hs3_navgh_N871NA_IWG1_20130808-1753.txt conversion done.
Hs3/instrument-raw-data/nav/hs3_navgh_N871NA_IWG1_20121106-2323.txt conversion done.
Hs3/instrument-raw-data/nav/hs3_navgh_N871NA_IWG1_20121106-2151.txt conversion done.
Hs3/instrument-raw-data/nav/hs3_navgh_N871NA_IWG1_20121106-2103.txt conversion done.
