# Convert NASA data to Orion format

In this notebook we download the data from the Telemanom S3 bucket and reformat it
as Orion pipelines expect.

## Download the data

In [1]:
import io
import os
import urllib
import urllib.request
import zipfile

DATA_URL = 'https://s3-us-west-2.amazonaws.com/telemanom/data.zip'

# Skip the download when a `data` path already exists in the working directory.
if not os.path.exists('data'):
    # The context manager closes the HTTP response as soon as the archive
    # has been read into memory.
    with urllib.request.urlopen(DATA_URL) as response:
        bytes_io = io.BytesIO(response.read())

    # extractall() without arguments unpacks into the current working directory.
    with zipfile.ZipFile(bytes_io) as zf:
        zf.extractall()

In [2]:
# os.listdir returns entries in arbitrary order, so both listings are sorted:
# comparing the two lists and iterating over them is then deterministic.
# Only `.npy` entries are kept so that stray files (e.g. hidden files) are
# not treated as signals.
train_signals = sorted(name for name in os.listdir('data/train') if name.endswith('.npy'))
test_signals = sorted(name for name in os.listdir('data/test') if name.endswith('.npy'))

In [3]:
# Sanity check: both splits must list the same signal file names, in the same order.
train_signals == test_signals

True

## Convert the NPY matrices to CSVs

We convert the NPY matrices to CSV files with two columns: `timestamp` and `value`.

To do this, we load both the train and test matrices for each signal, keep the first
column of each, and concatenate them (train first) into a single series of values per signal.

Afterwards, we add a `timestamp` column: the first row gets the value 1222819200 (2008-10-01T00:00:00 UTC),
and the timestamp increases by 21600 seconds (6h) for each subsequent row.

In [5]:
import pandas as pd
import numpy as np

In [6]:
def build_df(signal, start=1222819200, interval=21600, data_dir='data'):
    """Build a ``timestamp``/``value`` DataFrame for a single signal.

    The first column of the train matrix and the first column of the test
    matrix are concatenated (train first), and an evenly spaced synthetic
    timestamp is attached to every row.

    Args:
        signal (str):
            File name of the signal, e.g. ``'S-1.npy'``. The same name is
            loaded from ``<data_dir>/train`` and ``<data_dir>/test``.
        start (int):
            Timestamp assigned to the first row, in seconds. Defaults to
            1222819200 (2008-10-01T00:00:00 UTC).
        interval (int):
            Number of seconds between consecutive rows. Defaults to
            21600 (6 hours).
        data_dir (str):
            Directory containing the ``train`` and ``test`` folders.
            Defaults to ``'data'``.

    Returns:
        pandas.DataFrame:
            Frame with an integer ``timestamp`` column and a ``value`` column,
            one row per train row followed by one row per test row.
    """
    train = np.load(os.path.join(data_dir, 'train', signal))
    test = np.load(os.path.join(data_dir, 'test', signal))

    # Only the first column of each matrix is kept as the signal value.
    value = np.concatenate([train[:, 0], test[:, 0]])

    # Explicit int64: on platforms where the default integer is 32 bits, epoch
    # timestamps of this magnitude could overflow for long signals.
    timestamp = np.arange(len(value), dtype=np.int64) * interval + start

    return pd.DataFrame({'timestamp': timestamp, 'value': value})

# Try the conversion on a single signal before exporting all of them.
data = build_df('S-1.npy')

In [7]:
# Preview the first rows of the converted S-1 signal.
data.head()

Unnamed: 0,timestamp,value
0,1222819200,-0.366359
1,1222840800,-0.394108
2,1222862400,0.403625
3,1222884000,-0.362759
4,1222905600,-0.370746


## Store the results as CSV

In [8]:
# Create the output directory; exist_ok=True keeps this cell from failing
# when the directory already exists (e.g. on a re-run).
os.makedirs('csv', exist_ok=True)

In [9]:
# Export every signal to csv/<signal name>.csv, without the DataFrame index.
for signal in train_signals:
    data = build_df(signal)
    # splitext strips the extension whatever its length, unlike a fixed
    # four-character slice.
    name = os.path.splitext(signal)[0]
    data.to_csv(os.path.join('csv', name + '.csv'), index=False)

In [10]:
# Read one of the exported files back to check the CSV output.
nasa = pd.read_csv('csv/S-1.csv')

In [11]:
# Preview the first rows of the frame read back from csv/S-1.csv.
nasa.head()

Unnamed: 0,timestamp,value
0,1222819200,-0.366359
1,1222840800,-0.394108
2,1222862400,0.403625
3,1222884000,-0.362759
4,1222905600,-0.370746
