# Gerando série temporal para os dados de VTEC
___

Os dados de VTEC são disponibilizados no diretório data/tecmap_txt. O papel deste notebook é processar cada arquivo neste diretório, convertendo-o em uma matriz. A cada matriz será associado um instante. Combinando as várias matrizes, tem-se uma série temporal.

In [1]:
import os
import sys
import numpy as np
import pandas as pd
import datetime

from pathos.multiprocessing import ProcessPool

import utils

from utils import local_tecmap_txt

In [2]:
# Keep regular files only, in sorted order: os.listdir returns entries in
# arbitrary order and also lists sub-directories, which cannot be opened as
# TEC map text files.
files = sorted(
    name
    for name in os.listdir(local_tecmap_txt)
    if os.path.isfile(os.path.join(local_tecmap_txt, name))
)

In [3]:
def file_to_array(file, directory=None):
    """Parse one TEC map text file into a timestamped 141x181 grid.

    The first line of the file is split on whitespace and its fields 1 to 5
    are read as year, month, day, hour and minute (field 0 is ignored). The
    second line is skipped. Every remaining non-blank line must hold at
    least three numbers: two coordinates and a value. The coordinates are
    mapped to array indices as ``(first - 260) * 2`` and
    ``(second + 60) * 2``, i.e. a 0.5-step grid (presumably longitude in
    degrees east and latitude -- confirm against the data provider).

    A value of exactly 999.0 is stored as NaN (presumably the "no data"
    flag of the format). Cells that never appear in the file keep their
    initial value of 0.0.

    Args:
        file: Name of the file, relative to ``directory``.
        directory: Directory that contains ``file``. Defaults to the
            module-level ``local_tecmap_txt``.

    Returns:
        A ``(index, array)`` tuple where ``index`` is a timezone-aware UTC
        ``datetime.datetime`` and ``array`` is a ``numpy.ndarray`` of shape
        ``(141, 181)``.

    Raises:
        ValueError: If the date header has fewer than six fields, or a
            field cannot be converted to a number.
        IndexError: If a data line has fewer than three numbers or a
            coordinate falls beyond the end of the grid.
    """
    if directory is None:
        directory = local_tecmap_txt

    missing_flag = 999.0
    grid = np.zeros((141, 181))

    with open(os.path.join(directory, file), "r") as text:
        stamp = text.readline().split()
        if len(stamp) < 6:
            # Name the file: inside a process pool a bare IndexError gives
            # no hint about which input was malformed.
            raise ValueError(
                "{}: expected at least 6 fields in the date header, "
                "got {}".format(file, len(stamp))
            )
        text.readline()  # second line is a header that is not used

        for line in text:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            values = [float(m) for m in fields]
            i = int((values[0] - 260.0) * 2.0)
            j = int((values[1] + 60.0) * 2.0)
            grid[i, j] = np.nan if values[2] == missing_flag else values[2]

    year, month, day, hour, minute = (int(part) for part in stamp[1:6])
    index = datetime.datetime(year, month, day, hour, minute,
                              tzinfo=datetime.timezone.utc)
    return (index, grid)

In [4]:
%%time
# Parse every file in parallel with 4 worker processes; each result is the
# (index, array) tuple returned by file_to_array.
pool = ProcessPool(nodes=4)
try:
    data = pool.map(file_to_array, files)
finally:
    # Shut the workers down instead of leaving them alive for the rest of
    # the session; clear() also drops the pool from pathos' internal cache
    # so a later ProcessPool(nodes=4) starts fresh.
    pool.close()
    pool.join()
    pool.clear()

CPU times: user 1.5 s, sys: 2.63 s, total: 4.12 s
Wall time: 2min 25s


In [5]:
# Each record in `data` is an (index, array) tuple; the two labels below
# become the column names of the resulting DataFrame.
labels = ['time-index', 'vtec']
data_series_tec = pd.DataFrame.from_records(data=data, columns=labels)

In [6]:
# Convert the timestamp column to pandas datetimes, then promote it to the
# index; set_index removes the column from the frame in the same step.
data_series_tec['time-index'] = pd.to_datetime(data_series_tec['time-index'])
data_series_tec.set_index('time-index', inplace=True)

In [7]:
%%time
# Order the rows chronologically by their datetime index.
data_series_tec = data_series_tec.sort_index()

CPU times: user 889 µs, sys: 1.14 ms, total: 2.03 ms
Wall time: 1.66 ms


In [8]:
# Preview the first five rows of the sorted series.
data_series_tec.head(n=5)

Unnamed: 0_level_0,vtec
time-index,Unnamed: 1_level_1
2013-12-01 00:00:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2013-12-01 00:10:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2013-12-01 00:20:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2013-12-01 00:30:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2013-12-01 00:40:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."


In [9]:
# Preview the last five rows of the sorted series.
data_series_tec.tail(n=5)

Unnamed: 0_level_0,vtec
time-index,Unnamed: 1_level_1
2014-02-28 23:10:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2014-02-28 23:20:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2014-02-28 23:30:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2014-02-28 23:40:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."
2014-02-28 23:50:00+00:00,"[[nan, nan, nan, nan, nan, nan, nan, nan, nan,..."


In [10]:
# Persist the full series as an xz-compressed pickle under ./data.
data_series_tec.to_pickle(
    path="./data/data_series_tec.pkl.xz",
    compression='xz',
)