In [1]:
from datetime import datetime

import pandas as pd
import numpy as np

In [2]:
def as_dict(value=None, unit=None, f_field=None):
    """Create a dictionary from input pair key-value"""

    return {
        'value': value,
        'units': unit,
        'file field': f_field
    }

In [3]:
def get_data(uri):
    data = open(f_path, 'r')

    if data.readline().strip()[0] is '%':
        data.close()
        columns, data, metadata = mapper_new_data(uri)
    else:
        data.close()
        columns, data, metadata = mapper_old_data(uri)

    return columns, data, metadata


def _as_dict(value=None, unit=None, f_field=None):
    return {
        'value': value,
        'units': unit,
        'file field': f_field
    }


def mapper_new_data(uri):
    col_names = []
    data = []
    metadata = {
        'general': {},
        'time': {},
        'geolocation': {},
        'transmit': {},
        'pattern': {},
        'radial': {},
        'bragg': {},
        'other': {}                
                }
    tmp = {}

    # There are several tables in a rvl file. We need only first.
    # We also need to get an information about col names only for first table
    # The <tab_flag> is used for excepting of data overwriting in similar fields

    tab_flag = True

    with open(uri, 'r') as data_file:
        for line in data_file:
            line = line.strip()
            if line[0] is '%':
                try:
                    tag, value = [el.strip() for el in line.replace('%', '').split(': ')]
                except ValueError:
                    tag = line.replace('%', '')
                # General metadata parsing                
                if tag == 'FileType':
                    metadata['general']['type'] = _as_dict(value=value, 
                                                           f_field=tag)
                    
                elif tag == 'UUID':
                    metadata['general']['id'] = _as_dict(value=value, 
                                                         f_field=tag)
                    
                elif tag == 'Manufacturer':
                    metadata['general']['manufacture'] = _as_dict(value=value, 
                                                                  f_field=tag)
                
                elif tag == 'Site':
                    metadata['general']['site'] = _as_dict(value=value.split()[0], 
                                                           f_field=tag)
                
                # Time metadata
                elif tag == 'TimeStamp':
                    t_arr = value.split()
                    date_time = datetime(*[int(x) for x in t_arr])
                    metadata['time']['stamp'] = _as_dict(value=date_time, 
                                                         f_field=tag)
        
                # Table data and metadata parsing methods
                elif tag == 'TableColumnTypes' and tab_flag:
                    col_names = value.split()

                elif tag == 'TableColumns' and tab_flag:
                    tmp['col_num'] = int(value)
                
                elif tag == 'TableRows' and tab_flag:
                    tmp['row_num'] = int(value)
                
                elif tag == 'TableEnd:' and tab_flag:
                    tab_flag = False
                    
                # Other different metadata                   
                elif tag == 'DopplerResolutionHzPerBin':
                    metadata['other']['doppler_resolution'] = _as_dict(value=float(value), 
                                                                       unit='Hz/bin',
                                                                       f_field=tag)
                elif tag == 'RangeResolutionKMeters':
                    metadata['other']['range_resolution'] = _as_dict(value=float(value), 
                                                                     unit='km',
                                                                     f_field=tag)
                elif tag == 'CurrentVelocityLimit':
                    scale, unit = value.split()
                    metadata['other']['current_vel_lim'] = _as_dict(value=float(value), 
                                                                    unit=unit,
                                                                    f_field=tag)
    
            elif line[0] is not '%' and tab_flag:
                data.append(line.split())

        # Validation of data reading 
        if len(col_names) != tmp['col_num']:
            raise ValueError

        if len(data) != tmp['row_num']:
            raise ValueError

        elif len(data[0]) != tmp['col_num']:
            raise ValueError

    return col_names, data, metadata

In [14]:
def month_to_int(m_name):
    
    months_db = {
        'January': 1,
        'February': 2,
        'March': 3,
        'April': 4,
        'May':5,
        'June': 6,
        'July': 7,
        'August': 8,
        'September': 9,
        'October': 10,
        'November': 11,
        'December': 12,
    }

    return months_db[m_name]


def mapper_old_data(uri):
    col_names = []
    data = []
    metadata = {
    
        'time': {}
    
    }
    
    with open(uri, 'r') as data_file:
        row_number = 0
        for line in data_file:
            if row_number < 3:
                if row_number == 1:
                    col_names = line.strip().split('!')[-1].split(': ')[-1]
                elif row_number == 2:
                    meta = line.strip().split()
                    print meta
     
                    year = int(meta[5])
                    month = month_to_int(meta[3])
                    day = int(meta[4][0])
                    hour, mnt, sec = [int(x) for x in meta[0].split(':')] 
                    date_time = datetime(year, month, day, hour, mnt, sec)
                    metadata['time']['stamp'] = as_dict(value=date_time)
                    metadata['time']['zone'] = as_dict(value=meta[1])
                    
                row_number += 1

            else:

                data.append([float(el) for el in line.split()])
                
    return col_names, data, metadata

In [15]:
f_path = '/Users/artmoi/Documents/code/geo-spaas-vagrant/' \
         'shared/django-geo-spaas-hf-radar/hf_radar/dev/2003-09-03_0000_fedje.tvf'

In [16]:
cols, data, meta = get_data(f_path)
meta

['00:00:00', 'GMT', 'Wednesday,', 'September', '3,', '2003', 'Org:', "60\xb053.034'N,", "004\xb012.609'E"]


{'time': {'stamp': {'file field': None,
   'units': None,
   'value': datetime.datetime(2003, 9, 3, 0, 0)},
  'zone': {'file field': None, 'units': None, 'value': 'GMT'}}}

In [18]:
cols, data

('dx dy u v eu ev gridflag cov lat lon n1 n2 n3 n4 n5 n6',
 [[-24.0,
   -6.0,
   5.273,
   -44.448,
   22.95,
   77.05,
   0.0,
   -1659.56,
   60.829178,
   3.767028,
   2.0,
   3.0,
   0.0,
   0.0,
   0.0,
   0.0],
  [-24.0,
   -3.0,
   6.299,
   -56.053,
   4.76,
   34.96,
   0.0,
   -70.6,
   60.856174,
   3.766654,
   4.0,
   3.0,
   0.0,
   0.0,
   0.0,
   0.0],
  [-24.0,
   0.0,
   10.903,
   -87.06,
   2.92,
   37.16,
   0.0,
   85.45,
   60.883175,
   3.766278,
   2.0,
   2.0,
   0.0,
   0.0,
   0.0,
   0.0],
  [-21.0,
   -18.0,
   -9.607,
   6.403,
   14.91,
   26.62,
   0.0,
   -378.54,
   60.721359,
   3.823723,
   2.0,
   2.0,
   0.0,
   0.0,
   0.0,
   0.0],
  [-21.0,
   -15.0,
   -18.093,
   35.996,
   16.27,
   31.26,
   0.0,
   -490.55,
   60.748356,
   3.823398,
   2.0,
   2.0,
   0.0,
   0.0,
   0.0,
   0.0],
  [-21.0,
   -6.0,
   0.194,
   -15.139,
   6.39,
   23.2,
   0.0,
   -113.31,
   60.82935,
   3.822418,
   2.0,
   2.0,
   0.0,
   0.0,
   0.0,
   0.0],
  [-21

In [35]:
df = pd.DataFrame(np.array(data), columns=cols)

In [38]:
data = np.array(data)
data.shape

(897, 18)

In [104]:
datetime

TypeError: descriptor 'astimezone' of 'datetime.datetime' object needs an argument

In [34]:
cols

['LOND',
 'LATD',
 'VELU',
 'VELV',
 'VFLG',
 'ESPC',
 'ETMP',
 'MAXV',
 'MINV',
 'EDVC',
 'ERTC',
 'XDST',
 'YDST',
 'RNGE',
 'BEAR',
 'VELO',
 'HEAD',
 'SPRC']

In [105]:
a, b = [1, 2]

In [109]:
b

2