In [1]:
# Script to merge ASOS/AWOS reports with measured reports for 2007-2018

In [2]:
# Import modules

import pandas as pd
import numpy as np
from scipy.spatial import KDTree
from datetime import datetime
import time

In [19]:
# Open up the measured reports and the ASOS/AWOS reports as CSV's

measured = pd.read_csv('/home/nathane1/research/EnvironmentalData/SRs_WithElev+Pop/2019SR_WithPop')
measured['UTC_begin_time'] = pd.to_datetime(measured.UTC_begin_time)

dot = pd.read_csv('/home/nathane1/research/EnvironmentalData/CertainDOT.csv')
dot['Observation_Time'] = pd.to_datetime(dot.Observation_Time)

In [None]:
# Print the measured dataset as a reference
measured

In [None]:
# Print the automated dataset as a reference
dot

In [10]:
# Create data structures for kd-tree

measured_lat = measured['location_1_lat']
measured_lon = measured['location_1_lon']
measured_grid = [measured_lat,measured_lon]
measured_grid = np.array(measured_grid)
measured_grid = measured_grid.transpose()

dot_lat = dot['Latitude']
dot_lon = dot['Longitude']
dot_grid = [dot_lat,dot_lon]
dot_grid = np.array(dot_grid)
dot_grid = dot_grid.transpose()

In [None]:
# Create data structures for time matching

measured_time = measured['UTC_begin_time']

dot_time = dot['Observation_Time']
dot_time

In [12]:
# Create a kdtree

mytree = KDTree(measured_grid)

In [13]:
# Set up the kdtree function and the time matching function

def kdtree_match(auto_grid):
    global distance
    global indexes
    distance, indexes = mytree.query(auto_grid) # <- Specifies the distance from the reference point and the index number of that point
    return distance, indexes

def time_match(auto_time,measured_time):
    global time_del
    index_time = measured_time[indexes]
    datetime_time = auto_time.to_pydatetime()
    datetime_match = index_time.to_pydatetime() # <- Convert the two times selected from above to datetime objects
    parsed_time = datetime.strptime(str(datetime_time), '%Y-%m-%d %H:%M:%S') 
    parsed_match = datetime.strptime(str(datetime_match), '%Y-%m-%d %H:%M:%S') # <- Convert the datetime objects to parsed strings
    time_del = abs(parsed_time - parsed_match) # <- Find the difference between the two times 
    time_del = time_del.total_seconds() # <- Return the difference between the times in the integer number of seconds
    return time_del

In [21]:
# Define a function to check whether or not automated event needs to be appended

def event_in_spacetime(automated_events):
    event_index = 0
    for event in dot_grid:
        kdtree_match(event)
        time_match(dot_time[event_index],measured_time)
        if ((distance < 0.05) & (time_del < 600.0)):
            continue
        else:
            measured.loc[len(measured.index) + 1] = {'Unnamed: 0': len(measured.index) + 1, 'event_id': dot['Station_ID'][event_index],'magnitude': dot['Wind_Gust'][event_index], 
                                                     'magnitude_type': 'MG', 'location_1_lat': dot['Latitude'][event_index], 'location_1_lon': dot['Longitude'][event_index], 'wfo':'NA', 
                                                     'UTC_begin_time': dot['Observation_Time'][event_index], 'UTC_end_time': dot['Observation_Time'][event_index], 'delta_time': 0, 
                                                     'event_narrative':'NA', 'episode_narrative': 'NA', 'elevation_m': 'NA', 'avg_pop': 'NA'}
        event_index += 1    
    print(measured)

In [22]:
# Loop through records in each dataset to find events to be added

start_time = time.time()
event_in_spacetime(dot_grid)
print('Program complete!')
print('Took --- %s seconds ---' %(time.time() - start_time))

      Unnamed: 0 event_id  magnitude magnitude_type  location_1_lat  \
0              0   861581    58.0000             MG       32.710000   
1              1   861584    56.0000             MG       32.870000   
2              2   861582    65.0000             MG       32.800000   
3              3   849004    52.0000             MG       45.930000   
4              4   848193    54.0000             MG       47.020000   
...          ...      ...        ...            ...             ...   
2275        2275    RPLI4    57.2355             MG       41.391972   
2276        2276    RQCI4    61.5551             MG       41.517100   
2277        2277    RDAI4    51.8359             MG       41.496333   
2278        2278    RCDI4    60.4752             MG       41.978000   
2279        2279    RCDI4    65.8748             MG       41.978000   

      location_1_lon  wfo      UTC_begin_time         UTC_end_time  \
0         -95.880000  FWD 2019-10-21 04:23:00      10/21/2019 4:23   
1      

In [23]:
# Export the new dataset to a CSV

measured.to_csv('/home/nathane1/research/EnvironmentalData/MSRs/2019MSRs.csv')