In [1]:
import pandas as pd
import json
from shapely.geometry import Point, Polygon
from datetime import datetime

In [38]:
# Bounding box applied by read_sites: station latitudes/longitudes are compared
# against these limits after longitudes have been converted to [-180, 180].
MIN_LON, MAX_LON = -120, -115
MIN_LAT, MAX_LAT = 31.5, 38

def convert_longitude(longitude):
    """Map a longitude from the [0, 360] convention onto [-180, 180].

    Args:
        longitude (float): longitude expressed in [0, 360]

    Returns:
        float: the equivalent longitude in [-180, 180]; inputs of 180 or
            less are returned unchanged
    """
    # Values above 180 are shifted down by one full turn; the rest pass through.
    return longitude - 360 if longitude > 180 else longitude
    
def read_sites(station_list_file = 'stations.txt'):
    """Read the station list and keep the sites inside the bounding box.

    The file is parsed as whitespace-delimited text and must provide at
    least the columns ``Site``, ``Latitude`` and ``Longitude``. Site names
    are upper-cased, longitudes are converted from [0, 360] to [-180, 180],
    and only rows with MIN_LAT <= Latitude <= MAX_LAT and
    MIN_LON <= Longitude <= MAX_LON are kept.

    Args:
        station_list_file (str, optional): path to stations file. Defaults to 'stations.txt'.

    Returns:
        dataframe: pandas dataframe with columns Site, Latitude, Longitude,
            de-duplicated and indexed 0..n-1
        list: one (Site, Latitude, Longitude) tuple per row of that dataframe
    """
    # sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
    sites_df = pd.read_csv(station_list_file, sep=r'\s+')
    sites_df['Site'] = sites_df['Site'].str.upper()
    sites_df['Longitude'] = sites_df['Longitude'].apply(convert_longitude)

    inside_box = (
        (sites_df['Latitude'] >= MIN_LAT) & (sites_df['Latitude'] <= MAX_LAT) &
        (sites_df['Longitude'] >= MIN_LON) & (sites_df['Longitude'] <= MAX_LON)
    )
    # The index is reset only after de-duplicating, so the returned frame is
    # numbered 0..n-1 without gaps where duplicate rows were dropped.
    sites_df = (
        sites_df.loc[inside_box, ['Site', 'Latitude', 'Longitude']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    sites_list = list(sites_df.itertuples(index=False, name=None))
    return sites_df, sites_list

# Only the DataFrame is needed here. Indexing the returned tuple, rather than
# unpacking the second element into `_`, avoids overwriting IPython's `_`
# (last output) variable.
sites_df = read_sites('/root/data/rrr/integrated_weather_dataset/config/stations.txt')[0]

In [44]:
# Show the row(s) whose Site is DREW.
sites_df.loc[sites_df['Site'].eq('DREW')]

Unnamed: 0,Site,Latitude,Longitude
105,DREW,36.005849,-119.958368


In [39]:
# Location of the Flash_Flood JSON file (ffw_cp.json) to be loaded.
file_path: str = '/root/data/rrr/integrated_weather_dataset/data/raw/Flash_Flood/ffw_cp.json'

In [40]:
# Parse the whole JSON document into memory. The encoding is given explicitly
# so the result does not depend on the platform's default locale encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

In [41]:
def is_point_in_polygon(lat, lon, polygon):
    """Tell whether the location (lat, lon) is contained in ``polygon``.

    Args:
        lat: latitude of the location (used as the point's y coordinate)
        lon: longitude of the location (used as the point's x coordinate)
        polygon: object exposing ``contains(point)``; a shapely Polygon in
            this notebook

    Returns:
        Whatever ``polygon.contains`` returns for the point.
    """
    # Point takes (x, y), so longitude goes first.
    return polygon.contains(Point(lon, lat))

In [42]:
# Accumulator for the per-FFW records (begin, end, sites) built by the matching loop.
ffw_data = list()

In [46]:
# Match every FFW polygon against the station list.
#
# The result list is (re)created here so that re-running this cell -- e.g.
# after an interrupted run -- starts from scratch instead of appending a
# second copy of every record.
ffw_data = []

# Build each station's Point once: it is identical for every polygon, and
# plain tuples are much cheaper to loop over than DataFrame.iterrows().
site_points = [
    (site, Point(lon, lat))
    for site, lat, lon in sites_df[['Site', 'Latitude', 'Longitude']].itertuples(index=False, name=None)
]

# A missing or empty 'FFWs' entry means there is nothing to process.
ffw_records = data['FFWs'] if 'FFWs' in data and data['FFWs'] else []

for ffw in ffw_records:
    vertices = ffw.get('vertices') or []

    if len(vertices) >= 3:
        # The sign of every stored longitude is flipped before building the polygon.
        # NOTE(review): presumably the file stores degrees west as positive
        # numbers -- confirm against the data source.
        polygon = Polygon([(-1 * vertex['lon'], vertex['lat']) for vertex in vertices])
        # Same containment test as is_point_in_polygon (polygon.contains).
        ffw_sites = [site for site, point in site_points if polygon.contains(point)]
    else:
        # Fewer than three vertices cannot form a polygon: keep the record,
        # but it cannot contain any site.
        ffw_sites = []

    ffw_data.append({
        "begin": ffw.get('begin', 'No Begin Time'),
        "end": ffw.get('end', 'No End Time'),
        "sites": ffw_sites,
    })

KeyboardInterrupt: 

In [47]:
# Preview the first few records only; displaying the whole list floods the notebook output.
ffw_data[:10]

[{'begin': '2005-02-21T21:50:00Z',
  'end': '2005-02-22T00:45:00Z',
  'sites': ['DREW', 'P547']},
 {'begin': '2005-05-06T01:43:00Z', 'end': '2005-05-06T02:45:00Z', 'sites': []},
 {'begin': '2005-05-06T02:45:00Z', 'end': '2005-05-06T04:45:00Z', 'sites': []},
 {'begin': '2005-07-23T22:37:00Z', 'end': '2005-07-23T23:30:00Z', 'sites': []},
 {'begin': '2005-07-23T23:47:00Z',
  'end': '2005-07-24T00:45:00Z',
  'sites': ['P557', 'P568', 'P591', 'P616', 'P811', 'P812']},
 {'begin': '2005-07-24T00:42:00Z',
  'end': '2005-07-24T01:45:00Z',
  'sites': ['P562',
   'P568',
   'P569',
   'P570',
   'P579',
   'P591',
   'P616',
   'P811',
   'P812',
   'THCP']},
 {'begin': '2005-07-24T20:30:00Z', 'end': '2005-07-24T21:30:00Z', 'sites': []},
 {'begin': '2005-07-24T21:26:00Z',
  'end': '2005-07-24T23:30:00Z',
  'sites': ['BEPK', 'P573']},
 {'begin': '2005-07-24T22:14:00Z',
  'end': '2005-07-25T00:15:00Z',
  'sites': ['P569', 'P570']},
 {'begin': '2005-07-24T23:17:00Z',
  'end': '2005-07-25T01:15:00Z',

In [48]:
# Troposphere CSV to label (file name: 2004.csv).
# NOTE(review): the first FFW records shown in this notebook's output begin in
# 2005 -- confirm that this 2004 file is the intended year to label.
tropo_path: str = '/root/data/rrr/ES3-TACLS/AR/dataset/troposphere_data/2004.csv'
trop_df = pd.read_csv(filepath_or_buffer=tropo_path)

In [49]:
# Display the loaded troposphere frame (pandas abbreviates long frames in the rendered output).
trop_df

Unnamed: 0,Timestamp,Site,Latitude,Longitude,ZWD
0,2004-01-01 00:00:00.000000,AGMT,34.594282,-116.429377,27.7
1,2004-01-01 00:04:59.180331,AGMT,34.594282,-116.429377,28.3
2,2004-01-01 00:09:58.360654,AGMT,34.594282,-116.429377,28.7
3,2004-01-01 00:14:57.540985,AGMT,34.594282,-116.429377,29.2
4,2004-01-01 00:19:56.721309,AGMT,34.594282,-116.429377,29.6
...,...,...,...,...,...
16203789,2004-12-30 02:38:30.655736,WWMT,33.955313,-116.653855,54.6
16203790,2004-12-30 02:43:29.836067,WWMT,33.955313,-116.653855,54.5
16203791,2004-12-30 02:48:29.016391,WWMT,33.955313,-116.653855,54.5
16203792,2004-12-30 02:53:28.196722,WWMT,33.955313,-116.653855,54.5


In [50]:
# Convert the Timestamp column to pandas datetimes so it can be compared with FFW begin/end times.
trop_df['Timestamp'] = trop_df['Timestamp'].pipe(pd.to_datetime)

In [None]:
# Flag every troposphere row that falls inside an FFW time window at one of
# that FFW's sites. The column is reset on each run, so the cell can be
# re-executed safely.
trop_df['ffw'] = 0

# Time span covered by the frame. FFWs entirely outside it cannot match any
# row, so they are skipped without scanning the whole frame.
first_ts = trop_df['Timestamp'].min()
last_ts = trop_df['Timestamp'].max()

# Process each FFW entry
for ffw in ffw_data:
    sites = ffw['sites']
    if not sites:
        # isin([]) is False for every row, so this entry cannot flag anything.
        continue

    # Parse as UTC and then drop the timezone. Unlike tz_localize(None), this
    # also yields the UTC instant when a string carries a non-UTC offset.
    # NOTE(review): assumes trop_df['Timestamp'] holds naive UTC times -- confirm.
    begin = pd.to_datetime(ffw['begin'], utc=True).tz_convert(None)
    end = pd.to_datetime(ffw['end'], utc=True).tz_convert(None)
    if end < first_ts or begin > last_ts:
        continue

    # Filter the DataFrame for rows matching the time range and site
    mask = (trop_df['Timestamp'] >= begin) & (trop_df['Timestamp'] <= end) & trop_df['Site'].isin(sites)

    # Set the 'ffw' column to 1 for the matching rows
    trop_df.loc[mask, 'ffw'] = 1