## Download ECMWF XML

In [115]:
import sys, os
import requests
import getpass
from datetime import datetime
from pathlib import Path
import re

import pandas as pd
import xml.etree.ElementTree as ET
from dateutil import rrule

In [116]:
# Base data directory: the XML inputs are globbed from `save_dir / "xml"` and
# the per-cyclone CSV outputs are written under `save_dir / "csv"`.
save_dir = Path("/home/turnerm/sync/aa_repo_data/Data/public/exploration/phl/ecmwf_hindcast")

## Get typhoon names

In [46]:
# Load the typhoon event table and give its three columns fixed names.
filename = "../../IBF-Typhoon-model/data/wind_data/input/typhoon_events.csv"
df_typhoons = pd.read_csv(filename)
df_typhoons.columns = ["local", "international", "year"]

# Lower-case both name columns so later name comparisons are case-insensitive.
name_columns = ["local", "international"]
df_typhoons[name_columns] = df_typhoons[name_columns].apply(
    lambda column: column.str.lower()
)

## Convert to CSV

In [111]:
def _format_timestamp(stamp):
    """Turn ``YYYY-MM-DDTHH:MM:SSZ`` into ``YYYY/MM/DD, HH:MM:SS``."""
    parts = stamp.split('T')
    # The final character of the clock part (the trailing "Z") is dropped.
    return '/'.join(parts[0].split('-')) + ', ' + parts[1][:-1]


def _joined(values):
    """Concatenate the lower-cased, stripped string form of every value."""
    return ''.join(str(value).lower().strip() for value in values)


def xml2csv(filename, typhoon_names=None, out_dir=None):
    """Append the forecast fixes of known typhoons in an XML file to CSVs.

    Every ``data`` element whose ``type`` is ``forecast`` or
    ``ensembleForecast`` is scanned. For each ``disturbance`` whose first
    ``cycloneName`` (lower-cased and stripped) is one of the known typhoon
    names, one row per ``fix`` element is appended to
    ``<out_dir>/csv/<cyclone_name>_all.csv``. The header row is written only
    when the CSV file does not exist yet. All rows for one cyclone are
    written with a single ``to_csv`` call per input file instead of one call
    per fix.

    Args:
        filename: Path of the XML file to parse.
        typhoon_names: Iterable of lower-case cyclone names to keep.
            Defaults to the module-level ``df_typhoons["international"]``.
        out_dir: Directory under which the ``csv`` folder is created.
            Defaults to the module-level ``save_dir``.

    Returns:
        None. The result is the rows appended to the CSV files.

    Raises:
        AttributeError: If the header has no ``productionCenter`` or
            ``baseTime`` element.
        IndexError: If a kept ``fix`` has no ``validTime`` element, or a
            timestamp has no ``T`` separator.
    """
    if typhoon_names is None:
        typhoon_names = df_typhoons["international"]
    if out_dir is None:
        out_dir = save_dir
    # Build the lookup once; membership tests below are then O(1).
    known_names = set(typhoon_names)

    root = ET.parse(filename).getroot()
    prod_center = root.find('header/productionCenter').text
    base_time = root.find('header/baseTime').text

    # Rows are grouped per cyclone (insertion-ordered) so that each output
    # file is opened once per input file.
    rows_by_cyclone = {}
    for data in root.findall('data'):
        mtype = data.get('type')
        if mtype not in ('forecast', 'ensembleForecast'):
            continue
        for disturbance in data.findall('disturbance'):
            names = [
                (node.text or '').lower().strip()
                for node in disturbance.findall('cycloneName')
            ]
            # Skip unnamed disturbances and cyclones we are not tracking.
            if not names or not names[0] or names[0] not in known_names:
                continue
            cyclone_name = names[0]

            # Values shared by every fix of this disturbance.
            product = re.sub(r'\s+', ' ', prod_center).strip().lower()
            forecast_time = _format_timestamp(base_time)
            cyc_number = [
                node.text for node in disturbance.findall('cycloneNumber')
            ]

            rows = rows_by_cyclone.setdefault(cyclone_name, [])
            for fix in disturbance.findall('fix'):
                valid_time = [node.text for node in fix.findall('validTime')]
                record = {
                    'mtype': [mtype],
                    'product': [product],
                    'cyc_number': cyc_number,
                    'ensemble': [data.get('member')],
                    'speed': [
                        node.text
                        for node in fix.findall('cycloneData/maximumWind/speed')
                    ],
                    'pressure': [
                        node.text
                        for node in fix.findall(
                            'cycloneData/minimumPressure/pressure'
                        )
                    ],
                    'time': [_format_timestamp(valid_time[0])],
                    'lat': [node.text for node in fix.findall('latitude')],
                    'lon': [node.text for node in fix.findall('longitude')],
                    'lead_time': [fix.get('hour')],
                    'forecast_time': [forecast_time],
                }
                # Every field is flattened to one lower-case string; missing
                # elements become '' and missing attributes become 'none'.
                rows.append(
                    {key: _joined(values) for key, values in record.items()}
                )

    csv_dir = Path(out_dir) / "csv"
    for cyclone_name, rows in rows_by_cyclone.items():
        if not rows:
            continue
        csv_dir.mkdir(parents=True, exist_ok=True)
        outfile = csv_dir / f"{cyclone_name}_all.csv"
        pd.DataFrame(rows).to_csv(
            outfile, mode='a', header=not outfile.exists(), index=False
        )
            

In [112]:
# Convert every XML file found in the xml/ folder, in sorted path order.
xml_dir = save_dir / "xml"
filename_list = sorted(xml_dir.glob('*.xml'))
for filename in filename_list:
    xml2csv(filename)

KeyboardInterrupt: 

## AKI CSVs

In [3]:
import numpy as np
import pandas as pd
import xarray as xr

In [5]:
# Directory holding the per-typhoon `<name>_all.csv` files loaded below.
data_dir = "/home/turnerm/sync/aa_repo_data/Data/public/exploration/phl/ecmwf_forecast/CSVS2"

In [6]:
# Load the combined forecast CSV of a single typhoon.
typhoon_name = "rai"
csv_path = f"{data_dir}/{typhoon_name}_all.csv"
df = pd.read_csv(csv_path)

In [None]:
# Build a track dataset for `typhoon_name` with one entry per row of `df`
# along the "time" dimension.
#
# The original cell referenced `dta_dict` and `typhoon`, which exist only in
# the function template this cell was adapted from; every length is now taken
# from `df` and the identifier from `typhoon_name`.
n_steps = len(df)
track = xr.Dataset(
    data_vars={
        # Constant time step of 3 for every record (matches the 3H coordinate).
        "time_step": ("time", np.full(n_steps, 3, dtype=float)),
        # NOTE(review): no unit conversion is applied here; confirm that
        # df.cyc_speed is already in m/s, as the unit attribute below states.
        "max_sustained_wind": ("time", df.cyc_speed),
        # TODO: environmental_pressure, central_pressure, radius_max_wind and
        # radius_oci were filled from `dta_dict` in the template and have no
        # source in this cell yet.
        "lat": ("time", df.lat),
        "lon": ("time", df.lon),
        "basin": ("time", ['WP'] * n_steps),
    },
    # Placeholder 3-hourly time axis; its length must equal the data length.
    coords={"time": pd.date_range("1980-01-01", periods=n_steps, freq="3H")},
    attrs={
        "max_sustained_wind_unit": "m/s",
        "central_pressure_unit": "mb",
        "name": typhoon_name,
        "sid": typhoon_name,  # +str(forcast_df.ensemble_number),
        "orig_event_flag": True,
        "data_provider": 'ibtracs_usa',
        "id_no": typhoon_name,
        "basin": 'wp',
        # TODO: "category" came from dta_dict['Catagory'][0] in the template;
        # `df` is not shown to carry an equivalent value, so it is left unset.
    },
)
track = track.set_coords(["lat", "lon"])

In [None]:

    dta_dict=data.query('SID==@typhoon').to_dict('list') 
    track = xr.Dataset(
        data_vars={
            "time_step": ("time", np.full_like(dta_dict['Timestep'], 3, dtype=float)),
            "max_sustained_wind": (
                "time", dta_dict['USA_WIND'],  # conversion from kn to meter/s
            ),
            "environmental_pressure": (
                "time",
                [1010]*len(dta_dict['SID']),
            ),
            "central_pressure": ("time", dta_dict['USA_PRES']),
            "lat": ("time", dta_dict['LAT']),
            "lon": ("time", dta_dict['LON']),
            "radius_max_wind": ("time",dta_dict['USA_RMW']),
            "radius_oci": ("time", [np.nan]*len(dta_dict['USA_RMW'])),
            "basin": ("time", ['WP']*len(dta_dict['USA_RMW'])),


        },
        coords={"time": pd.date_range("1980-01-01", periods=len(dta_dict['SID']), freq="3H"),},
        attrs={
            "max_sustained_wind_unit": "m/s",
            "central_pressure_unit": "mb",
            "name": typhoon,
            "sid": typhoon,  # +str(forcast_df.ensemble_number),
            "orig_event_flag": True,
            "data_provider": 'ibtracs_usa',
            "id_no": typhoon,
            "basin": 'wp',
            "category": dta_dict['Catagory'][0],         

        },
    )
    track = track.set_coords(["lat", "lon"])
    return track