In [1]:
import pandas as pd

In [26]:
# Build the ONI table from ../../Data/ONI.txt.
# Each data row is read as: season label, year, total, anomaly. Only the label,
# the year and the anomaly (4th field) are used.

# Season label -> two-digit month string used as the month part of the date.
# NOTE(review): presumably the month is the middle month of the 3-month season
# (DJF -> January, ..., NDJ -> December) - confirm against the data source.
date_map = {'DJF': "01", 'JFM': "02", 'FMA': "03", 'MAM': "04", 'AMJ': "05", 'MJJ': "06", 'JJA': "07", 'JAS': "08", 'ASO': "09", 'SON': "10", 'OND': "11", 'NDJ': "12"}

data = {"Date": [], "ONI": []}

with open("../../Data/ONI.txt") as f:
    for line in f:
        # str.split() with no argument treats any run of whitespace as one
        # delimiter and also discards the line ending.
        values = line.split()

        if not values:  # Skip blank lines (e.g. a trailing empty line)
            continue
        if values == ["SEAS", "YR", "TOTAL", "ANOM"]:  # Skip header row, whatever its spacing or line ending
            continue

        # An unknown season label still raises KeyError so bad rows are not silently dropped.
        data["Date"].append(f"{values[1]}-{date_map[values[0]]}-01")
        data["ONI"].append(values[3])  # kept as a string here

# One row per season, indexed by the "YYYY-MM-01" date string.
ONI_df = pd.DataFrame(data).set_index("Date")

In [27]:
def parse_space_separated(input_string):
    """Extract the data rows of a space-delimited text table.

    Every line is split on single spaces and the empty strings produced by
    runs of spaces are thrown away. A line is kept only when its first field
    passes ``str.isnumeric()``, which filters out blank lines as well as
    title/header rows.

    Args:
        input_string: The table text, with rows separated by newline characters.

    Returns:
        A list of rows; each row is the list of non-empty string fields of
        one kept line, in their original order.
    """
    rows = []

    for raw_line in input_string.split("\n"):
        # Runs of spaces act as a single delimiter once the empty pieces are filtered out
        fields = list(filter(None, raw_line.split(" ")))

        if fields and fields[0].isnumeric():
            rows.append(fields)

    return rows

def create_date_index(year_list):
    """Build first-of-month date strings for all twelve months of each year.

    Args:
        year_list: Iterable of years; each one is interpolated as-is into
            the resulting string.

    Returns:
        A list of "<year>-MM-01" strings, with years in input order and the
        months 01 through 12 within each year.
    """
    date_index = []

    for year in year_list:
        for month_number in range(1, 13):
            # zero-pad the month so the string has the YYYY-MM-DD layout
            date_index.append(f"{year}-{month_number:02d}-01")

    return date_index

# Build the SOI table from ../../Data/SOI.txt.
# Read the whole file first and close it; the parsing below needs only the text.
with open("../../Data/SOI.txt", "r") as f:
    file_content = f.read()

# -999.9 is wide enough that it screws up the " " as delimiter scheme.
# Pad every -999.9 with a " " on each side so it always splits into its own field.
file_content = file_content.replace("-999.9", " -999.9 ")

# Splitting on the table title yields: [preamble, 1st table, 2nd table, ...].
# The 1st table (SLP anomalies) is not used; the 2nd table holds the SOI data,
# i.e. the standardised SLP anomalies.
sections = file_content.split("(STAND TAHITI - STAND DARWIN)  SEA LEVEL PRESS")
if len(sections) < 3:
    raise ValueError(
        "SOI.txt: expected the table title to appear at least twice, "
        f"found {len(sections) - 1} occurrence(s)"
    )
standardised = sections[2]

# Each parsed row is [year, value, value, ...].
table = parse_space_separated(standardised)

# Pair values with month dates row by row, so that a row of unexpected length
# cannot shift the values of other years onto the wrong dates.
date_index = []
data = []
for row in table:
    year, monthly_values = row[0], row[1:]
    month_dates = create_date_index([year])

    if len(monthly_values) > len(month_dates):
        raise ValueError(f"SOI row for {year} has more than {len(month_dates)} values: {row}")

    # zip stops at the shorter sequence: a short row fills the leading months only.
    # NOTE(review): assumes any missing values are the trailing months - confirm.
    for month_date, value in zip(month_dates, monthly_values):
        date_index.append(month_date)
        data.append(value)

SOI_df = pd.DataFrame(data=data, columns=["SOI"], index=date_index)

# Replace the default (missing-value) markers with nan, then drop those rows.
# Values are still strings at this point, hence the string markers.
SOI_df = SOI_df.replace(to_replace=["-999.9", "999.9"], value=float("nan")).dropna(axis=0)


In [28]:
# Years of past ENSO events, grouped by phase. Filled from the text file below,
# which lists years (or year ranges) under a subheading line naming the phase.
enso_years = {
    "El Niño": [],
    "La Niña": [],
    "Neutral": []
}

# NOTE(review): the file is opened with the platform default encoding while the
# subheadings contain "ñ" - confirm the file's encoding matches on every platform.
with open("../../Data/Past ENSO Events.txt") as f:
    file_content = f.read()

enso = ""  # subheading currently in effect; empty until the first one is seen

for line in file_content.split("\n"):
    # Strip so that trailing spaces or a stray "\r" neither hide a subheading
    # nor break the int() conversions below.
    line = line.strip()

    if not line:  # skip empty / whitespace-only lines
        continue
    if line in enso_years:  # process subheadings ('El Niño', 'La Niña', 'Neutral') and store it
        enso = line
        continue
    if enso == "":  # skip anything that appears before the first subheading
        continue

    if len(line) == 4:
        # a single year, e.g. '1904'
        enso_years[enso].append(int(line))
    else:
        # a range, e.g. '2017-2020': first and last four characters are the
        # start and end years, both inclusive
        enso_years[enso].extend(range(int(line[:4]), int(line[-4:]) + 1))


In [38]:
# Outer-join SOI and ONI on their date-string indexes, then convert both columns to float.
merged_df = SOI_df.merge(ONI_df, how="outer", left_index=True, right_index=True).astype(float)

# Year -> phase lookup. Phases are inserted lowest priority first, so a year listed
# under several phases keeps the label an El Niño > Neutral > La Niña check order gives.
phase_by_year = {}
for phase in ("La Niña", "Neutral", "El Niño"):
    for year in enso_years[phase]:
        phase_by_year[year] = phase

# The first four characters of each index label are the year; years that are
# not listed under any phase get None.
enso_column = [phase_by_year.get(int(date[:4])) for date in merged_df.index]

merged_df["ENSO"] = enso_column

# Keep only rows that have an SOI value, an ONI value and an ENSO label.
merged_df = merged_df.dropna()

In [40]:
# Write the merged table (date-string index plus the SOI, ONI and ENSO columns)
# to a CSV file; the path is relative, so it lands in the current working directory.
merged_df.to_csv("SOI_ONI_ENSO.csv")