
----
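This notebook demonstrates how to download climate indices used to monitor El Niño/Southern Oscillation (ENSO) and Pacific Decadal Oscillation (PDO) variability, such as the Southern Oscillation Index (SOI). Note that the ENSO series loaded below currently comes from the Niño 3.4 anomaly file, although it is stored in variables named `soi`/`SOI`.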

The data are read from the NOAA PSL web site.

In [1]:
# load packages
import pandas as pd

In [2]:
# read data from PSL

# data are here (older gcos_wgsp locations kept for reference):
#soi_url = 'https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/soi.long.data'
#pdo_url = 'https://psl.noaa.gov/gcos_wgsp/Timeseries/Data/pdo.long.data'
# NOTE(review): the active "soi" URL points at the Nino 3.4 anomaly file,
#   not soi.long.data; the variable names still say SOI -- confirm this is
#   intended.
soi_url = 'https://psl.noaa.gov/data/timeseries/month/data/nino34.long.anom.data'
pdo_url = 'https://psl.noaa.gov/data/timeseries/month/data/pdo.timeseries.sstens.data'

# data are listed as each year in a separate row, each month of the
#   year in a separate column; here we define the column headings to
#   match this
col_names = ['year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

# number of trailing rows to discard (these have metadata we don't need
#   and the partial data for 2024)
N_TRAILING_ROWS = 9


def read_psl_table(url):
    """Read one PSL monthly table into a year-by-month DataFrame.

    Parameters
    ----------
    url : str
        Location of the whitespace-delimited PSL text file.

    Returns
    -------
    pandas.DataFrame
        One row per year with columns given by ``col_names``; the last
        ``N_TRAILING_ROWS`` rows of the parsed file are removed.
    """
    # read data
    #   skiprows = 1: skip top line (has year range)
    #   header = None: no header
    #   sep = r'\s+': columns separated by white space (this replaces the
    #       deprecated delim_whitespace = True, which raises a FutureWarning)
    #   names = col_names: use the column headings defined above
    #   on_bad_lines = 'skip': use this to ignore lines that don't have enough columns
    table = pd.read_csv(url, skiprows = 1, header = None, sep = r'\s+',
                        names = col_names, on_bad_lines = 'skip')

    # return a trimmed copy instead of mutating in place, so re-running
    #   the cell always starts from a freshly parsed table
    return table.drop(table.tail(N_TRAILING_ROWS).index)


soi = read_psl_table(soi_url)
pdo = read_psl_table(pdo_url)

  soi = pd.read_csv(soi_url, skiprows = 1, header = None, delim_whitespace = True,
  pdo = pd.read_csv(pdo_url, skiprows = 1, header = None, delim_whitespace = True,


In [3]:
# reset dataframe to be continuous (each row a different month)
#   At this point the dataFrame has each year as a row, each month
#   of that year as a different column.  Instead, we want to have
#   just two columns, first one with the continuous date and the
#   second with the value for that month

def wide_to_monthly(wide, value_name):
    """Reshape a year-by-month table into a two-column monthly series.

    Parameters
    ----------
    wide : pandas.DataFrame
        Table with a 'year' column plus one column per month, named with
        three-letter month abbreviations ('Jan' ... 'Dec').
    value_name : str
        Name given to the value column of the result (e.g. 'SOI').

    Returns
    -------
    pandas.DataFrame
        Columns ``['date', value_name]``; each date is the 15th of the
        corresponding month.
    """
    # one row per (year, month) pair
    monthly = wide.set_index('year').stack().reset_index()
    monthly.columns = ['year', 'month', value_name]

    # Now convert year/month to a proper date
    #   month abbreviation -> month number, then assemble the date from
    #   year/month/day columns with the day fixed at mid-month
    monthly['month'] = pd.to_datetime(monthly['month'], format='%b').dt.month
    monthly['day'] = 15
    monthly['date'] = pd.to_datetime(monthly[['year', 'month', 'day']])

    # keep only the date and the value, in that order
    return monthly[['date', value_name]]


soi_new = wide_to_monthly(soi, 'SOI')
pdo_new = wide_to_monthly(pdo, 'PDO')

In [4]:
# preview the first five rows of the reshaped PDO series (columns: date, PDO)
pdo_new.head()

Unnamed: 0,date,PDO
0,1870-01-15,0.085
1,1870-02-15,0.017
2,1870-03-15,-0.068
3,1870-04-15,-0.593
4,1870-05-15,-0.587


In [5]:
# preview the first five rows of the reshaped series stored as SOI (columns: date, SOI)
#   NOTE(review): the values come from whatever soi_url points to -- confirm
#   the column label matches the index actually downloaded
soi_new.head()

Unnamed: 0,date,SOI
0,1870-01-15,-1.0
1,1870-02-15,-1.2
2,1870-03-15,-0.83
3,1870-04-15,-0.81
4,1870-05-15,-1.27
