In [18]:
import os
import xarray as xr
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd
import numpy as np

In [19]:
import timeit

In [20]:
df_eez = gpd.read_file('../palauEEZ.geojson')
eez = df_eez['geometry'].values[0]

In [21]:
def palau_eez(df):
    in_palau = []
    longlat = df[['longitude','latitude']].values.tolist()
    for x in longlat:
        point = Point(x[0],x[1])
        if eez.contains(point) or point.within(eez):
            in_palau.append((x[0],x[1]))
    return df[df[['longitude', 'latitude']].apply(tuple, axis=1).isin(in_palau)]

In [22]:
def get_lat_name(df):
    for lat_name in ['lat', 'latitude']:
        if lat_name in df.columns.values.tolist():
            return lat_name
    raise RuntimeError("Couldn't find a latitude coordinate")
    
def get_lon_name(df):
    for lon_name in ['lon','long','longitude']:
        if lon_name in df.columns.values.tolist():
            return lon_name
    raise RuntimeError("Couldn't find a latitude coordinate")
#currently slices by year
def compute_mean(df):
    df_list = []
    for year in df['year'].unique().tolist():
        sub = global_mean(df[df['year'] == year])
        sub['year'] = year
        df_list.append(sub.set_index([get_lat_name(df), get_lon_name(df),'year']))
    result = pd.concat(df_list)
    return result

In [23]:
def remove_fillvalue(df):
    df = df[df['CRW_DHW'] != -32768]
    df = df[df['CRW_DHW_mask'] != -5]
    
    df = df[df['CRW_HOTSPOT'] != -32768]
    df = df[df['CRW_HOTSPOT_mask'] != -5]
    
    df = df[df['CRW_SSTANOMALY'] != -32768]
    df = df[df['CRW_SSTANOMALY_mask'] != -5]
    
    df = df[df['CRW_BAA_mask'] != -5]
    df = df[df['CRW_BAA_mask'] != -5]
    
    df = df[df['CRW_BAA_7D_MAX'] != -5]
    df = df[df['CRW_BAA_7D_MAX_mask'] != -5]
    
    df = df[df['CRW_SST'] != -32768]
    return df
    

In [24]:
def filter_box(df):
    if "lat" in df:
        filtered = df[(df["lat"]>= 0.99)&
                                    (df["lat"]<=14.01)&(df["lon"]<=140.51)&
                                    (df["lon"]>=127.49)]
    else:
        filtered = df[(df["latitude"]>= 0.99)&
                                    (df["latitude"]<=14.01)&(df["longitude"]<=140.51)&
                                    (df["longitude"]>=127.49)]
    return filtered

In [12]:
CRW_folder = "./CRW/"

In [13]:
CRW = os.listdir("./CRW")

In [14]:
CRW.sort()

In [16]:
if not os.path.exists("./CRW/pkl"):
    os.makedirs("./CRW/pkl")
    
if not os.path.exists("./CRW/csv"):
    os.makedirs("./CRW/csv")
    
if not os.path.exists("./CRW/average"):
    os.makedirs("./CRW/average")

if not os.path.exists("./CRW/average/pkl"):
    os.makedirs("./CRW/average/pkl")

if not os.path.exists("./CRW/average/csv"):
    os.makedirs("./CRW/average/csv")

if not os.path.exists("./CRW/box/pkl"):
    os.makedirs("./CRW/box/pkl")
if not os.path.exists("./CRW/box/pkl"):
    os.makedirs("./CRW/box/pkl")

In [220]:
for y in range(1985,2023+1):
# for y in range(2003,2023+1):

    start_time = timeit.default_timer()
    
    current_year = [x for x in CRW if str(y) in x]
    current_year.sort()
    
    current_year_df_list = []
    for filename in current_year:
        ds = xr.open_dataset(CRW_folder+filename)
        df = ds.to_dataframe()
        current_year_df_list.append(df)
    current_year_df = pd.concat(current_year_df_list)
    current_year_df = current_year_df.reset_index().drop_duplicates()
    current_year_df['year'] = current_year_df['time'].dt.year
    current_year_df = current_year_df[current_year_df['year']==y]
    
    
    current_year_df = remove_fillvalue(current_year_df)
    #Not done to save time, but necessary if you need to filter the raw data and not only the averages
#     current_year_df = palau_eez(current_year_df)
    current_year_df.to_pickle("./CRW/pkl/"+str(y)+".pkl")
    current_year_df.to_csv("./CRW/csv/"+str(y)+".csv")
    
    BAA = current_year_df[['year','latitude','longitude','CRW_BAA',"CRW_BAA_7D_MAX"]]
#     BAA_mode = BAA.groupby(['year','latitude','longitude']).agg(pd.Series.mode)
    BAA_mode = BAA.groupby(['year','latitude','longitude']).agg(lambda x: max(pd.Series.mode(x).to_list()) if len(pd.Series.mode(x).to_list()) > 0 else np.nan)
    BAA_sum = BAA.groupby(['year','latitude','longitude']).sum()
    BAA_max= BAA.groupby(['year','latitude','longitude'])['CRW_BAA'].max().to_frame()

    BAA_mode = BAA_mode.reset_index()
    BAA_sum = BAA_sum.reset_index()
    BAA_max = BAA_max.reset_index()
    
    current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()
    current_year_average = current_year_average.reset_index()
    
    current_year_average['CRW_BAA_mode'] = BAA_mode["CRW_BAA"]
    current_year_average['CRW_BAA_7D_MAX_mode'] = BAA_mode["CRW_BAA_7D_MAX"]

    current_year_average['CRW_BAA_sum'] = BAA_sum["CRW_BAA"]
    current_year_average['CRW_BAA_7D_MAX_sum'] = BAA_sum["CRW_BAA_7D_MAX"]
    current_year_average['CRW_BAA_1Y_MAX'] = BAA_max['CRW_BAA']
    
    current_year_average.rename(columns={'CRW_BAA':'CRW_BAA_average','CRW_BAA_7D_MAX':"CRW_BAA_7D_MAX_average"}, inplace=True)
    
    filtered_average = palau_eez(current_year_average)
    filtered_average = filtered_average.reset_index(drop= True)

    filtered_average.to_pickle("./CRW/average/pkl/"+str(y)+".pkl")
    filtered_average.to_csv("./CRW/average/csv/"+str(y)+".csv")
    
    elapsed = timeit.default_timer() - start_time
    print("Time to execute year " + str(y)+ " "  + str(elapsed))


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2003 119.97671516700211


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2004 120.30314258300132


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2005 118.86364662499909


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2006 118.29341299999942


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2007 118.64637416700134


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2008 119.44359858399912


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2009 118.69125995899958


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2010 121.62332666600196


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2011 119.24290670900155


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2012 118.5521632500022


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2013 119.64235637499951


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2014 119.60962350000045


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2015 118.29077241599953


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2016 121.21148483300203


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2017 121.56302841599972


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2018 120.20655404200079


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2019 119.7363003749997


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2020 121.21571858300013


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2021 121.55780650000088


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2022 121.40824312499899


  current_year_df.groupby(['latitude', 'longitude','year']).mean()
  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2023 62.916370499999175


In [30]:
import re

In [40]:
if not os.path.exists("./CRW/box/"):
    os.makedirs("./CRW/box/")
    
CRW = os.listdir("./CRW/box/")
CRW.sort()
CRW_folder = "./CRW/box/"

if not os.path.exists("./CRW/box/pkl"):
    os.makedirs("./CRW/box/pkl")
if not os.path.exists("./CRW/box/csv"):
    os.makedirs("./CRW/box/csv")

In [34]:
str(y)

'1985'

In [41]:
#Make pkl and csv corresponding to box area for CRW instead of based on EEZ
for y in range(1985,2023+1):
# for y in range(2003,2023+1):

    start_time = timeit.default_timer()
    
    current_year = [x for x in CRW if str(y) in x]
    current_year.sort()
    
    current_year_df_list = []
    for filename in current_year:
        ds = xr.open_dataset(CRW_folder+filename)
        df = ds.to_dataframe()
        current_year_df_list.append(df)
    current_year_df = pd.concat(current_year_df_list)
    current_year_df = current_year_df.reset_index().drop_duplicates()
    current_year_df['year'] = current_year_df['time'].dt.year
    current_year_df = current_year_df[current_year_df['year']==y]
    
    
    current_year_df = remove_fillvalue(current_year_df)
    current_year_df.to_pickle("./CRW/box/pkl/"+str(y)+".pkl")
    current_year_df.to_csv("./CRW/box/csv/"+str(y)+".csv")
    
    elapsed = timeit.default_timer() - start_time
    print("Time to execute year " + str(y)+ " "  + str(elapsed))

Time to execute year 1985 178.0025853330153
Time to execute year 1986 240.67917950000265
Time to execute year 1987 247.6655328339839
Time to execute year 1988 246.60850974998903
Time to execute year 1989 231.33764675000566
Time to execute year 1990 221.0973745000083
Time to execute year 1991 225.53407279099338
Time to execute year 1992 221.5694198750134
Time to execute year 1993 221.05491704199812
Time to execute year 1994 226.82686274999287
Time to execute year 1995 226.79407695800182
Time to execute year 1996 229.9175215830037
Time to execute year 1997 219.5067469579808
Time to execute year 1998 221.79930991702713
Time to execute year 1999 218.53415233301348
Time to execute year 2000 218.76469862498925
Time to execute year 2001 219.2044569159916
Time to execute year 2002 218.19796854202286
Time to execute year 2003 219.51863166701514
Time to execute year 2004 217.57248324999819
Time to execute year 2005 217.3872418749961
Time to execute year 2006 218.16547508299118
Time to execute ye

In [350]:
if not os.path.exists("./CRW/weekly/pkl"):
    os.makedirs("./CRW/weekly/pkl")
    
if not os.path.exists("./CRW/weekly/csv"):
    os.makedirs("./CRW/weekly/csv")

In [351]:
for y in range(1985,2023+1):
    df = pd.read_pickle("./CRW/pkl/" + str(y) + ".pkl")
    times = df['time']
    max_time = times.max()
    min_time = times.min()
    
    end = datetime.datetime(max_time.year, max_time.month, max_time.day, max_time.hour,max_time.minute,max_time.second) 
    start = datetime.datetime(min_time.year,min_time.month,min_time.day,min_time.hour,min_time.minute,min_time.second)
    
    
    df_list = []
    while (start <= end):
        filtered_df = palau_eez(df[df['time'] == start])
        start += relativedelta(weeks=+1)
        df_list.append(filtered_df)
    df_year = pd.concat(df_list)
    df_year = df_year.reset_index(drop=True)
    
    df_year.to_pickle("./CRW/weekly/pkl/"+str(y)+".pkl")
    df_year.to_csv("./CRW/weekly/csv/"+str(y)+".csv")

In [323]:
max_time.year

1985

In [324]:
start = datetime.date(max_time.year, max_time.month, max_time.day) 
end = datetime.date(min_time.year,min_time.month,min_time.day)

In [326]:
end

datetime.date(1985, 4, 1)

In [336]:
import datetime
from dateutil.relativedelta import relativedelta

In [341]:
start = datetime.datetime(1985, 1, 1,12,0,0)
end = datetime.datetime(2023,12,31, 12,0,0) 
# "[(1979-01-01):1:(1980-01-01)]"

prev = None
while (start <= end):
    
    print(start)
    start += relativedelta(weeks=+1)
    print(df[df['time'] == start])
    break

1985-01-01 12:00:00
Empty DataFrame
Columns: [time, latitude, longitude, CRW_BAA, CRW_BAA_mask, CRW_BAA_7D_MAX, CRW_BAA_7D_MAX_mask, CRW_DHW, CRW_DHW_mask, CRW_HOTSPOT, CRW_HOTSPOT_mask, CRW_SEAICE, CRW_SST, CRW_SSTANOMALY, CRW_SSTANOMALY_mask, year]
Index: []


In [309]:
l = os.listdir("./CRW/pkl")
l.sort()

In [275]:
#Twice weekly sample
for x in os.listdir("./CRW/pkl"):
    t = pd.read_pickle("./CRW/pkl/" + x)
    break
#     crw_dfs_list.append(pd.read_pickle("./CRW/average/pkl/"+x))

In [301]:
len([x for x in os.listdir("./CRW/pkl") if "pkl" in x]) * len(r.groupby(['latitude','longitude']).size())

785460

In [306]:
# storage space (GB) required to have all geojson produced, the average size is about 3MB
(365*len([x for x in os.listdir("./CRW/pkl") if "pkl" in x])*3)/1024

41.7041015625

In [307]:
# storage space (GB) required to have twice weekly geojson produced, the average size is about 3MB
((365/3.5)*len([x for x in os.listdir("./CRW/pkl") if "pkl" in x])*3)/1024

11.915457589285715

In [308]:
# storage space (GB) required to have weekly geojson produced, the average size is about 3MB
((365/7)*len([x for x in os.listdir("./CRW/pkl") if "pkl" in x])*3)/1024

5.957728794642858

In [295]:
len(r)

7351100

In [298]:
r.groupby(['latitude','longitude']).size()

latitude  longitude 
1.625     132.725006    365
1.675     132.475006    365
          132.524994    365
          132.574997    365
          132.625000    365
                       ... 
11.525    134.875000    365
          134.925003    365
          134.975006    365
          135.024994    365
          135.074997    365
Length: 20140, dtype: int64

In [266]:
crw_dfs_list = []
for x in os.listdir("./CRW/average/pkl"):
    crw_dfs_list.append(pd.read_pickle("./CRW/average/pkl/"+x))
crw_df = pd.concat(crw_dfs_list)

In [268]:
master_crw = crw_df.sort_values(by=['year','latitude','longitude']).reset_index(drop=True)


In [272]:
master_crw.to_pickle("./CRW/average/pkl/master_CRW"+".pkl")
master_crw.to_csv("./CRW/average/master_CRW"+".csv")

In [238]:
CFS = os.listdir("./CFS")
CFS_folder = "./CFS/"

if not os.path.exists("./CFS/pkl"):
    os.makedirs("./CFS/pkl")
    
if not os.path.exists("./CFS/csv"):
    os.makedirs("./CFS/csv")

In [240]:
for y in range(1979,2023+1):
# for y in range(2003,2023+1):

    start_time = timeit.default_timer()
    
    current_year = [x for x in CFS if str(y) in x]
    current_year.sort()
    
    current_year_df_list = []
    for filename in current_year:
        ds = xr.open_dataset(CFS_folder+filename)
        df = ds.to_dataframe()
        current_year_df_list.append(df)
    
    current_year_df = pd.concat(current_year_df_list)
    current_year_df = current_year_df.reset_index().drop_duplicates()
    current_year_df['year'] = current_year_df['time'].dt.year
    current_year_df = current_year_df[current_year_df['year']==y]
    
    current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()
    current_year_average = current_year_average.reset_index()
    
    filtered_average = palau_eez(current_year_average)
    filtered_average = filtered_average.reset_index(drop= True)
    
    
    filtered_average.to_pickle("./CFS/pkl/1Y_CFS"+str(y)+".pkl")
    filtered_average.to_csv("./CFS/csv/1Y_CFS"+str(y)+".csv")
    
    
    elapsed = timeit.default_timer() - start_time
    print("Time to execute year " + str(y)+ " "  + str(elapsed))
    

  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1979 0.6670064170029946


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1980 0.7467555830007768


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1981 0.6682730000029551


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1982 0.6524761249966105


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1983 0.6402515829977347


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1984 0.6541193749944796


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1985 0.6398876249950263


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1986 0.6389126669964753


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1987 0.6361759579958743


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1988 0.6379252080005244


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1989 0.6378342919997522


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1990 0.6336483749983017


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1991 0.6367369169965968


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1992 0.651795999998285


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1993 0.6405752089995076


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1994 0.6611395419968176


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1995 0.6387230830005137


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1996 0.6435308329964755


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1997 0.6362241660026484


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1998 0.6334743749976042


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 1999 0.6326056660036556


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2000 0.6357432500008144


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2001 0.628678792003484


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2002 0.6314211670032819


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2003 0.6383243750024121


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2004 0.6302701249951497


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2005 0.6369791249962873


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2006 0.6373476250009844


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2007 0.6592835000046762


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2008 0.638644665996253


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2009 0.6326069999995525


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2010 0.6333737500026473


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2011 0.6356636249984149


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2012 0.6359584169986192


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2013 0.6430631250041188


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2014 0.6377942500039353


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2015 0.634587125001417


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2016 0.6384458329994231


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2017 0.6343470409992733


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2018 0.6323188329988625


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2019 0.6330348750052508


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2020 0.6357905420009047


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2021 0.6337910840011318


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


Time to execute year 2022 0.6343277090054471
Time to execute year 2023 0.29574987499654526


  current_year_average = current_year_df.groupby(['latitude', 'longitude','year']).mean()


In [246]:
cfs_dfs_list = []
for x in os.listdir("./CFS/pkl"):
    cfs_dfs_list.append(pd.read_pickle("./CFS/pkl/"+x))
cfs_df = pd.concat(cfs_dfs_list)

In [263]:
master_cfs = cfs_df.sort_values(by=['year','latitude','longitude']).reset_index(drop=True)
master_cfs['sst'] = master_cfs['ocnsst'] - 273.15
master_cfs.to_pickle("./CFS/pkl/master_CFS"+".pkl")
master_cfs.to_csv("./CFS/csv/master_CFS"+".csv")

In [264]:
master_cfs

Unnamed: 0,latitude,longitude,year,ocnsst,sst
0,1.75,132.25,1979,302.017456,28.867462
1,1.75,132.75,1979,302.021484,28.871490
2,2.25,130.25,1979,301.977783,28.827789
3,2.25,130.75,1979,301.997925,28.847931
4,2.25,131.25,1979,302.007935,28.857941
...,...,...,...,...,...
9040,11.25,133.25,2023,302.460022,29.310028
9041,11.25,133.75,2023,302.463135,29.313141
9042,11.25,134.25,2023,302.465424,29.315430
9043,11.25,134.75,2023,302.463440,29.313446
