# Imports

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
# import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams.update(plt.rcParamsDefault)

In [None]:
# For plotting maps
import os
os.environ["PROJ_LIB"] = os.path.join(os.environ["CONDA_PREFIX"], "share", "proj")

# !conda install -c conda-forge basemap
from mpl_toolkits.basemap import Basemap

# # !pip install cartopy
# import cartopy.crs as ccrs
# import cartopy.feature as cfeature

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import mpl_toolkits.basemap # -> install 1.3.2
import sys
print(mpl_toolkits.basemap.__version__)

# Percentage coverage maps

In [None]:
regime_names_dict = {
    1: 'ICE I',
    2: 'ICE II',
    3: 'SUBTR I',
    4: 'SUBTR II',
    6: 'SUBP + UP I',
    7: 'SUBP + UP II',
    5: 'SUBP + UP III',
}
len(regime_names_dict)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Define the cluster_color_dict
cluster_color_dict = {
    1: sns.color_palette("light:blue", as_cmap=False, n_colors=10)[3],
    2: 'blue',
    3: sns.color_palette("autumn", as_cmap=False, n_colors=5)[4],#'yellow',
    4: 'orange',
    5: 'green',
    6: 'gray',
    7: sns.color_palette("light:green", as_cmap=False, n_colors=5)[2] # [2] is for regimes on map
}

In [None]:
sns.color_palette("light:green", as_cmap=False, n_colors=5)

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Define the colors
color1 = cluster_color_dict[5]  # RGB value
color2 = (1, 1, 1)  # White color
# Create a custom colormap
colors = [color1, color2]
cmap = LinearSegmentedColormap.from_list('custom_cmap', colors)

# Plot a colorbar to visualize the colormap
plt.figure(figsize=(6, 1))
plt.imshow([list(range(100))], cmap=cmap)
plt.colorbar(orientation='horizontal')
plt.show()

In [None]:
%%time
# months=['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
months = ['jul']
# Columns kept from every monthly clustering result.
track_columns = ['cluster', 'grid_id', 'nav_lat', 'nav_lon', 'time_counter']
track_df_list = []
for y in range(1958, 2019):
    for m in months:
        data_df = pd.read_pickle(f"output_reg_1958_2018/adaptive_hc_clusters_{y}_{m}.pkl")
        # Some files name the label column 'predicted_cluster'; rename() is a
        # no-op when that column is absent, so no explicit check is needed.
        data_df = data_df.rename(columns={'predicted_cluster': 'cluster'})
        track_df_list.append(data_df[track_columns])
print(len(track_df_list))

In [None]:
df_tracked_concatenated = pd.concat(track_df_list, ignore_index=True)

In [None]:
# Group by 'lat' and 'lon', and count the occurrences of each cluster
df_grouped = df_tracked_concatenated.groupby(['nav_lat', 'nav_lon', 'cluster']).size()
df_grouped

In [None]:
# Convert the resulting series into a dataframe and reset index
df_result = df_grouped.to_frame(name='count').reset_index()
df_result

In [None]:
df_result['count'].max()

In [None]:
cluster_number = 2 #3,4,7,5,1,2

# Number of (year, month) snapshots that were concatenated; each grid point can
# be assigned to the cluster at most once per snapshot, so this is the
# denominator of the percentage (61 when a single month is loaded).
n_snapshots = len(track_df_list)

# Filter the dataframe based on the cluster number. The labels are compared as
# strings so that both string-typed and integer-typed cluster columns match.
# .copy() makes df_filtered an independent frame, so adding a column below does
# not trigger SettingWithCopyWarning.
df_filtered = df_result.loc[df_result['cluster'].astype(str) == str(cluster_number)].copy()
df_filtered['count_perc'] = (df_filtered['count']/n_snapshots)*100
df_filtered

In [None]:
df_filtered['count_perc'].max()

In [None]:
df_filtered['count_perc'].min()

In [None]:
fig = plt.figure(figsize=(20, 16), edgecolor='w')
world_map = Basemap(projection='cyl', resolution='c',
            llcrnrlat=-90, urcrnrlat=90,
            llcrnrlon=-180, urcrnrlon=180, )

# m.shadedrelief()
## Fill the land mass and lakes
world_map.fillcontinents(color='black') #color_lake='aqua'

# Define the colors
color1 = cluster_color_dict[cluster_number]  # RGB value
# color1 = (0.5019607843137255, 0.803921568627451, 0.19607843137254902)
color2 = (1, 1, 1)  # White color
# Create a custom colormap
colors = [color2, color1]
cmap_biomes = LinearSegmentedColormap.from_list('custom_cmap', colors)

# plt.title('Subtropical I', fontsize=20)
world_map_scatter =world_map.scatter(df_filtered['nav_lon'], df_filtered['nav_lat'],
                                     s = 5, c = df_filtered['count_perc'],
                                     vmin=1, vmax =100, 
#                                      cmap='Purples', 
                                     cmap=cmap_biomes, 
                                     edgecolors='none')

# 3-Oranges, 4-Wistia, 5-Purples, 7 - Greens, 1,2 - Blues

cbar = plt.colorbar(world_map_scatter, shrink = 0.5, pad=0.01)
cbar.ax.tick_params(labelsize=18)

cbar.set_label('Percentage coverage', fontsize=24)
plt.show()

# Zone Analysis

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Define the cluster_color_dict
cluster_color_dict = {
    1: sns.color_palette("light:blue", as_cmap=False, n_colors=10)[3],
    2: 'blue',
    3: sns.color_palette("autumn", as_cmap=False, n_colors=5)[4],#'yellow',
    4: 'orange',
    5: 'green',
    6: 'gray',
    7: sns.color_palette("light:green", as_cmap=False, n_colors=5)[2]
}

In [None]:
regime_names_dict = {
    1: 'ICE I',
    2: 'ICE II',
    3: 'SUBTR I',
    4: 'SUBTR II',
    5: 'SUBP + UP III', #dark green
    6: 'SUBP + UP I',
    7: 'SUBP + UP II',
    
}
len(regime_names_dict)

In [None]:
def get_zoned(appended_data):
    """Split a gridded dataframe into four zones using lat/lon bounding boxes.

    Parameters
    ----------
    appended_data : pandas.DataFrame
        Must contain the columns 'nav_lat' and 'nav_lon'. The frame is not
        modified.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(zone_ARCTIC, zone_NORTH_ATLANTIC, zone_EQ_PACIFIC, zone_SOUTHERN_OCEAN)``.
        Each is a new frame holding the rows inside the zone plus a 'zone'
        column with the zone name. The boxes are:

        * ARCTIC: nav_lat > 70
        * NORTH_ATLANTIC: -75 <= nav_lon <= 0 and 10 <= nav_lat <= 70
        * EQ_PACIFIC: -10 <= nav_lat <= 10 and nav_lon in [105, 180] or [-180, -80]
        * SOUTHERN_OCEAN: nav_lat <= -45
    """
    lat = appended_data['nav_lat']
    lon = appended_data['nav_lon']

    # .assign() returns a new frame, so the 'zone' column is never written onto
    # a slice of the input (the source of SettingWithCopyWarning).
    zone_ARCTIC = appended_data.loc[lat > 70.0].assign(zone='ARCTIC')

    in_north_atlantic = (lon >= -75.0) & (lon <= 0.0) & (lat >= 10) & (lat <= 70)
    zone_NORTH_ATLANTIC = appended_data.loc[in_north_atlantic].assign(zone='NORTH_ATLANTIC')

    # The equatorial Pacific straddles the antimeridian, hence two longitude
    # ranges; rows of the first range come first in the result.
    in_eq_band = (lat >= -10.0) & (lat <= 10.0)
    in_lon_range_1 = (lon >= 105.0) & (lon <= 180.0)
    in_lon_range_2 = (lon >= -180.0) & (lon <= -80.0)
    zone_EQ_PACIFIC = pd.concat([
        appended_data.loc[in_eq_band & in_lon_range_1],
        appended_data.loc[in_eq_band & in_lon_range_2],
    ]).assign(zone='EQ_PACIFIC')

    zone_SOUTHERN_OCEAN = appended_data.loc[lat <= -45].assign(zone='SOUTHERN_OCEAN')

    return zone_ARCTIC, zone_NORTH_ATLANTIC, zone_EQ_PACIFIC, zone_SOUTHERN_OCEAN

In [None]:
def calc_seasonal_zonal_coverage(df, excluded_clusters=(6,)):
    """Percentage of a zone's total area covered by each cluster.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'cluster' and 'area'.
    excluded_clusters : iterable, optional
        Cluster labels to leave out of the result (default: cluster 6). Their
        area still counts towards the total used as the denominator.

    Returns
    -------
    clusters_list : numpy.ndarray
        Sorted labels of the reported clusters.
    perc_cov : list of float
        Coverage in percent, aligned element-for-element with
        ``clusters_list`` so the two can safely be zipped.
    """
    total_area = df['area'].sum()
    clusters_list = np.sort(df['cluster'].unique())
    # Drop excluded labels from the returned labels as well; otherwise the two
    # return values have different lengths and zip() pairs percentages with
    # the wrong cluster.
    clusters_list = clusters_list[~np.isin(clusters_list, list(excluded_clusters))]
    perc_cov = [
        (df.loc[df['cluster'] == c, 'area'].sum() / total_area) * 100
        for c in clusters_list
    ]
    return clusters_list, perc_cov

In [None]:
# https://matplotlib.org/stable/api/markers_api.html
def plot_perc_cov(data, year=None, is_annual=False, month=None, is_seasonal=False):
    """Placeholder for a percentage-coverage plot; not implemented yet.

    All arguments are accepted but ignored, and the function always returns 0.
    """
    # for z in zones:
    return 0

In [None]:
zones = ['ARCTIC','NORTH_ATLANTIC', 'EQ_PACIFIC', 'SOUTHERN_OCEAN']

In [None]:
months_12 = {
            'jan':'-01-',
            'feb':'-02-',
            'mar':'-03-',
            'apr':'-04-',
            'may':'-05-',
            'jun':'-06-',
            'jul':'-07-',
            'aug':'-08-',
            'sep':'-09-',
            'oct':'-10-',
            'nov':'-11-',
            'dec':'-12-',
}

In [None]:
for m in months:
    for yr in range(1958,2019):
        print(yr, m)
        data_df = pd.read_pickle(f"output_reg_1958_2018/adaptive_hc_clusters_{yr}_{m}.pkl")
        print(data_df.columns)
        print(data_df)
        break

In [None]:
data_df

## Seasonal Percentage Coverage over 61 years

In [None]:
%%time

# months=['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
months=['jan']

# Zone labels in the same order as the frames returned by get_zoned().
zone_names = ['ARCTIC', 'NORTH_ATLANTIC', 'EQ_PACIFIC', 'SOUTHERN_OCEAN']
# Columns handed to get_zoned().
zone_input_cols = ['nav_lat', 'nav_lon', 'cluster', 'e1t', 'e2t', 'area']

# One entry per (month, year, zone, cluster); turned into a DataFrame later.
year_list = []
month_list = []
biome_list = []
zone_list = []
perc_cov_list =[]

for m in months:
    for yr in range(1958,2019):
        print(yr, m)
        data_df = pd.read_pickle(f"output_reg_1958_2018/adaptive_hc_clusters_{yr}_{m}.pkl")
        if 'predicted_cluster' in data_df.columns:
            data_df = data_df.rename(columns={'predicted_cluster': 'cluster'})
        data_df['cluster'] = data_df['cluster'].astype(int)

        zone_frames = get_zoned(data_df[zone_input_cols])
        # Keep the per-zone names bound for later inspection in the notebook.
        zone_ARCTIC, zone_NORTH_ATLANTIC, zone_EQ_PACIFIC, zone_SOUTHERN_OCEAN = zone_frames

        # Same bookkeeping for every zone, in the order ARCTIC, NORTH_ATLANTIC,
        # EQ_PACIFIC, SOUTHERN_OCEAN.
        for zone_name, zone_df in zip(zone_names, zone_frames):
            clusters_list, perc_cov = calc_seasonal_zonal_coverage(df = zone_df)
            for c,p in zip(clusters_list,perc_cov):
                year_list.append(yr)
                month_list.append(m)
                biome_list.append(c)
                zone_list.append(zone_name)
                perc_cov_list.append(p)
        

### Over 1 month but 61 years

In [None]:
df = pd.DataFrame({'month':month_list, 'year':year_list, 
                   'zone':zone_list, 'cluster': biome_list, 
                   'coverage': perc_cov_list})
df

In [None]:
df_zone = df.loc[df['zone'] == 'SOUTHERN_OCEAN']
# cls_list = [3,4,7] ## N Atlantic, E Pacific
cls_list = [3,5,7] ## S Ocean

# NOTE: deliberately not named `year_list` -- that global holds the per-row
# years accumulated by the loading loop and is reused to rebuild `df` further
# down; overwriting it here breaks that cell with mismatched column lengths.
plot_years = list(range(1958, 2019))

# One [coverage_per_year, years] pair per cluster; a year in which the cluster
# is absent from the zone yields NaN.
averages_covs = []
for cls in cls_list:
    df_zone_cls = df_zone.loc[df_zone['cluster'] == cls]
    cov_list = [
        df_zone_cls.loc[df_zone_cls['year'] == yr, 'coverage'].mean()
        for yr in plot_years
    ]
    averages_covs.append([cov_list, list(plot_years)])

In [None]:
# averages_covs[0][1]

In [None]:
# Plotting: yearly coverage of each selected regime in the zone chosen above
# (df_zone is filtered to SOUTHERN_OCEAN).
fig, ax = plt.subplots(figsize=(10, 6), dpi=200)
ax.grid(zorder=0)

# averages_covs holds one [coverage, years] pair per entry of cls_list.
for cls_label, (coverage_series, years_axis) in zip(cls_list, averages_covs):
    ax.plot(years_axis, coverage_series, label=regime_names_dict[cls_label],
            color=cluster_color_dict[cls_label], marker='.', linestyle='-', zorder=3)

ax.set_xlabel('Years', fontsize=20)
# The label previously ended at "in "; name the zone that is actually plotted.
ax.set_ylabel("% Coverage in Southern Ocean", fontsize=20)
legend_properties = {'weight':'bold', 'size':8}
ax.legend(bbox_to_anchor=(1.18, 1.0), prop=legend_properties)
plt.xticks(rotation=45)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
plt.tight_layout()
plt.show()

### Over 12 months

In [None]:
# Create DataFrame
df = pd.DataFrame({'month':month_list, 'year':year_list, 
                   'zone':zone_list, 'cluster': biome_list, 
                   'coverage': perc_cov_list})
df

### Select Zones below

In [None]:
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
zones = ['ARCTIC','NORTH_ATLANTIC', 'EQ_PACIFIC', 'SOUTHERN_OCEAN']

In [None]:
selected_z = zones[1] ## Here!

selected_z

In [None]:
df_zone = df.loc[df['zone'] == selected_z]
df_zone

In [None]:
# cls_list = np.sort(df_zone['cluster'].unique())

# cls_list = [1,2,5,7] ## ARCTIC
cls_list = [3,4,7] ## N Atlantic, E Pacific
# cls_list = [3,5,7] ## S Ocean

# For every selected cluster: per-month mean and standard deviation of the
# coverage, stored as [means, std_devs].
averages_covs = []
for cls in cls_list:
    print(cls)
    mean_list, error_list = [], []
    for m in months:
        print(m)
        in_month_and_cluster = (df_zone['month'] == m) & (df_zone['cluster'] == cls)
        df_zone_month = df_zone.loc[in_month_and_cluster]
        monthly_coverage = df_zone_month['coverage']
        mean_list.append(monthly_coverage.mean())
        error_list.append(monthly_coverage.std())
    averages_covs.append([mean_list, error_list])
#     print()

In [None]:
# averages_covs[2]
cls_list

In [None]:
averages_covs[1]

In [None]:
# Plotting
fig, ax = plt.subplots(figsize=(10, 6))
ax.grid(zorder=0)

for i, cov in enumerate(averages_covs):
#     print(i)
    ## cov[0] has mean and cov[1] has std dev.
    
    plt.plot(months, cov[0], label=regime_names_dict[cls_list[i]], 
             color=cluster_color_dict[cls_list[i]], marker='.', linestyle='-', zorder=3)

    # plt.plot(months, temperature, marker='o', color='blue', label='Temperature')
    plt.fill_between(months, 
                     [t - std for t, std in zip(cov[0], cov[1])],[t + std for t, std in zip(cov[0], cov[1])], 
                     alpha=0.1, zorder=3, color=cluster_color_dict[cls_list[i]]
                     # label=f'{cls_list[i]}-Std Dev'
                    )
plt.xlabel('Months', fontsize=20)
# plt.errorbar(months, cov[1])    
plt.ylabel("% Coverage in North Atlantic", fontsize=20)
# plt.title(selected_z)
legend_properties = {'weight':'bold', 'size':8}
plt.legend(bbox_to_anchor=(1.18, 1.0), prop=legend_properties)
plt.xticks(rotation=45)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
plt.tight_layout()
plt.show()

# # Set x-axis label
# plt.xlabel('Years', fontsize=20)

# # Set y-axis label
# plt.ylabel('% Coverage in Southern Ocean', fontsize=20)

# # Add legend
# # plt.legend(fontsize=20)
# legend_properties = {'weight':'bold', 'size':8}
# ax.legend(prop=legend_properties, bbox_to_anchor=(1.18, 1.0))

# # Show plot
# plt.xticks(rotation=45)
# ax.tick_params(axis='x', labelsize=15)
# ax.tick_params(axis='y', labelsize=15)
# plt.tight_layout()
# plt.show()

## Physical Variables Analysis

In [None]:
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

In [None]:
def get_year_month(df, yrmonth):
    """Return the rows whose 'time_counter' text contains ``yrmonth``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'time_counter' column. The frame is left untouched; the
        column is converted to text only for the comparison.
    yrmonth : str
        Substring to look for (e.g. '-01-'), matched literally and
        case-insensitively.

    Returns
    -------
    pandas.DataFrame
        Matching rows with a fresh RangeIndex; the previous index is kept as
        an 'index' column (plain ``reset_index()``).
    """
    # Build the text view on the side instead of overwriting df['time_counter'],
    # so repeated calls on the same frame do not change the caller's data.
    time_text = df['time_counter'].astype("string")
    # regex=False: the pattern is a plain substring; na=False: missing
    # timestamps never match.
    matches = time_text.str.contains(yrmonth, case=False, regex=False, na=False)
    return df.loc[matches].reset_index()

In [None]:
def round_nav_lat(df):
    '''
    Round the 'nav_lat' and 'nav_lon' columns to 2 decimal places.

    Each value is rounded to the nearest value with Python's built-in round().
    The columns are overwritten in place and the same dataframe is returned.
    '''
    def _to_two_decimals(value):
        return round(value, 2)

    for coord_col in ('nav_lat', 'nav_lon'):
        df[coord_col] = df[coord_col].apply(_to_two_decimals)
    return df

In [None]:
vars = ['SST', 'SAL', 'ice_frac', 'MLD', 'ALK', 'DICP', 'fco2_pre']

### Over Biomes + 12 months + 61 years

In [None]:
# physical_vars_df = pd.read_pickle(f"output_reg_1958_2018/ocean_data_{2009}_df.pkl")
physical_vars_df = pd.read_pickle(f"output_reg_1958_2018/adaptive_hc_clusters_{1960}_{'jan'}.pkl")
physical_vars_df

In [None]:
def analyse_physical_variables_in_biomes(zone_ARCTIC):
    """Unimplemented stub: ignores ``zone_ARCTIC`` and returns None."""
    return

In [None]:
def calc_area_weighted_mean(df,phy_var):
    """Mean of column ``phy_var`` weighted by the product of 'e1t' and 'e2t'.

    Computes sum(phy_var * e1t * e2t) / sum(e1t * e2t) over all rows of ``df``.
    """
    e1t = df['e1t']
    e2t = df['e2t']
    weighted_total = (df[phy_var] * e1t * e2t).sum()
    weight_total = (e1t * e2t).sum()
    return weighted_total / weight_total

In [None]:
zones_names_dict={
    'ARCTIC':'Arctic',
    'NORTH_ATLANTIC': 'North Atlantic',
    'EQ_PACIFIC': 'Eq. Pacific',
    'SOUTHERN_OCEAN': 'Southern Ocean'
}

In [None]:
# NOTE(review): `vars` shadows the Python builtin of the same name; the name is
# kept here because other cells may rely on it.
vars = ['SST', 'SAL', 'ice_frac', 'MLD', 'ALK', 'DICP', 'fco2_pre']
# Axis-label text (matplotlib mathtext) for the physical variables.
phy_var_dict={
    # Raw string: '\c' is not a valid escape sequence in a normal string literal
    # and triggers a warning on recent Python versions; the text is unchanged.
    'SST': r'SST ($^\circ$C)',
    'SAL': 'Salinity',
    'fco2_pre': u'fCO$_2$ pre-ind. (\u03bc-atm)',
    'MLD': 'MLD (m)'
}

In [None]:
%%time

# Column of the merged frame that is summarised per (month, year, zone, cluster).
phy_var='somxl010'

# One entry per (month, year, zone, cluster); turned into a DataFrame afterwards.
col_year=[]
col_month=[]
col_cluster_label=[]
col_zone=[]
col_phy_var_avg=[]
col_phy_var_wtd_avg=[]
col_phy_var_std=[]

months=['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
# Zone labels in the same order as the frames returned by get_zoned().
zone_names = ['ARCTIC', 'NORTH_ATLANTIC', 'EQ_PACIFIC', 'SOUTHERN_OCEAN']

for m in months:
    print(m)
    for yr in range(1958,2019):
        data_df = pd.read_pickle(f"output_reg_1958_2018/adaptive_hc_clusters_{yr}_{m}.pkl")
        # Attach this month's 'somxl010' values to the clustering result by
        # matching on coordinates rounded to 2 decimals (inner join: grid
        # points without a match are dropped).
        mld_df = pd.read_pickle(f"../dataset/MLD/ORCA025_{yr}_MLD.pkl")
        mld_df = get_year_month(df = mld_df , yrmonth = months_12[m])
        mld_df = round_nav_lat(mld_df)
        data_df = data_df.merge(mld_df[['nav_lat', 'nav_lon', 'somxl010']],on=['nav_lat', 'nav_lon'])

        if 'predicted_cluster' in data_df.columns:
            data_df = data_df.rename(columns={'predicted_cluster': 'cluster'})
        data_df['cluster'] = data_df['cluster'].astype(int)

        zone_frames = get_zoned(data_df)
        # Keep the per-zone names bound for later inspection in the notebook.
        zone_ARCTIC, zone_NORTH_ATLANTIC, zone_EQ_PACIFIC, zone_SOUTHERN_OCEAN = zone_frames

        # Same statistics for every zone, in the order ARCTIC, NORTH_ATLANTIC,
        # EQ_PACIFIC, SOUTHERN_OCEAN.
        for zone_name, zone_df in zip(zone_names, zone_frames):
            cluster_list = zone_df['cluster'].unique()
            for c in cluster_list:
                df_ = zone_df.loc[zone_df['cluster']==c]

                col_year.append(yr)
                col_month.append(m)
                col_cluster_label.append(c)
                col_zone.append(zone_name)

                col_phy_var_avg.append(df_[phy_var].mean())
                col_phy_var_std.append(df_[phy_var].std())
                col_phy_var_wtd_avg.append(calc_area_weighted_mean(df=df_, phy_var=phy_var))

In [None]:
# Create DataFrame
df = pd.DataFrame({'month':col_month, 'year':col_year, 'zone':col_zone, 'cluster': col_cluster_label, 
                   'var_average': col_phy_var_avg, 
                   'var_wtd_avg':col_phy_var_wtd_avg,
                   'var_std_dev':col_phy_var_std})
df

In [None]:
df.to_pickle("MLD_zonal_analysis.pkl")

### Select zones and plot the info

In [None]:
df = pd.read_pickle("MLD_zonal_analysis.pkl")
df

In [None]:
# mld_df = pd.read_pickle(f"../dataset/MLD/ORCA025_{1958}_MLD.pkl")
# mld_df = get_year_month(df = mld_df , yrmonth = months_12['jan'])
# mld_df = round_nav_lat(mld_df)
# mld_df

In [None]:
mld_df['somxl010'].max()

In [None]:
zone_name='SOUTHERN_OCEAN' #'ARCTIC' 'SOUTHERN_OCEAN' 'EQ_PACIFIC'

In [None]:
df_zone = df.loc[df['zone'] == zone_name]
df_zone

In [None]:
# cls_list = np.sort(df_zone['cluster'].unique())

# cls_list = [1,2,5,7] ## ARCTIC
# cls_list = [3,4,7] ## N Atlantic, E Pacific
cls_list = [3,5,7] ## S Ocean

# For every selected cluster: per-month mean and standard deviation of the
# stored weighted averages, kept as [means, std_devs].
averages_covs = []
for cls in cls_list:
    print(cls)
    mean_list, error_list = [], []
    for m in months:
        print(m)
        month_and_cluster = (df_zone['month'] == m) & (df_zone['cluster'] == cls)
        df_zone_month = df_zone.loc[month_and_cluster]
        weighted_values = df_zone_month['var_wtd_avg']
        mean_list.append(weighted_values.mean())
        error_list.append(weighted_values.std())
    averages_covs.append([mean_list, error_list])
#     print()

In [None]:
# averages_covs[3]

In [None]:
# 0.29467082 + 2.8515444

In [None]:
# Plotting
fig, ax = plt.subplots(figsize=(10, 6), dpi=200)
ax.grid(zorder=0)

for i, cov in enumerate(averages_covs):
#     print(i)
    ## cov[0] has mean and cov[1] has std dev.
    
    plt.plot(months, cov[0], label=regime_names_dict[cls_list[i]], 
             color=cluster_color_dict[cls_list[i]], marker='.', linestyle='-', zorder=3)

    # plt.plot(months, temperature, marker='o', color='blue', label='Temperature')
    plt.fill_between(months, 
                     [t - std for t, std in zip(cov[0], cov[1])],[t + std for t, std in zip(cov[0], cov[1])], 
                     alpha=0.1, zorder=3, color=cluster_color_dict[cls_list[i]]
                     # label=f'{cls_list[i]}-Std Dev'
                    )
plt.xlabel('Months', fontsize=20)
# plt.errorbar(months, cov[1])    
plt.ylabel(" MLD (m) in Southern Ocean", fontsize=20)
# plt.ylabel(f"{phy_var_dict[phy_var]} in {zones_names_dict[zone_name]}", fontsize=16)
# plt.ylabel("SST ($^\circ$C) in Southern Ocean", fontsize=20)
# plt.title(selected_z)
legend_properties = {'weight':'bold', 'size':8}
plt.legend(bbox_to_anchor=(1.18, 1.0), prop=legend_properties)
plt.xticks(rotation=45)
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=15)
plt.tight_layout()
plt.show()

# # Set x-axis label
# plt.xlabel('Years', fontsize=20)

# # Set y-axis label
# plt.ylabel('% Coverage in Southern Ocean', fontsize=20)

# # Add legend
# # plt.legend(fontsize=20)
# legend_properties = {'weight':'bold', 'size':8}
# ax.legend(prop=legend_properties, bbox_to_anchor=(1.18, 1.0))

# # Show plot
# plt.xticks(rotation=45)
# ax.tick_params(axis='x', labelsize=15)
# ax.tick_params(axis='y', labelsize=15)
# plt.tight_layout()
# plt.show()

### Over clusters

In [None]:
merged_df_list = []

for m in months:
    print(m)
    df_month = get_year_month(df = physical_vars_df , yrmonth = months_12[m])
    d = df_month[['MLD', 'nav_lat', 'nav_lon']]
    df_cluster = pd.read_pickle(f"output_tracking/adaptive_hc_clusters_{2009}_{m}.pkl")
    if 'predicted_cluster' in df_cluster.columns:
            df_cluster = df_cluster.rename(columns={'predicted_cluster': 'cluster'})
    df_cluster['cluster'] = df_cluster['cluster'].astype(int)
    merged_df = pd.merge(df_cluster,d, on = ['nav_lat', 'nav_lon'], how = 'left')
    merged_df['month'] = m
    merged_df_list.append(merged_df)

vars_df = pd.concat(merged_df_list)
vars_df

In [None]:
vars_df['cluster'].unique()

In [None]:
# SST_lst = [] 
# SAL_lst = [] 
# ice_frac_lst = [] 
# MLD_lst = []
# ALK_lst = [] 
# DICP_lst = []
# fco2_pre_lst = []
cluster_lst = np.sort(vars_df['cluster'].unique())
# One list of 12 monthly means per cluster, in the order of cluster_lst.
physical_var_lst_all = []
# NOTE(review): 'ICE' must be a column of vars_df; the variable list defined
# earlier in the notebook uses 'ice_frac' -- confirm the intended column name.
var_name = 'ICE' ##change here

for c in cluster_lst:
    print(c)
    physical_var_lst = []
    _df_m = vars_df.loc[vars_df['cluster'] == c]
    for m in months:
        print(m)
        _df_ = _df_m.loc[_df_m['month'] == m]
        monthly_mean = _df_[var_name].mean()
        print(monthly_mean)
        physical_var_lst.append(monthly_mean)
        print()
    # A leftover debugging `break` used to sit before this append, which left
    # physical_var_lst_all empty and the following plot blank.
    physical_var_lst_all.append(physical_var_lst)

In [None]:
len(physical_var_lst_all)

In [None]:
plt.figure(figsize=(10, 6))
for i, physical_var_lst in enumerate(physical_var_lst_all):
    # if i == 6:
    #     continue
    plt.plot(months, physical_var_lst, label=cluster_lst[i], marker='o')
    plt.xlabel("Months")
    plt.ylabel(var_name)

plt.legend(bbox_to_anchor=(1.1, 0.8))
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
## Hi Miri, I will be staying home as I have monster cramps today.
## I will join from the Zoom link today and ask the questions.

#### Over zones

In [None]:
# Monthly mean of 'ALK' per zone (the MLD_ prefix of the list names is
# historical; the lists hold whichever variable is averaged below).
MLD_zone_NORTH_ATLANTIC = []
MLD_zone_EQ_PACIFIC = []
MLD_zone_SOUTHERN_OCEAN = []

for m in months:
    df_month = get_year_month(df = physical_vars_df , yrmonth = months_12[m])
    # get_zoned() returns four frames, Arctic first; unpacking only three
    # raises "too many values to unpack". The Arctic frame is not used here.
    zone_ARCTIC, zone_NORTH_ATLANTIC, zone_EQ_PACIFIC, zone_SOUTHERN_OCEAN = get_zoned(appended_data=df_month)
    MLD_zone_NORTH_ATLANTIC.append(zone_NORTH_ATLANTIC['ALK'].mean())
    MLD_zone_EQ_PACIFIC.append(zone_EQ_PACIFIC['ALK'].mean())
    MLD_zone_SOUTHERN_OCEAN.append(zone_SOUTHERN_OCEAN['ALK'].mean())

In [None]:
# Monthly mean of 'fco2_pre' per zone for 2009 (the MLD_ prefix of the list
# names is historical; the lists hold whichever variable is averaged below).
MLD_zone_NORTH_ATLANTIC = []
MLD_zone_EQ_PACIFIC = []
MLD_zone_SOUTHERN_OCEAN = []

for m in months:
    df_month = pd.read_pickle(f"output_reg_1958_2018/adaptive_hc_clusters_{2009}_{m}.pkl")
    # get_zoned() returns four frames, Arctic first; unpacking only three
    # raises "too many values to unpack". The Arctic frame is not used here.
    zone_ARCTIC, zone_NORTH_ATLANTIC, zone_EQ_PACIFIC, zone_SOUTHERN_OCEAN = get_zoned(appended_data=df_month)
    MLD_zone_NORTH_ATLANTIC.append(zone_NORTH_ATLANTIC['fco2_pre'].mean())
    MLD_zone_EQ_PACIFIC.append(zone_EQ_PACIFIC['fco2_pre'].mean())
    MLD_zone_SOUTHERN_OCEAN.append(zone_SOUTHERN_OCEAN['fco2_pre'].mean())

In [None]:
# Create subplots with 1 row and 3 columns
fig, axs = plt.subplots(1, 3, figsize=(15, 5))

axs[0].plot(months, MLD_zone_NORTH_ATLANTIC)
axs[0].set_title("zone_NORTH_ATLANTIC")
axs[0].set_xlabel('Months')
axs[0].set_ylabel('fco2_pre Averaged over months')

axs[1].plot(months, MLD_zone_EQ_PACIFIC)
axs[1].set_title("zone_EQ_PACIFIC")
axs[1].set_xlabel('Months')
axs[1].set_ylabel('fco2_pre Averaged over months')

axs[2].plot(months, MLD_zone_SOUTHERN_OCEAN)
axs[2].set_title("zone_SOUTHERN_OCEAN")
axs[2].set_xlabel('Months')
axs[2].set_ylabel('fco2_pre Averaged over months')

# axs[0].legend()

# Adjust layout
plt.tight_layout()

# Show plot
plt.show()

## Percentage Coverage

In [None]:
# year_1 = 2009
# month_1 = 'jan'
# df_1 = pd.read_pickle(f"output_tracking/adaptive_hc_clusters_{year_1}_{month_1}.pkl")
# df_1.columns

In [None]:
months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
zones = ['NORTH_ATLANTIC', 'EQ_PACIFIC', 'SOUTHERN_OCEAN']

In [None]:
# month -> {cluster label: % coverage} for each zone, for the year 2009.
NORTH_ATLANTIC_details = {}
EQ_PACIFIC_details = {}
SOUTHERN_OCEAN_details = {}
for m in months:
    print(m)
    data_df = pd.read_pickle(f"output_tracking/adaptive_hc_clusters_{2009}_{m}.pkl")
    if 'predicted_cluster' in data_df.columns:
        data_df = data_df.rename(columns={'predicted_cluster': 'cluster'})
    # print(data_df.columns)
    data_df['cluster'] = data_df['cluster'].astype(int)
    # get_zoned() returns four frames, Arctic first; unpacking only three
    # raises "too many values to unpack". The Arctic frame is not used here.
    zone_ARCTIC, zone_NORTH_ATLANTIC, zone_EQ_PACIFIC, zone_SOUTHERN_OCEAN = get_zoned(
        data_df[['nav_lat', 'nav_lon', 'cluster','e1t', 'e2t', 'area']])
    for zone_details, zone_df in ((NORTH_ATLANTIC_details, zone_NORTH_ATLANTIC),
                                  (EQ_PACIFIC_details, zone_EQ_PACIFIC),
                                  (SOUTHERN_OCEAN_details, zone_SOUTHERN_OCEAN)):
        zone_clusters, zone_perc_cov = calc_seasonal_zonal_coverage(df = zone_df)
        # calc_seasonal_zonal_coverage reports no percentage for cluster 6, so
        # that label is dropped before pairing labels with percentages.
        reported_clusters = [int(c) for c in zone_clusters if c != 6]
        # Stored as a dict because the plotting cell looks clusters up by label.
        zone_details[m] = dict(zip(reported_clusters, zone_perc_cov))
    # if sum(zone_area_details[m].values()) !=100.0:
    #     print(sum(zone_area_details[m].values()))
    #     raise Exception

In [None]:
# SOUTHERN_OCEAN_details

In [None]:
# Zone to plot and its display name -- change both together.
data = EQ_PACIFIC_details
zone_label = 'Eq. Pacific'

# Marker types for each integer
marker_types = {1: 'o', 2: 's', 3: '<', 4: '>', 5: 'P', 6: 'X', 7: 'D'} #d, v

# Plotting
plt.figure(figsize=(10, 6))

month_labels = list(data.keys())
for num in [1, 2, 3, 4, 5, 6, 7]:
    # A cluster missing from a month is drawn as a gap (NaN).
    monthly_coverage = [month_data.get(num, np.nan) for month_data in data.values()]
    plt.plot(month_labels, monthly_coverage,
             label=f'Province {num}', marker=marker_types[num],
             linestyle='-', color='black', linewidth=0.7)

# Set x-axis label
plt.xlabel('Months')

# Set y-axis label (previously said "Southern Ocean" while plotting Eq. Pacific data)
plt.ylabel(f'% Coverage in {zone_label}')

# Add legend
plt.legend()

# Show plot
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
sum(zone_area_details['jan'].values)

In [None]:
zone_area_details[''] 