# Exploratory analysis for a machine learning algorithm (random forest) for coral bleaching classification

## Load dataset

In [None]:
'''
    Import libraries
'''
from sklearn import datasets
import pandas as pd
import numpy as np
import sklearn
import seaborn as sb
import matplotlib.pyplot as plt
from numpy import mean 
from numpy import std
import pingouin as pg
from scipy.stats import shapiro
from scipy.stats import levene 
from scipy.stats import bartlett
from scipy.stats import kruskal
from scipy.stats import mannwhitneyu    
import scikit_posthocs as sp 
from scipy import stats
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.ensemble import RandomForestClassifier
# check scikit-learn version
print(sklearn.__version__)

In [None]:
'''
    Read the complete SST / cloud-cover table from disk
'''
# low_memory=False turns off chunked parsing, so each column's dtype is
# inferred from the whole file rather than piece by piece.
data = pd.read_csv('df_sst_clouds.csv', low_memory=False)
len(data.index)

In [None]:
'''
    Keep only the records whose SEVERITY_CODE is 0, 1, 2 or 3,
    then discard every row that still contains a missing value
'''
data = data[data['SEVERITY_CODE'].isin([0, 1, 2, 3])]
#data = data[(data.YEAR >= 2005)] # First year with more than 100 records
#list(data.columns)
data = data.dropna()  # drop rows that contain NaN in any column
len(data)

In [None]:
'''
    Add columns that merge SEVERITY_CODE levels into two string-labelled groups
'''
# "0" versus "123": severity 0 against everything else
data['SEVERITY_CODE2'] = np.where(data['SEVERITY_CODE'].eq(0), "0", "123")
# "01" versus "23": severity up to 1 against severity above 1
data['SEVERITY_CODE3'] = np.where(data['SEVERITY_CODE'].le(1), "01", "23")
# "012" versus "3": severity up to 2 against severity above 2
data['SEVERITY_CODE5'] = np.where(data['SEVERITY_CODE'].le(2), "012", "3")

In [None]:
# Preview the first five rows of the filtered table
data.head(n=5)

In [None]:
'''
    Split the table into predictors (X) and target (y)
'''
# Predictors (independent variables): everything except the target and the
# listed bookkeeping columns.
# NOTE(review): the derived SEVERITY_CODE2/3/5 columns are not removed here --
# confirm that is intended before X is used for anything beyond describe().
X = data.drop(
    columns=[
        'SEVERITY_CODE', 'COUNTRY', 'DAY', 'MONTH', 'YEAR', 'JD',
        'lat', 'lon', 'full_date', 'ITEM_ID',
    ]
)
# Target (dependent variable): the reported severity code
y = data.SEVERITY_CODE

In [None]:
'''
    Simple statistical description of variables
'''
# describe() returns one column of summary statistics per variable; on a frame
# with mixed dtypes pandas reports the numeric columns only by default.
var_stats = X.describe()
# Uncomment to export the summary table to CSV
#var_stats.to_csv('var_desc_stats.csv')
# Last expression of the cell, so the notebook renders the table
var_stats

In [None]:
'''
    Define dataset(s) for the correlogram: DAY of REPORT ("dor") variables
'''
# Keep only the day-of-report variables: remove every column whose name
# contains "adj", then every column whose name contains "_a" (the "CF_a..."
# variables).
data_dor = data.loc[:, ~data.columns.str.contains('adj')]
data_dor = data_dor.loc[:, ~data_dor.columns.str.contains('_a')]
# Predictors (independent variables). All three derived string columns
# (SEVERITY_CODE2/3/5) are dropped: leaving SEVERITY_CODE5 in X makes
# X.corr() raise on pandas >= 2.0, where non-numeric columns are no longer
# skipped silently.
X = data_dor.drop(
    [
        'DHW_class', 'SEVERITY_CODE', 'SEVERITY_CODE2', 'SEVERITY_CODE3',
        'SEVERITY_CODE5', 'COUNTRY', 'DAY', 'MONTH', 'YEAR', 'JD', 'lat',
        'lon', 'full_date', 'ITEM_ID', 'SST_std7', 'SST_std30', 'SST_std90',
    ],
    axis=1,
)
# Target (dependent variable): the reported severity code
y = data_dor['SEVERITY_CODE']

In [None]:
'''
    Pearson correlation heatmap of the day-of-report predictors
'''
pearsoncorr = X.corr(method='pearson')
plt.figure(figsize=(20,15))
# Symmetric colour scale centred on zero so positive and negative
# correlations get opposite colours.
sb.heatmap(
    pearsoncorr,
    cmap='coolwarm',
    center=0, vmin=-1, vmax=1,
    xticklabels=pearsoncorr.columns,
    yticklabels=pearsoncorr.columns,
    linewidth=0.5,
    #annot=True,
    fmt='.2g',
)
#plt.savefig('corr_plot.pdf', dpi=300)
plt.show()

In [None]:
'''
    Significance of the pairwise Pearson correlations
'''
# pearsonr is imported from scipy.stats itself; the scipy.stats.stats path
# used before is a deprecated alias.
from scipy.stats import pearsonr
# rcorr is the DataFrame method added by pingouin; with stars=False the
# p-values are shown as numbers instead of asterisks.
pval = X.rcorr(stars=False)
pval.to_csv('corr_pval_features.csv')
pval

In [None]:
'''
    Histogram of CF_a_runmean30
'''
# Read the column from `data`: X was rebuilt without any "_a" column for the
# correlogram, so X['CF_a_runmean30'] raises KeyError.
data['CF_a_runmean30'].hist(figsize=(20,17))
plt.show()
#plt.savefig('General_histograms.pdf', dpi=300)

In [None]:
'''
    Normality test (D'Agostino-Pearson "normaltest") for CF_a_runmean30
'''
from scipy.stats import normaltest
# Read the column from `data`: X no longer holds any "_a" column, so
# X['CF_a_runmean30'] raises KeyError.
normality = pg.normality(data['CF_a_runmean30'], method='normaltest')
#normality.to_csv('normality_test_features.csv')
stat, p = normaltest(data['CF_a_runmean30'])
print('Statistics=%.3f, p=%.3f' % (stat, p))

## Create DataFrames grouped by SEVERITY_CODE

In [None]:
'''
    Split the table into one sub-frame per severity grouping
'''
# One frame per original severity level (0, 1, 2, 3)
dfSC0, dfSC1, dfSC2, dfSC3 = (
    data[data['SEVERITY_CODE'] == code] for code in (0, 1, 2, 3)
)

# SEVERITY_CODE2: dfSC20 holds label "0", dfSC21 holds label "123"
dfSC20, dfSC21 = (
    data[data['SEVERITY_CODE2'] == label] for label in ('0', '123')
)

# SEVERITY_CODE3: dfSC30 holds label "01", dfSC31 holds label "23"
dfSC30, dfSC31 = (
    data[data['SEVERITY_CODE3'] == label] for label in ('01', '23')
)



In [None]:
# Show the column index of the table (original plus derived columns)
data.columns

# 1. How does DHW vary with bleaching severity?

In [None]:
'''
    Boxplot of DHW for each SEVERITY_CODE level
     ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One box per severity level, coloured by the same level
DHWboxplot = sb.boxplot(data=data, x='SEVERITY_CODE', y='DHW',
                        hue='SEVERITY_CODE', palette=pal)
plt.show()

In [None]:
'''
       Shapiro-Wilk test for normality
    DHW metrics within each SEVERITY_CODE group
'''
# The result is unpacked into `stat`, not `stats`: rebinding `stats` would
# replace the scipy.stats module imported at the top of the notebook and
# break every later `stats.<function>` call.
# Output order: all four DHW metrics for severity 0, then 1, 2 and 3.
for sc_frame in (dfSC0, dfSC1, dfSC2, dfSC3):
    for dhw_col in ('DHW', 'DHWrunmean7', 'DHWrunmean30', 'DHWrunmean90'):
        stat, p = shapiro(sc_frame[dhw_col])
        print(stat)
        print(p)
        print("")



In [None]:
'''
            Levene test
    ** test for equal variances of DHWrunmean7 across severity groups **
'''
# Unpack into `stat` so the scipy.stats module bound to `stats` stays intact
stat, p = levene(dfSC0.DHWrunmean7, dfSC1.DHWrunmean7,
                 dfSC2.DHWrunmean7, dfSC3.DHWrunmean7)
print(stat)
print(p)

In [None]:
'''
    One-way ANOVA of DHW across the four severity groups,
    followed by a Tukey HSD post-hoc comparison on the same variable
'''
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

aov = f_oneway(dfSC0.DHW, dfSC1.DHW, dfSC2.DHW, dfSC3.DHW)
# The post-hoc test must use the variable the ANOVA was run on (DHW);
# it previously compared CF_a_runmean30, which the ANOVA never tested.
tukey = pairwise_tukeyhsd(endog=data['DHW'],
                          groups=data['SEVERITY_CODE'],
                          alpha=0.05)

print("                     ANOVA             ")
print(aov)
print(" ")
print("                 Tukey post-hoc        ")
print(tukey)

In [None]:
'''
                            Kruskal-Wallis
    ** Test for differences in DHW between SEVERITY_CODE groups **
'''
# Call the `kruskal` function imported at the top of the notebook instead of
# `stats.kruskal`: earlier cells rebind the name `stats` to a float, which
# makes `stats.kruskal` raise AttributeError.
SC_DHW_Kruskal = kruskal(dfSC0.DHW, dfSC1.DHW, dfSC2.DHW, dfSC3.DHW, nan_policy='omit')
# SC_DHW_9_Kruskal = kruskal(dfSC0.DHW_9,dfSC1.DHW_9,dfSC2.DHW_9,dfSC3.DHW_9, nan_policy='omit')
# SC_DHWrunmean7_Kruskal = kruskal(dfSC0.DHWrunmean7,dfSC1.DHWrunmean7,dfSC2.DHWrunmean7,dfSC3.DHWrunmean7, nan_policy='omit')
# SC_DHWrunmean30_Kruskal = kruskal(dfSC0.DHWrunmean30,dfSC1.DHWrunmean30,dfSC2.DHWrunmean30,dfSC3.DHWrunmean30, nan_policy='omit')
# kruskal_test = kruskal(dfSC20.DHW,dfSC21.DHW, nan_policy='omit')
print(SC_DHW_Kruskal)
# print(SC_DHW_9_Kruskal)
# print(SC_DHWrunmean30_Kruskal)
# print(SC_DHWrunmean7_Kruskal)
# print(kruskal_test)



In [None]:
'''
         Dunn post-hoc test (Bonferroni-adjusted)
    ** Which SEVERITY_CODE groups differ for each DHW metric **
'''
DHW_dunn = sp.posthoc_dunn(
    data, val_col='DHW', group_col='SEVERITY_CODE', p_adjust='bonferroni')
DHW_9_dunn = sp.posthoc_dunn(
    data, val_col='DHW_9', group_col='SEVERITY_CODE', p_adjust='bonferroni')
DHWrunmean7_dunn = sp.posthoc_dunn(
    data, val_col='DHWrunmean7', group_col='SEVERITY_CODE', p_adjust='bonferroni')
DHWrunmean30_dunn = sp.posthoc_dunn(
    data, val_col='DHWrunmean30', group_col='SEVERITY_CODE', p_adjust='bonferroni')
# Only the DHW table is printed; the others stay available as variables
print('DHW_dunn')
print(DHW_dunn)
# print('DHW_9_dunn')
# print(DHW_9_dunn)
# print('DHWrunmean7_dunn')
# print(DHWrunmean7_dunn)
# print('DHWrunmean30_dunn')
# print(DHWrunmean30_dunn)

In [None]:
'''
    Boxplots of the DHW metrics for the two SEVERITY_CODE2 groups
     ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One figure per metric: DHW, DHWrunmean7, DHWrunmean30, DHWrunmean90
DHWboxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='DHW',
                        hue='SEVERITY_CODE2', palette=pal)
plt.show()
DHW_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='DHWrunmean7',
                                 hue='SEVERITY_CODE2', palette=pal)
plt.show()
DHWrunmean30boxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='DHWrunmean30',
                                 hue='SEVERITY_CODE2', palette=pal)
plt.show()
DHWrunmean30_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='DHWrunmean90',
                                          hue='SEVERITY_CODE2', palette=pal)
plt.show()

In [None]:
'''
    Only two groups here, so either Kruskal-Wallis or Mann-Whitney U applies
                            Kruskal-Wallis
    ** Test for differences in DHW metrics between SEVERITY_CODE2 groups **
'''
# Use the directly imported `kruskal`: the name `stats` is rebound to a float
# by earlier cells, so `stats.kruskal` raises AttributeError.
SC2_DHW_Kruskal = kruskal(dfSC20.DHW, dfSC21.DHW, nan_policy='omit')
SC2_DHW_9_Kruskal = kruskal(dfSC20.DHW_9, dfSC21.DHW_9, nan_policy='omit')
SC2_DHWrunmean7_Kruskal = kruskal(dfSC20.DHWrunmean7, dfSC21.DHWrunmean7, nan_policy='omit')
SC2_DHWrunmean30_Kruskal = kruskal(dfSC20.DHWrunmean30, dfSC21.DHWrunmean30, nan_policy='omit')
SC2_DHWrunmean90_Kruskal = kruskal(dfSC20.DHWrunmean90, dfSC21.DHWrunmean90, nan_policy='omit')
print(SC2_DHW_Kruskal)
print(SC2_DHW_9_Kruskal)
print(SC2_DHWrunmean7_Kruskal)
print(SC2_DHWrunmean30_Kruskal)
print(SC2_DHWrunmean90_Kruskal)

In [None]:
'''
                Mann-Whitney U test, DHW metrics by SEVERITY_CODE2
'''
from scipy.stats import mannwhitneyu
# The label is built from the column actually tested; the hand-written labels
# named the wrong column for some tests (e.g. DHWrunmean90 reported as
# DHWrunmean7).
for dhw_col in ('DHW', 'DHW_9', 'DHWrunmean7', 'DHWrunmean30', 'DHWrunmean90'):
    stat, p = mannwhitneyu(dfSC20[dhw_col], dfSC21[dhw_col])
    print('Statistics Mann-Whitney U test dfSC20.%s-dfSC21.%s =%.3f, p=%.3f'
          % (dhw_col, dhw_col, stat, p))

In [None]:
'''
    Boxplots of the DHW metrics for the two SEVERITY_CODE3 groups
     ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One figure per metric: DHW, DHWrunmean7, DHWrunmean30, DHWrunmean90
DHWboxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='DHW',
                        hue='SEVERITY_CODE3', palette=pal)
plt.show()
DHW_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='DHWrunmean7',
                                 hue='SEVERITY_CODE3', palette=pal)
plt.show()
DHWrunmean30boxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='DHWrunmean30',
                                 hue='SEVERITY_CODE3', palette=pal)
plt.show()
DHWrunmean30_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='DHWrunmean90',
                                          hue='SEVERITY_CODE3', palette=pal)
plt.show()

In [None]:
'''
    Only two groups here, so either Kruskal-Wallis or Mann-Whitney U applies
                            Kruskal-Wallis
    ** Test for differences in DHW metrics between SEVERITY_CODE3 groups **
'''
# Use the directly imported `kruskal`: the name `stats` is rebound to a float
# by earlier cells, so `stats.kruskal` raises AttributeError.
SC3_DHW_Kruskal = kruskal(dfSC30.DHW, dfSC31.DHW, nan_policy='omit')
SC3_DHW_9_Kruskal = kruskal(dfSC30.DHW_9, dfSC31.DHW_9, nan_policy='omit')
SC3_DHWrunmean7_Kruskal = kruskal(dfSC30.DHWrunmean7, dfSC31.DHWrunmean7, nan_policy='omit')
SC3_DHWrunmean30_Kruskal = kruskal(dfSC30.DHWrunmean30, dfSC31.DHWrunmean30, nan_policy='omit')
SC3_DHWrunmean90_Kruskal = kruskal(dfSC30.DHWrunmean90, dfSC31.DHWrunmean90, nan_policy='omit')
print(SC3_DHW_Kruskal)
print(SC3_DHW_9_Kruskal)
print(SC3_DHWrunmean7_Kruskal)
print(SC3_DHWrunmean30_Kruskal)
print(SC3_DHWrunmean90_Kruskal)

In [None]:
'''
                Mann-Whitney U test, DHW metrics by SEVERITY_CODE3
'''
from scipy.stats import mannwhitneyu
# Labels now name the frames and column actually compared (dfSC30 / dfSC31);
# the hand-written labels were copied from the SEVERITY_CODE2 cell and still
# said dfSC20 / dfSC21.
for dhw_col in ('DHW', 'DHW_9', 'DHWrunmean7', 'DHWrunmean30', 'DHWrunmean90'):
    stat, p = mannwhitneyu(dfSC30[dhw_col], dfSC31[dhw_col])
    print('Statistics Mann-Whitney U test dfSC30.%s-dfSC31.%s =%.3f, p=%.3f'
          % (dhw_col, dhw_col, stat, p))

# 2. How does cloud cover vary with bleaching severity?

In [None]:
'''
    Boxplot of CFrunmean30 (cloud cover) for each SEVERITY_CODE level
        ** in this step select the most convenient plot **
'''
plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One box per severity level, coloured by the same level
DHWrunmean30boxplot = sb.boxplot(data=data, x='SEVERITY_CODE', y='CFrunmean30',
                                 hue='SEVERITY_CODE', palette=pal)
plt.show()

In [None]:
'''
       Shapiro-Wilk or normaltest
         ** test for normality **
'''
# Unpack into `stat`: rebinding `stats` would replace the scipy.stats module
# and break the later `stats.kruskal` calls.
stat, p = shapiro(dfSC0.CFrunmean90)
print(stat)
print(p)
# NOTE(review): this runs on DHW of the severity-3 group inside the
# cloud-cover section, and the result is not displayed -- confirm the
# intended variable.
normality = pg.normality(dfSC3.DHW, method='normaltest') # normal test for large dataset
#homoscedasticity = pg.homoscedasticity(data, method='levene') # bartlett
#normality.to_csv('normality_test_features.csv')

In [None]:
'''
            Levene test
    ** test for equal variances of CFrunmean90 across severity groups **
'''
# Unpack into `stat` so the scipy.stats module bound to `stats` stays intact
stat, p = levene(dfSC0.CFrunmean90, dfSC1.CFrunmean90,
                 dfSC2.CFrunmean90, dfSC3.CFrunmean90)
print(stat)
print(p)

In [None]:
'''
                 Kruskal-Wallis
    ** Test for differences in cloud-cover metrics between SEVERITY_CODE groups **
'''
# Use the directly imported `kruskal`: the name `stats` is rebound to a float
# by the normality/variance cells, so `stats.kruskal` raises AttributeError.
SC_CF_Kruskal = kruskal(dfSC0.CF, dfSC1.CF, dfSC2.CF, dfSC3.CF, nan_policy='omit')

SC_CFrunmean7_Kruskal = kruskal(dfSC0.CFrunmean7, dfSC1.CFrunmean7, dfSC2.CFrunmean7, dfSC3.CFrunmean7, nan_policy='omit')

SC_CFrunmean30_Kruskal = kruskal(dfSC0.CFrunmean30, dfSC1.CFrunmean30, dfSC2.CFrunmean30, dfSC3.CFrunmean30, nan_policy='omit')

SC_CFrunmean90_Kruskal = kruskal(dfSC0.CFrunmean90, dfSC1.CFrunmean90, dfSC2.CFrunmean90, dfSC3.CFrunmean90, nan_policy='omit')

#print(SC_CF_Kruskal)
print(SC_CFrunmean7_Kruskal)
print(SC_CFrunmean30_Kruskal)
print(SC_CFrunmean90_Kruskal)

In [None]:
'''
         Dunn post-hoc test (no p-value adjustment requested)
    ** Which SEVERITY_CODE groups differ for each cloud-cover metric **
'''
CF_dunn = sp.posthoc_dunn(data, val_col='CF', group_col='SEVERITY_CODE')
CFrunmean7_dunn = sp.posthoc_dunn(data, val_col='CFrunmean7', group_col='SEVERITY_CODE')
CFrunmean30_dunn = sp.posthoc_dunn(data, val_col='CFrunmean30', group_col='SEVERITY_CODE')
CFrunmean90_dunn = sp.posthoc_dunn(data, val_col='CFrunmean90', group_col='SEVERITY_CODE')
# Print each pairwise table under its own heading
print('CF_dunn')
print(CF_dunn)
print('CFrunmean7_dunn')
print(CFrunmean7_dunn)
print('CFrunmean30_dunn')
print(CFrunmean30_dunn)
print('CFrunmean90_dunn')
print(CFrunmean90_dunn)

In [None]:
'''
    Boxplot of CFrunmean30 (cloud cover) for the two SEVERITY_CODE2 groups
        ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
DHWrunmean30boxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='CFrunmean30',
                                 hue='SEVERITY_CODE2', palette=pal)
plt.show()



In [None]:
'''
       Shapiro-Wilk test for normality
    cloud-cover metrics within each SEVERITY_CODE2 group
'''
# Unpack into `stat`: rebinding `stats` would replace the scipy.stats module
# and break the later `stats.<function>` calls.
# Output order: all four metrics for dfSC20, then all four for dfSC21.
for sc_frame in (dfSC20, dfSC21):
    for cf_col in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
        stat, p = shapiro(sc_frame[cf_col])
        print(stat)
        print(p)
        print("")

In [None]:
'''
            Levene test
    ** equal variances of cloud-cover metrics between SEVERITY_CODE2 groups **
'''
# Unpack into `stat` so the scipy.stats module bound to `stats` stays intact.
# A blank line separates consecutive results (none after the last one).
for position, cf_col in enumerate(('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90')):
    if position:
        print("")
    stat, p = levene(dfSC20[cf_col], dfSC21[cf_col])
    print(stat)
    print(p)

In [None]:
'''
                Mann-Whitney U test, cloud-cover metrics by SEVERITY_CODE2
     === SEVERITY_CODE2 groups: dfSC20 = "0", dfSC21 = "123" ===
'''
from scipy.stats import mannwhitneyu
# The label is built from the column actually tested; the hand-written labels
# misspelled the first frame name as "fSC20".
for cf_col in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    stat, p = mannwhitneyu(dfSC20[cf_col], dfSC21[cf_col])
    print('Statistics Mann-Whitney U test dfSC20.%s-dfSC21.%s =%.3f, p=%.3f'
          % (cf_col, cf_col, stat, p))

In [None]:
'''
    Boxplot of CFrunmean30 (cloud cover) for the two SEVERITY_CODE3 groups
        ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
DHWrunmean30boxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='CFrunmean30',
                                 hue='SEVERITY_CODE3', palette=pal)
plt.show()

In [None]:
'''
       Shapiro-Wilk test for normality
    cloud-cover metrics within each SEVERITY_CODE3 group
'''
# Unpack into `stat`: rebinding `stats` would replace the scipy.stats module
# and break the later `stats.<function>` calls.
# Output order: all four metrics for dfSC30, then all four for dfSC31.
for sc_frame in (dfSC30, dfSC31):
    for cf_col in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
        stat, p = shapiro(sc_frame[cf_col])
        print(stat)
        print(p)
        print("")

In [None]:
'''
            Levene test
    ** equal variances of cloud-cover metrics between SEVERITY_CODE3 groups **
'''
# Unpack into `stat` so the scipy.stats module bound to `stats` stays intact.
# A blank line separates consecutive results (none after the last one).
for position, cf_col in enumerate(('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90')):
    if position:
        print("")
    stat, p = levene(dfSC30[cf_col], dfSC31[cf_col])
    print(stat)
    print(p)

In [None]:
'''
                    Mann-Whitney U test, cloud-cover metrics by SEVERITY_CODE3
       === SEVERITY_CODE3 groups: dfSC30 = "01", dfSC31 = "23" ===
'''
from scipy.stats import mannwhitneyu
# The label is built from the column actually tested; the hand-written labels
# contained frame names that do not exist ("fSC30", "fSC90").
for cf_col in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    stat, p = mannwhitneyu(dfSC30[cf_col], dfSC31[cf_col])
    print('Statistics Mann-Whitney U test dfSC30.%s-dfSC31.%s =%.3f, p=%.3f'
          % (cf_col, cf_col, stat, p))

In [None]:
'''
        ===    Cloud cover, SEVERITY_CODE 0 vs 2 + 3  ===

    Violin plot of CFrunmean90 for severity 0 against severity 2-3,
    followed by a Mann-Whitney U test on the same variable
'''
dfSC2and3 = data[data['SEVERITY_CODE3'] == '23']
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way.
dfSC2and3vsSC0 = pd.concat([dfSC2and3, dfSC0])
# Label the stacked rows: "23" for severity 2 or 3, "0" for severity 0
dfSC2and3vsSC0['SEVERITY_CODE6'] = np.where(dfSC2and3vsSC0['SEVERITY_CODE']>=2,"23","0")
#plt.figure(figsize=(18,10))
pal = sb.color_palette('Paired')
# Plot the variable that is tested below (CFrunmean90); the plot previously
# showed the anomaly CF_a_runmean90 while the test ran on CFrunmean90.
DHWboxplot = sb.violinplot(y='CFrunmean90', x='SEVERITY_CODE6',
                 data=dfSC2and3vsSC0,
                 palette=pal,
                 hue='SEVERITY_CODE6')

stat, p = mannwhitneyu(dfSC2and3vsSC0[dfSC2and3vsSC0.SEVERITY_CODE6 == '23'].CFrunmean90,dfSC2and3vsSC0[dfSC2and3vsSC0.SEVERITY_CODE6 == '0'].CFrunmean90)
# Label corrected to name the column that was tested
print('Statistics Mann-Whitney U test dfSC23.CFrunmean90-dfSC0.CFrunmean90 =%.3f, p=%.3f' % (stat, p))

# 3. Is cloud cover anomaly a better variable for addressing the previous question?

In [None]:
'''
    Boxplot of CF_a_runmean90 (cloud-cover anomaly) for each SEVERITY_CODE level
        ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
CF_a_runmean90boxplot = sb.boxplot(data=data, x='SEVERITY_CODE', y='CF_a_runmean90',
                                   hue='SEVERITY_CODE', palette=pal)
plt.show()

In [None]:
'''
       Shapiro-Wilk test for normality
    cloud-cover anomaly metrics within each SEVERITY_CODE group
'''
# Unpack into `stat`: rebinding `stats` would replace the scipy.stats module
# and break the later `stats.<function>` calls.
# Output order: one metric at a time, each tested for severity 0, 1, 2, 3.
for cf_a_col in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
    for sc_frame in (dfSC0, dfSC1, dfSC2, dfSC3):
        stat, p = shapiro(sc_frame[cf_a_col])
        print(stat)
        print(p)
        print('')

In [None]:
'''
            Levene test
    ** test for equal variances of CF_a_runmean30 across severity groups **
'''
# Unpack into `stat` so the scipy.stats module bound to `stats` stays intact
stat, p = levene(dfSC0.CF_a_runmean30, dfSC1.CF_a_runmean30,
                 dfSC2.CF_a_runmean30, dfSC3.CF_a_runmean30)
print(stat)
print(p)

In [None]:
'''
                 Kruskal-Wallis
    ** Test for differences in cloud-cover anomaly metrics between SEVERITY_CODE groups **
'''
# Use the directly imported `kruskal`: the name `stats` is rebound to a float
# by the normality/variance cells, so `stats.kruskal` raises AttributeError.
SC_CF_a_Kruskal = kruskal(dfSC0.CF_a, dfSC1.CF_a, dfSC2.CF_a, dfSC3.CF_a, nan_policy='omit')
SC_CF_a_runmean7_Kruskal = kruskal(dfSC0.CF_a_runmean7, dfSC1.CF_a_runmean7, dfSC2.CF_a_runmean7, dfSC3.CF_a_runmean7, nan_policy='omit')
# Now tests the anomaly column CF_a_runmean30; it previously tested the raw
# cloud-cover column CFrunmean30 under an anomaly variable name.
SC_CF_a_runmean30_Kruskal = kruskal(dfSC0.CF_a_runmean30, dfSC1.CF_a_runmean30, dfSC2.CF_a_runmean30, dfSC3.CF_a_runmean30, nan_policy='omit')
SC_CF_a_runmean90_Kruskal = kruskal(dfSC0.CF_a_runmean90, dfSC1.CF_a_runmean90, dfSC2.CF_a_runmean90, dfSC3.CF_a_runmean90, nan_policy='omit')

print(SC_CF_a_Kruskal)
print(SC_CF_a_runmean7_Kruskal)
print(SC_CF_a_runmean30_Kruskal)
print(SC_CF_a_runmean90_Kruskal)

In [None]:
'''
         Dunn post-hoc test (no p-value adjustment requested)
    ** Which SEVERITY_CODE groups differ for each cloud-cover anomaly metric **
'''
CF_a_dunn = sp.posthoc_dunn(data, val_col='CF_a', group_col='SEVERITY_CODE')
CF_a_runmean7_dunn = sp.posthoc_dunn(data, val_col='CF_a_runmean7', group_col='SEVERITY_CODE')
CF_a_runmean30_dunn = sp.posthoc_dunn(data, val_col='CF_a_runmean30', group_col='SEVERITY_CODE')
CF_a_runmean90_dunn = sp.posthoc_dunn(data, val_col='CF_a_runmean90', group_col='SEVERITY_CODE')
# Print each pairwise table under its own heading
print('CF_a_dunn')
print(CF_a_dunn)
print('CF_a_runmean7_dunn')
print(CF_a_runmean7_dunn)
print('CF_a_runmean30_dunn')
print(CF_a_runmean30_dunn)
print('CF_a_runmean90_dunn')
print(CF_a_runmean90_dunn)

In [None]:
'''
    Boxplots of the cloud-cover anomaly metrics for the two SEVERITY_CODE2 groups
            ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One figure per metric: CF_a, CF_a_runmean7, CF_a_runmean30, CF_a_runmean90
DHWboxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='CF_a',
                        hue='SEVERITY_CODE2', palette=pal)
plt.show()
DHW_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='CF_a_runmean7',
                                 hue='SEVERITY_CODE2', palette=pal)
plt.show()
DHWrunmean30boxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='CF_a_runmean30',
                                 hue='SEVERITY_CODE2', palette=pal)
plt.show()
# The last figure also marks the group means
DHWrunmean30_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE2', y='CF_a_runmean90',
                                          hue='SEVERITY_CODE2', palette=pal,
                                          showmeans=True)
plt.show()

In [None]:
'''
       Shapiro-Wilk test for normality
    cloud-cover anomaly metrics within each SEVERITY_CODE2 group
'''
# Column names corrected: the cell referred to "CF_a_arunmean7/30/90", which
# do not match the CF_a_runmean* columns used everywhere else and raise
# AttributeError.
# Unpack into `stat`: rebinding `stats` would replace the scipy.stats module.
# Output order: all four metrics for dfSC20, then all four for dfSC21.
for sc_frame in (dfSC20, dfSC21):
    for cf_a_col in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
        stat, p = shapiro(sc_frame[cf_a_col])
        print(stat)
        print(p)
        print("")

In [None]:
'''
            Levene test
    ** equal variances of cloud-cover anomaly metrics between SEVERITY_CODE2 groups **
'''
# Both groups now use the same column: the 7-day test compared
# dfSC20.CF_a_runmean7 with dfSC21.CF_runmean7, a name that matches no column
# used elsewhere in the notebook.
# Unpack into `stat` so the scipy.stats module bound to `stats` stays intact.
# A blank line separates consecutive results (none after the last one).
for position, cf_a_col in enumerate(('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90')):
    if position:
        print("")
    stat, p = levene(dfSC20[cf_a_col], dfSC21[cf_a_col])
    print(stat)
    print(p)

In [None]:
'''
                Mann-Whitney U test, cloud-cover anomaly metrics by SEVERITY_CODE2
     === SEVERITY_CODE2 groups: dfSC20 = "0", dfSC21 = "123" ===
'''

# The label is built from the column actually tested; the hand-written labels
# misspelled the first frame name as "fSC20".
for cf_a_col in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
    stat, p = mannwhitneyu(dfSC20[cf_a_col], dfSC21[cf_a_col])
    print('Statistics Mann-Whitney U test dfSC20.%s-dfSC21.%s =%.3f, p=%.3f'
          % (cf_a_col, cf_a_col, stat, p))

In [None]:
'''
    Boxplots of the cloud-cover anomaly metrics for the two SEVERITY_CODE3 groups
            ** in this step select the most convenient plot **
'''
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One figure per metric: CF_a, CF_a_runmean7, CF_a_runmean30, CF_a_runmean90
DHWboxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='CF_a',
                        hue='SEVERITY_CODE3', palette=pal)
plt.show()
DHW_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='CF_a_runmean7',
                                 hue='SEVERITY_CODE3', palette=pal)
plt.show()
DHWrunmean30boxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='CF_a_runmean30',
                                 hue='SEVERITY_CODE3', palette=pal)
plt.show()
# The last figure also marks the group means
DHWrunmean30_adj_dateboxplot = sb.boxplot(data=data, x='SEVERITY_CODE3', y='CF_a_runmean90',
                                          hue='SEVERITY_CODE3', palette=pal,
                                          showmeans=True)
plt.show()

In [None]:
'''
       Shapiro-Wilk test for normality
    cloud-cover anomaly metrics within each SEVERITY_CODE3 group
'''
# Unpack into `stat`: rebinding `stats` would replace the scipy.stats module
# and break the later `stats.<function>` calls.
# Output order: all four metrics for dfSC30, then all four for dfSC31.
for sc_frame in (dfSC30, dfSC31):
    for cf_a_col in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
        stat, p = shapiro(sc_frame[cf_a_col])
        print(stat)
        print(p)
        print("")

In [None]:
'''
            Levene test
    ** equal variances of cloud-cover anomaly metrics between SEVERITY_CODE3 groups **
'''
# Both groups now use the same column: the 7-day test compared
# dfSC30.CF_a_runmean7 with dfSC31.CF_runmean7, a name that matches no column
# used elsewhere in the notebook.
# Unpack into `stat` so the scipy.stats module bound to `stats` stays intact.
# A blank line separates consecutive results (none after the last one).
for position, cf_a_col in enumerate(('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90')):
    if position:
        print("")
    stat, p = levene(dfSC30[cf_a_col], dfSC31[cf_a_col])
    print(stat)
    print(p)

In [None]:
'''
                Mann-Whitney U test, cloud-cover anomaly metrics by SEVERITY_CODE3
     === SEVERITY_CODE3 groups: dfSC30 = "01", dfSC31 = "23" ===
'''

# The label is built from the column actually tested; the hand-written labels
# named the wrong frames and columns for the 30- and 90-day tests.
for cf_a_col in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
    stat, p = mannwhitneyu(dfSC30[cf_a_col], dfSC31[cf_a_col])
    print('Statistics Mann-Whitney U test dfSC30.%s-dfSC31.%s =%.3f, p=%.3f'
          % (cf_a_col, cf_a_col, stat, p))

In [None]:
'''
        ===    Cloud-cover anomaly, SEVERITY_CODE 0 vs 2 + 3  ===

    Boxplot of CF_a_runmean90 for severity 0 against severity 2-3,
    followed by a Mann-Whitney U test on the same variable
'''
dfSC2and3 = data[data['SEVERITY_CODE3'] == '23']
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way.
dfSC2and3vsSC0 = pd.concat([dfSC2and3, dfSC0])
#dfSC2and3vsSC0 = dfSC2and3vsSC0[(dfSC2and3vsSC0['DHW_class']=='DHW>8')]
# Label the stacked rows: "23" for severity 2 or 3, "0" for severity 0
dfSC2and3vsSC0['SEVERITY_CODE6'] = np.where(dfSC2and3vsSC0['SEVERITY_CODE']>=2,"23","0")
#plt.figure(figsize=(18,10))
pal = sb.color_palette('Paired')
DHWboxplot = sb.boxplot(y='CF_a_runmean90', x='SEVERITY_CODE6',
                 data=dfSC2and3vsSC0,
                 palette=pal,
                 hue='SEVERITY_CODE6')

stat, p = mannwhitneyu(dfSC2and3vsSC0[dfSC2and3vsSC0.SEVERITY_CODE6 == '23'].CF_a_runmean90,dfSC2and3vsSC0[dfSC2and3vsSC0.SEVERITY_CODE6 == '0'].CF_a_runmean90)
# Label corrected to name the column that was tested
print('Statistics Mann-Whitney U test dfSC23.CF_a_runmean90-dfSC0.CF_a_runmean90 =%.3f, p=%.3f' % (stat, p))

# 4. Are CRW bleaching categories correlated with reported bleaching severity?
## Explore distribution grouped by thermal stress value (DHW)

In [None]:
'''
      Data filter (group by DHW value)
    Create four frames, one per DHW range
'''
# Bins are lower-inclusive so that every non-negative DHW value lands in
# exactly one frame: 0, (0, 4), [4, 8), [8, inf).
# Previously the second and third bins were (0, 4) and (4, 8), so rows with
# DHW exactly 4 were silently left out of all four frames.
# NOTE(review): the later DHW x SEVERITY_CODE frames use upper-inclusive
# bins ((0, 4], (4, 8], > 8) -- confirm which convention is intended.
dfDHW0 = data[data['DHW'] == 0]
dfDHW1_4 = data[(data['DHW'] > 0) & (data['DHW'] < 4)]
dfDHW4_8 = data[(data['DHW'] >= 4) & (data['DHW'] < 8)]
dfDHW8 = data[data['DHW'] >= 8]

In [None]:
#dfDHW4_8.DHW_class.value_counts()

## How does DHW vary with DHW_class?

In [None]:
'''
    Pivot tables counting records per SEVERITY_CODE and DHW_class
'''
# Wide layout: one row per severity code, one column per DHW class;
# aggfunc=len turns each cell into a record count.
SST_cloud_pivot = data.pivot_table(
    values=['DHW'],
    index=['SEVERITY_CODE'],
    columns=['DHW_class'],
    aggfunc=len,
)
print('Values: DHW, Index: SEVERITY_CODE, Columns: DHW_Class:')
print(SST_cloud_pivot)
#SST_cloud_pivot.to_csv('pivot.csv')
print('''


''')
# Long layout: the same counts indexed by (severity code, DHW class)
SST_cloud_pivot2 = data.pivot_table(
    values=['DHW'],
    index=['SEVERITY_CODE', 'DHW_class'],
    aggfunc=len,
)
print('Values: DHW, Index: SEVERITY_CODE and DHW_Class:')
print(SST_cloud_pivot2)
#SST_cloud_pivot.to_csv('pivot.csv')

In [None]:
'''
    Stacked bar chart of record counts by SEVERITY_CODE and DHW_class
'''
pal = sb.color_palette('PuBu') # color from seaborn palette
# The size is passed to DataFrame.plot directly: it opens its own figure, so
# a preceding plt.figure(figsize=...) only produced an empty figure and the
# chart was drawn at the default size.
Stacked_BP_DHW_SC_DHW_class = SST_cloud_pivot.plot(kind='bar', stacked=True,
                                                   color=pal, figsize=(18,10))
print(Stacked_BP_DHW_SC_DHW_class)

In [None]:
from pivottablejs import pivot_ui
#pivot_ui(data)

# 5. Does cloudiness reduce the likelihood of bleaching among reefs exposed to heat stress?

In [None]:
'''
          Data filter (group by DHW value)
    Create sixteen df: one per DHW bin x SEVERITY_CODE combination
'''
# Row masks for the four DHW bins (upper edge inclusive) ...
dhw_values = data['DHW']
in_dhw0 = dhw_values == 0
in_dhw1_4 = (dhw_values > 0) & (dhw_values <= 4)
in_dhw4_8 = (dhw_values > 4) & (dhw_values <= 8)
in_dhw8 = dhw_values > 8

# ... and for the four severity codes.
severity = data['SEVERITY_CODE']
is_sc0 = severity == 0
is_sc1 = severity == 1
is_sc2 = severity == 2
is_sc3 = severity == 3

# DHW == 0
dfDHW0_SC0 = data[in_dhw0 & is_sc0]
dfDHW0_SC1 = data[in_dhw0 & is_sc1]
dfDHW0_SC2 = data[in_dhw0 & is_sc2]
dfDHW0_SC3 = data[in_dhw0 & is_sc3]

# 0 < DHW <= 4
dfDHW1_4_SC0 = data[in_dhw1_4 & is_sc0]
dfDHW1_4_SC1 = data[in_dhw1_4 & is_sc1]
dfDHW1_4_SC2 = data[in_dhw1_4 & is_sc2]
dfDHW1_4_SC3 = data[in_dhw1_4 & is_sc3]

# 4 < DHW <= 8
dfDHW4_8_SC0 = data[in_dhw4_8 & is_sc0]
dfDHW4_8_SC1 = data[in_dhw4_8 & is_sc1]
dfDHW4_8_SC2 = data[in_dhw4_8 & is_sc2]
dfDHW4_8_SC3 = data[in_dhw4_8 & is_sc3]

# DHW > 8
dfDHW8_SC0 = data[in_dhw8 & is_sc0]
dfDHW8_SC1 = data[in_dhw8 & is_sc1]
dfDHW8_SC2 = data[in_dhw8 & is_sc2]
dfDHW8_SC3 = data[in_dhw8 & is_sc3]

In [None]:
'''
          Data filter (group by DHW value)
    For each DHW bin with DHW > 0 create four df: SEVERITY_CODE <= 1,
    SEVERITY_CODE <= 2, SEVERITY_CODE >= 2 and the whole bin
'''
# DHW bin masks (upper edge inclusive).
dhw_col = data['DHW']
bin_0_4 = (dhw_col > 0) & (dhw_col <= 4)
bin_4_8 = (dhw_col > 4) & (dhw_col <= 8)
bin_8 = dhw_col > 8

# Joined severity-code masks.
sc_col = data['SEVERITY_CODE']
sc_01 = sc_col <= 1
sc_012 = sc_col <= 2
sc_23 = sc_col >= 2

# 0 < DHW <= 4 (the "0_4" in the names does not include DHW == 0)
dfDHW0_4_SC01 = data[bin_0_4 & sc_01]
dfDHW0_4_SC012 = data[bin_0_4 & sc_012]
dfDHW0_4_SC23 = data[bin_0_4 & sc_23]
dfDHW0_4 = data[bin_0_4]

# 4 < DHW <= 8
dfDHW4_8_SC01 = data[bin_4_8 & sc_01]
dfDHW4_8_SC012 = data[bin_4_8 & sc_012]
dfDHW4_8_SC23 = data[bin_4_8 & sc_23]
dfDHW4_8 = data[bin_4_8]

# DHW > 8
dfDHW8_SC01 = data[bin_8 & sc_01]
dfDHW8_SC012 = data[bin_8 & sc_012]
dfDHW8_SC23 = data[bin_8 & sc_23]
dfDHW8 = data[bin_8]

In [None]:
# Show which DHW_class labels occur in each DHW == 0 subset, one severity code at a time.
for dhw0_subset in (dfDHW0_SC0, dfDHW0_SC1, dfDHW0_SC2, dfDHW0_SC3):
    class_counts = dhw0_subset.DHW_class.value_counts()
    print(class_counts)

In [None]:
'''
    Grouped boxplots: one figure per cloud-fraction metric,
    x = DHW_class, boxes coloured by SEVERITY_CODE
'''
import seaborn as sb
import matplotlib.pyplot as plt
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# Same plot for the raw cloud fraction and its 7/30/90 running means;
# plt.show() after each call keeps every metric in its own figure.
for cloud_metric in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    DHWboxplot = sb.boxplot(
        data=data,
        x='DHW_class',
        y=cloud_metric,
        hue='SEVERITY_CODE',
        palette=pal,
    )
    plt.show()

In [None]:
'''
                        Shapiro-Wilk test
                     ** test for normality **
    one test per DHW bin + SEVERITY_CODE subset and per CF variable
'''
# The statistic is bound to `stat`, not `stats`: `stats` is the scipy.stats
# module imported at the top of the notebook, and the Kruskal-Wallis cells
# call stats.kruskal(...), which fails once that name holds a float.
shapiro_subsets = (
    dfDHW0_SC0, dfDHW0_SC1, dfDHW0_SC2, dfDHW0_SC3,
    dfDHW1_4_SC0, dfDHW1_4_SC1, dfDHW1_4_SC2, dfDHW1_4_SC3,
    dfDHW4_8_SC0, dfDHW4_8_SC1, dfDHW4_8_SC2, dfDHW4_8_SC3,
    dfDHW8_SC0, dfDHW8_SC1, dfDHW8_SC2, dfDHW8_SC3,
)

# Output order: every subset (DHW bin by DHW bin, SEVERITY_CODE 0..3 inside
# each bin) for CF, then the same sequence for the 7/30/90 running means.
# Each test prints the W statistic, the p-value and an empty separator line.
for cf_column in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    for subset in shapiro_subsets:
        stat, p = shapiro(subset[cf_column])
        print(stat)
        print(p)
        print("")

In [None]:
'''
                Levene Test 
       ** test for equal variances ** 
    one test per severity code: variances are compared across the four DHW bins
'''
# The statistic is bound to `stat`, not `stats`: `stats` is the scipy.stats
# module imported at the top of the notebook, and the Kruskal-Wallis cells
# call stats.kruskal(...), which fails once that name holds a float.
levene_groups = (
    (dfDHW0_SC0, dfDHW1_4_SC0, dfDHW4_8_SC0, dfDHW8_SC0),  # SEVERITY_CODE 0
    (dfDHW0_SC1, dfDHW1_4_SC1, dfDHW4_8_SC1, dfDHW8_SC1),  # SEVERITY_CODE 1
    (dfDHW0_SC2, dfDHW1_4_SC2, dfDHW4_8_SC2, dfDHW8_SC2),  # SEVERITY_CODE 2
    (dfDHW0_SC3, dfDHW1_4_SC3, dfDHW4_8_SC3, dfDHW8_SC3),  # SEVERITY_CODE 3
)

# Output order: SEVERITY_CODE 0..3 for CF, then for the 7/30/90 running means.
# Each test prints the statistic, the p-value and an empty separator line.
for cf_column in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    for dhw_bins in levene_groups:
        stat, p = levene(*(subset[cf_column] for subset in dhw_bins))
        print(stat)
        print(p)
        print("")

In [None]:
'''
                            Kruskal Wallis
    ** Within each DHW bin, compare CFrunmean30 between the four SEVERITY_CODEs **
'''
# `kruskal` is the function imported directly from scipy.stats at the top of
# the notebook. It is called by name instead of through `stats.kruskal`
# because the normality / variance cells above rebind `stats` to a number,
# which makes `stats.kruskal` raise AttributeError.
# NOTE: the result names say "CF_a" but the tested column is CFrunmean30;
# the names are kept unchanged so nothing that refers to them breaks.
DHW_class_DHW0_CF_a_Kruskal = kruskal(
    dfDHW0_SC0.CFrunmean30, dfDHW0_SC1.CFrunmean30,
    dfDHW0_SC2.CFrunmean30, dfDHW0_SC3.CFrunmean30,
    nan_policy='omit')

DHW_class_DHW1_4_CF_a_Kruskal = kruskal(
    dfDHW1_4_SC0.CFrunmean30, dfDHW1_4_SC1.CFrunmean30,
    dfDHW1_4_SC2.CFrunmean30, dfDHW1_4_SC3.CFrunmean30,
    nan_policy='omit')

DHW_class_DHW4_8_CF_a_Kruskal = kruskal(
    dfDHW4_8_SC0.CFrunmean30, dfDHW4_8_SC1.CFrunmean30,
    dfDHW4_8_SC2.CFrunmean30, dfDHW4_8_SC3.CFrunmean30,
    nan_policy='omit')

DHW_class_DHW8_CF_a_Kruskal = kruskal(
    dfDHW8_SC0.CFrunmean30, dfDHW8_SC1.CFrunmean30,
    dfDHW8_SC2.CFrunmean30, dfDHW8_SC3.CFrunmean30,
    nan_policy='omit')

print(DHW_class_DHW0_CF_a_Kruskal)
print(DHW_class_DHW1_4_CF_a_Kruskal)
print(DHW_class_DHW4_8_CF_a_Kruskal)
print(DHW_class_DHW8_CF_a_Kruskal)

In [None]:
'''
                    Dunn post_hoc test
    ** Pairwise comparison of the DHW_class groups inside each SEVERITY_CODE df **
'''
# Raw cloud fraction
DHW_class_SC0_CF_dunn = sp.posthoc_dunn(dfSC0, 'CF', 'DHW_class')
DHW_class_SC1_CF_dunn = sp.posthoc_dunn(dfSC1, 'CF', 'DHW_class')
DHW_class_SC2_CF_dunn = sp.posthoc_dunn(dfSC2, 'CF', 'DHW_class')
DHW_class_SC3_CF_dunn = sp.posthoc_dunn(dfSC3, 'CF', 'DHW_class')

# 7-step running mean
DHW_class_SC0_CFrunmean7_dunn = sp.posthoc_dunn(dfSC0, 'CFrunmean7', 'DHW_class')
DHW_class_SC1_CFrunmean7_dunn = sp.posthoc_dunn(dfSC1, 'CFrunmean7', 'DHW_class')
DHW_class_SC2_CFrunmean7_dunn = sp.posthoc_dunn(dfSC2, 'CFrunmean7', 'DHW_class')
DHW_class_SC3_CFrunmean7_dunn = sp.posthoc_dunn(dfSC3, 'CFrunmean7', 'DHW_class')

# 30-step running mean
DHW_class_SC0_CFrunmean30_dunn = sp.posthoc_dunn(dfSC0, 'CFrunmean30', 'DHW_class')
DHW_class_SC1_CFrunmean30_dunn = sp.posthoc_dunn(dfSC1, 'CFrunmean30', 'DHW_class')
DHW_class_SC2_CFrunmean30_dunn = sp.posthoc_dunn(dfSC2, 'CFrunmean30', 'DHW_class')
DHW_class_SC3_CFrunmean30_dunn = sp.posthoc_dunn(dfSC3, 'CFrunmean30', 'DHW_class')

# 90-step running mean
DHW_class_SC0_CFrunmean90_dunn = sp.posthoc_dunn(dfSC0, 'CFrunmean90', 'DHW_class')
DHW_class_SC1_CFrunmean90_dunn = sp.posthoc_dunn(dfSC1, 'CFrunmean90', 'DHW_class')
DHW_class_SC2_CFrunmean90_dunn = sp.posthoc_dunn(dfSC2, 'CFrunmean90', 'DHW_class')
DHW_class_SC3_CFrunmean90_dunn = sp.posthoc_dunn(dfSC3, 'CFrunmean90', 'DHW_class')

# Report: one section per metric; every table is preceded by its variable
# name and every section is followed by blank lines.
dunn_report = (
    (
        ('DHW_class_SC0_CF_dunn', DHW_class_SC0_CF_dunn),
        ('DHW_class_SC1_CF_dunn', DHW_class_SC1_CF_dunn),
        ('DHW_class_SC2_CF_dunn', DHW_class_SC2_CF_dunn),
        ('DHW_class_SC3_CF_dunn', DHW_class_SC3_CF_dunn),
    ),
    (
        ('DHW_class_SC0_CFrunmean7_dunn', DHW_class_SC0_CFrunmean7_dunn),
        ('DHW_class_SC1_CFrunmean7_dunn', DHW_class_SC1_CFrunmean7_dunn),
        ('DHW_class_SC2_CFrunmean7_dunn', DHW_class_SC2_CFrunmean7_dunn),
        ('DHW_class_SC3_CFrunmean7_dunn', DHW_class_SC3_CFrunmean7_dunn),
    ),
    (
        ('DHW_class_SC0_CFrunmean30_dunn', DHW_class_SC0_CFrunmean30_dunn),
        ('DHW_class_SC1_CFrunmean30_dunn', DHW_class_SC1_CFrunmean30_dunn),
        ('DHW_class_SC2_CFrunmean30_dunn', DHW_class_SC2_CFrunmean30_dunn),
        ('DHW_class_SC3_CFrunmean30_dunn', DHW_class_SC3_CFrunmean30_dunn),
    ),
    (
        ('DHW_class_SC0_CFrunmean90_dunn', DHW_class_SC0_CFrunmean90_dunn),
        ('DHW_class_SC1_CFrunmean90_dunn', DHW_class_SC1_CFrunmean90_dunn),
        ('DHW_class_SC2_CFrunmean90_dunn', DHW_class_SC2_CFrunmean90_dunn),
        ('DHW_class_SC3_CFrunmean90_dunn', DHW_class_SC3_CFrunmean90_dunn),
    ),
)
for report_section in dunn_report:
    for table_name, dunn_table in report_section:
        print(table_name)
        print(dunn_table)
    print('''
''')


In [None]:
'''
            ===    CF's SEVERITY_CODE 0 vs 3 and DHW_class = DHW8 ===

    Boxplots of the cloud-fraction metrics by SEVERITY_CODE (hue = DHW_class),
    followed by a Mann-Whitney U test per metric
'''
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
# subsets in the same order and keeps their original index labels.
plot_DHW8_SC0vs3 = pd.concat([dfDHW8_SC0, dfDHW8_SC3])
#make grouped boxplot
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One figure per metric; plt.show() closes each figure before the next plot.
for cf_column in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    DHWboxplot = sb.boxplot(y=cf_column, x='SEVERITY_CODE',
                     data=plot_DHW8_SC0vs3,
                     palette=pal,
                     hue='DHW_class')
    plt.show()


# Mann Whitney U-Test: SEVERITY_CODE 0 vs 3 within DHW > 8, one test per metric.
mwu_reports = (
    ('CF', 'Statistics Mann-Whitney U test dfDHW8_SC0.CF,dfDHW8_SC3.CF =%.3f, p=%.3f'),
    ('CFrunmean7', 'Statistics Mann-Whitney U test dfDHW8_SC0.CFrunmean7,dfDHW8_SC3.CFrunmean7 =%.3f, p=%.3f'),
    ('CFrunmean30', 'Statistics Mann-Whitney U test dfDHW8_SC0.CFrunmean30,dfDHW8_SC3.CFrunmean30 =%.3f, p=%.3f'),
    ('CFrunmean90', 'Statistics Mann-Whitney U test dfDHW8_SC0.CFrunmean90,dfDHW8_SC3.CFrunmean90 =%.3f, p=%.3f'),
)
for cf_column, report_line in mwu_reports:
    stat, p = mannwhitneyu(dfDHW8_SC0[cf_column], dfDHW8_SC3[cf_column])
    print(report_line % (stat, p))



In [None]:
'''
            ===    CF's SEVERITY_CODE 0 + 1 vs 3 and DHW_class = DHW8 ===
            
    Boxplots of the cloud-fraction metrics by joined severity group
    (hue = DHW_class), followed by a Mann-Whitney U test per metric
'''
dfSC0and1 = data[data['SEVERITY_CODE3'] == '01']
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
# subsets in the same order and keeps their original index labels.
dfSC0and1vsSC3 = pd.concat([dfSC0and1, dfSC3])
# .copy() makes the filtered rows an independent frame, so adding the
# SEVERITY_CODE4 column below is not an assignment on a slice of another
# frame (which triggers pandas' SettingWithCopyWarning).
dfSC0and1vsSC3 = dfSC0and1vsSC3[(dfSC0and1vsSC3['DHW_class']=='DHW>8')].copy()
# Label used on the x axis: "01" for SEVERITY_CODE 0 or 1, "3" otherwise.
dfSC0and1vsSC3['SEVERITY_CODE4'] = np.where(dfSC0and1vsSC3['SEVERITY_CODE']<=1,"01","3")
#make grouped boxplot
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# One figure per metric; plt.show() closes each figure before the next plot.
for cf_column in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    DHWboxplot = sb.boxplot(y=cf_column, x='SEVERITY_CODE4',
                     data=dfSC0and1vsSC3,
                     palette=pal,
                     hue='DHW_class')
    plt.show()

# Mann Whitney U-Test: SEVERITY_CODE 0 + 1 vs 3 within DHW > 8, one test per metric.
mwu_reports = (
    ('CF', 'Statistics Mann-Whitney U test dfDHW8_SC01.CF,dfDHW8_SC3.CF =%.3f, p=%.3f'),
    ('CFrunmean7', 'Statistics Mann-Whitney U test dfDHW8_SC01.CFrunmean7,dfDHW8_SC3.CFrunmean7 =%.3f, p=%.3f'),
    ('CFrunmean30', 'Statistics Mann-Whitney U test dfDHW8_SC01.CFrunmean30,dfDHW8_SC3.CFrunmean30 =%.3f, p=%.3f'),
    ('CFrunmean90', 'Statistics Mann-Whitney U test dfDHW8_SC01.CFrunmean90,dfDHW8_SC3.CFrunmean90 =%.3f, p=%.3f'),
)
for cf_column, report_line in mwu_reports:
    stat, p = mannwhitneyu(dfDHW8_SC01[cf_column], dfDHW8_SC3[cf_column])
    print(report_line % (stat, p))

In [None]:
'''
            ===    CF's SEVERITY_CODE 0 + 1 + 2 vs 3 and DHW_class = DHW8 ===
    Boxplots of the cloud-fraction metrics by SEVERITY_CODE5 (hue = DHW_class),
    followed by a Mann-Whitney U test per metric
'''
# Only the records labelled 'DHW>8' are plotted; SEVERITY_CODE5 splits them
# into "012" and "3".
plotDHW8_SC123vs3 = data[data['DHW_class']== 'DHW>8']
# plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
for cloud_metric in ('CF', 'CFrunmean7', 'CFrunmean30', 'CFrunmean90'):
    DHWboxplot = sb.boxplot(
        data=plotDHW8_SC123vs3,
        x='SEVERITY_CODE5',
        y=cloud_metric,
        hue='DHW_class',
        palette=pal,
    )
    plt.show()

# Mann Whitney U-Test: SEVERITY_CODE 0+1+2 vs 3 within DHW > 8, one test per metric.
u_test_lines = (
    ('CF', 'Statistics Mann-Whitney U test dfDHW8_SC012.CF,dfDHW8_SC3.CF =%.3f, p=%.3f'),
    ('CFrunmean7', 'Statistics Mann-Whitney U test dfDHW8_SC012.CFrunmean7,dfDHW8_SC3.CFrunmean7 =%.3f, p=%.3f'),
    ('CFrunmean30', 'Statistics Mann-Whitney U test dfDHW8_SC012.CFrunmean30,dfDHW8_SC3.CFrunmean30 =%.3f, p=%.3f'),
    ('CFrunmean90', 'Statistics Mann-Whitney U test dfDHW8_SC012.CFrunmean90,dfDHW8_SC3.CFrunmean90 =%.3f, p=%.3f'),
)
for cloud_metric, line_template in u_test_lines:
    stat, p = mannwhitneyu(dfDHW8_SC012[cloud_metric], dfDHW8_SC3[cloud_metric])
    print(line_template % (stat, p))

# 6. Is cloud cover anomaly a better variable for addressing the previous question? 

In [None]:
'''
    Grouped boxplots: one figure per cloud-fraction anomaly metric,
    x = DHW_class, boxes coloured by SEVERITY_CODE
'''
import seaborn as sb
import matplotlib.pyplot as plt
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# Same plot for the anomaly and its 7/30/90 running means;
# plt.show() after each call keeps every metric in its own figure.
for anomaly_metric in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
    DHWboxplot = sb.boxplot(
        data=data,
        x='DHW_class',
        y=anomaly_metric,
        hue='SEVERITY_CODE',
        palette=pal,
    )
    plt.show()

In [None]:
'''
                        Shapiro-Wilk test
                     ** test for normality **
    one test per DHW bin + SEVERITY_CODE subset and per CF_a variable
'''
# The statistic is bound to `stat`, not `stats`: `stats` is the scipy.stats
# module imported at the top of the notebook, and the Kruskal-Wallis cells
# call stats.kruskal(...), which fails once that name holds a float.
shapiro_subsets = (
    dfDHW0_SC0, dfDHW0_SC1, dfDHW0_SC2, dfDHW0_SC3,
    dfDHW1_4_SC0, dfDHW1_4_SC1, dfDHW1_4_SC2, dfDHW1_4_SC3,
    dfDHW4_8_SC0, dfDHW4_8_SC1, dfDHW4_8_SC2, dfDHW4_8_SC3,
    dfDHW8_SC0, dfDHW8_SC1, dfDHW8_SC2, dfDHW8_SC3,
)

# Output order: every subset (DHW bin by DHW bin, SEVERITY_CODE 0..3 inside
# each bin) for CF_a, then the same sequence for the 7/30/90 running means.
# Each test prints the W statistic, the p-value and an empty separator line.
for anomaly_column in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
    for subset in shapiro_subsets:
        stat, p = shapiro(subset[anomaly_column])
        print(stat)
        print(p)
        print("")

In [None]:
'''
                Levene Test 
       ** test for equal variances ** 
    one test per severity code: variances are compared across the four DHW bins
'''
# The statistic is bound to `stat`, not `stats`: `stats` is the scipy.stats
# module imported at the top of the notebook, and the Kruskal-Wallis cells
# call stats.kruskal(...), which fails once that name holds a float.
levene_groups = (
    (dfDHW0_SC0, dfDHW1_4_SC0, dfDHW4_8_SC0, dfDHW8_SC0),  # SEVERITY_CODE 0
    (dfDHW0_SC1, dfDHW1_4_SC1, dfDHW4_8_SC1, dfDHW8_SC1),  # SEVERITY_CODE 1
    (dfDHW0_SC2, dfDHW1_4_SC2, dfDHW4_8_SC2, dfDHW8_SC2),  # SEVERITY_CODE 2
    (dfDHW0_SC3, dfDHW1_4_SC3, dfDHW4_8_SC3, dfDHW8_SC3),  # SEVERITY_CODE 3
)

# Output order: SEVERITY_CODE 0..3 for CF_a, then for the 7/30/90 running means.
# Each test prints the statistic, the p-value and an empty separator line.
for anomaly_column in ('CF_a', 'CF_a_runmean7', 'CF_a_runmean30', 'CF_a_runmean90'):
    for dhw_bins in levene_groups:
        stat, p = levene(*(subset[anomaly_column] for subset in dhw_bins))
        print(stat)
        print(p)
        print("")

In [None]:
'''
                            Kruskal Wallis
    ** Within each DHW bin, compare CF_a_runmean90 between the four SEVERITY_CODEs **
'''
# `kruskal` is the function imported directly from scipy.stats at the top of
# the notebook. It is called by name instead of through `stats.kruskal`
# because the normality / variance cells above rebind `stats` to a number,
# which makes `stats.kruskal` raise AttributeError.
DHW_class_DHW0_CF_a_Kruskal = kruskal(
    dfDHW0_SC0.CF_a_runmean90, dfDHW0_SC1.CF_a_runmean90,
    dfDHW0_SC2.CF_a_runmean90, dfDHW0_SC3.CF_a_runmean90,
    nan_policy='omit')

DHW_class_DHW1_4_CF_a_Kruskal = kruskal(
    dfDHW1_4_SC0.CF_a_runmean90, dfDHW1_4_SC1.CF_a_runmean90,
    dfDHW1_4_SC2.CF_a_runmean90, dfDHW1_4_SC3.CF_a_runmean90,
    nan_policy='omit')

DHW_class_DHW4_8_CF_a_Kruskal = kruskal(
    dfDHW4_8_SC0.CF_a_runmean90, dfDHW4_8_SC1.CF_a_runmean90,
    dfDHW4_8_SC2.CF_a_runmean90, dfDHW4_8_SC3.CF_a_runmean90,
    nan_policy='omit')

DHW_class_DHW8_CF_a_Kruskal = kruskal(
    dfDHW8_SC0.CF_a_runmean90, dfDHW8_SC1.CF_a_runmean90,
    dfDHW8_SC2.CF_a_runmean90, dfDHW8_SC3.CF_a_runmean90,
    nan_policy='omit')


print(DHW_class_DHW0_CF_a_Kruskal)
print(DHW_class_DHW1_4_CF_a_Kruskal)
print(DHW_class_DHW4_8_CF_a_Kruskal)
print(DHW_class_DHW8_CF_a_Kruskal)

In [None]:
'''
                          Dunn post_hoc test
          ** Which group is different according to the KW test **
    adjust depending the group to be tested, and print the desired result
'''
#### CF_a_runmean90 ####
# The post-hoc test has to use the variable that the Kruskal-Wallis test
# above was run on (CF_a_runmean90). The column passed here used to be
# 'CFrunmean90', i.e. the raw cloud fraction rather than its anomaly.
DHW_class_DHW0_CF_a_dunn = sp.posthoc_dunn(dfDHW0, 'CF_a_runmean90', 'SEVERITY_CODE')
DHW_class_DHW1_4_CF_a_dunn = sp.posthoc_dunn(dfDHW1_4, 'CF_a_runmean90', 'SEVERITY_CODE')
DHW_class_DHW4_8_CF_a_dunn = sp.posthoc_dunn(dfDHW4_8, 'CF_a_runmean90', 'SEVERITY_CODE')
DHW_class_DHW8_CF_a_dunn = sp.posthoc_dunn(dfDHW8, 'CF_a_runmean90', 'SEVERITY_CODE') # print this

# Each table is preceded by the name of the variable that holds it.
print('DHW_class_DHW0_CF_a_dunn')
print(DHW_class_DHW0_CF_a_dunn)
print('DHW_class_DHW1_4_CF_a_dunn')
print(DHW_class_DHW1_4_CF_a_dunn)
print('DHW_class_DHW4_8_CF_a_dunn')
print(DHW_class_DHW4_8_CF_a_dunn)
print('DHW_class_DHW8_CF_a_dunn')
print(DHW_class_DHW8_CF_a_dunn)


In [None]:
from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# One-way ANOVA of CF_a_runmean90 between the four severity codes, DHW > 8 only.
aov = f_oneway(dfDHW8_SC0.CF_a_runmean90,dfDHW8_SC1.CF_a_runmean90,dfDHW8_SC2.CF_a_runmean90,dfDHW8_SC3.CF_a_runmean90)
# Tukey HSD is the post-hoc for the ANOVA above, so it is run on the same
# variable (CF_a_runmean90). It previously used CF_a_runmean30, which made
# the two printed results describe different variables.
tukey = pairwise_tukeyhsd(endog=dfDHW8['CF_a_runmean90'],
                          groups=dfDHW8['SEVERITY_CODE'],
                          alpha=0.05)

print("                     ANOVA             ")
print(aov)
print(" ")
print("                 Tukey post-hoc        ")
print(tukey)

In [None]:
'''
         ===    CF's anomaly SEVERITY_CODE 0 vs 3 and DHW_class = DHW8 ===
         ===    (plus SEVERITY_CODE 1 vs 3 for 4 < DHW <= 8)           ===

    Boxplots of CF_a_runmean90 by SEVERITY_CODE (hue = DHW_class),
    followed by Mann-Whitney U tests on the anomaly metrics
'''
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
# subsets in the same order and keeps their original index labels.
plot_DHW4_8_SC1vs3 = pd.concat([dfDHW4_8_SC1, dfDHW4_8_SC3])
plot_DHW8_SC0vs3 = pd.concat([dfDHW8_SC0, dfDHW8_SC3])
#make grouped boxplot
#plt.figure(figsize=(18,10))
pal = sb.color_palette('PuBu')
# DHWboxplot = sb.boxplot(y='CF_a', x='SEVERITY_CODE', 
#                  data=plot_DHW8_SC0vs3, 
#                  palette=pal,
#                  hue='DHW_class')
# plt.show()
# DHWboxplot = sb.boxplot(y='CF_a_runmean7', x='SEVERITY_CODE', 
#                  data=plot_DHW8_SC0vs3, 
#                  palette=pal,
#                  hue='DHW_class')
# plt.show()
DHWboxplot = sb.boxplot(y='CF_a_runmean90', x='SEVERITY_CODE', 
                 data=plot_DHW4_8_SC1vs3, 
                 palette=pal,
                 hue='DHW_class')
plt.show()
DHWboxplot = sb.boxplot(y='CF_a_runmean90', x='SEVERITY_CODE', 
                 data=plot_DHW8_SC0vs3, 
                 palette=pal,
                 hue='DHW_class')
plt.show()


# Mann Whitney U-Test.
# Every test now runs on the dataframes and column named in the line it
# prints. Previously all four tests used CF_a_runmean30 regardless of the
# label, so three of the printed lines were the same result under different
# names, and the third label named dfDHW8_SC1 for a dfDHW4_8_SC1 sample.
stat, p = mannwhitneyu(dfDHW8_SC0.CF_a,dfDHW8_SC3.CF_a)
print('Statistics Mann-Whitney U test dfDHW8_SC0.CF_a,dfDHW8_SC3.CF_a =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW8_SC0.CF_a_runmean7,dfDHW8_SC3.CF_a_runmean7)
print('Statistics Mann-Whitney U test dfDHW8_SC0.CF_a_runmean7,dfDHW8_SC3.CF_a_runmean7 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW4_8_SC1.CF_a_runmean90,dfDHW4_8_SC3.CF_a_runmean90)
print('Statistics Mann-Whitney U test dfDHW4_8_SC1.CF_a_runmean90,dfDHW4_8_SC3.CF_a_runmean90 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW8_SC0.CF_a_runmean90,dfDHW8_SC3.CF_a_runmean90)
print('Statistics Mann-Whitney U test dfDHW8_SC0.CF_a_runmean90,dfDHW8_SC3.CF_a_runmean90 =%.3f, p=%.3f' % (stat, p))


In [None]:
'''
        ===    CF's anomaly SEVERITY_CODE 0 + 1 vs 3 and DHW_class = DHW8 ===

    Boxplot of CF_a_runmean90 by the merged severity label SEVERITY_CODE4
    ("01" vs "3"), restricted to rows whose DHW_class is 'DHW>8'.
'''
dfSC0and1 = data[data['SEVERITY_CODE3'] == '01']
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames row-wise.
dfSC0and1vsSC3 = pd.concat([dfSC0and1, dfSC3])
# .copy() gives an independent frame so the column assignment below does not
# write into a slice (avoids pandas' SettingWithCopyWarning).
dfSC0and1vsSC3 = dfSC0and1vsSC3[dfSC0and1vsSC3['DHW_class'] == 'DHW>8'].copy()
dfSC0and1vsSC3['SEVERITY_CODE4'] = np.where(dfSC0and1vsSC3['SEVERITY_CODE'] <= 1, "01", "3")

# Grouped boxplot. The variants that used to be commented out here plotted
# y='CF_a', 'CF_a_runmean7' and 'CF_a_runmean30' with the same arguments.
pal = sb.color_palette('PuBu')
DHWboxplot = sb.boxplot(y='CF_a_runmean90', x='SEVERITY_CODE4',
                        data=dfSC0and1vsSC3,
                        palette=pal,
                        hue='DHW_class')
plt.show()

'''
                        Mann Whitney U-Test
               CF's SEVERITY_CODE 0 + 1 vs 3, one test per DHW class
'''
# Each label now names exactly the frames and column passed to mannwhitneyu;
# the old labels all claimed CF_a_runmean90 and the first one named the
# dfDHW4_8 frames.
# NOTE(review): the first test pairs dfDHW0_4_SC01 with dfDHW1_4_SC3 -- confirm
# the frame names. The boxplot above uses CF_a_runmean90 while these tests use
# CF_a_runmean30 -- confirm which column is intended.
stat, p = mannwhitneyu(dfDHW0_4_SC01.CF_a_runmean30, dfDHW1_4_SC3.CF_a_runmean30)
print('Statistics Mann-Whitney U test dfDHW0_4_SC01.CF_a_runmean30,dfDHW1_4_SC3.CF_a_runmean30 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW4_8_SC01.CF_a_runmean30, dfDHW4_8_SC3.CF_a_runmean30)
print('Statistics Mann-Whitney U test dfDHW4_8_SC01.CF_a_runmean30,dfDHW4_8_SC3.CF_a_runmean30 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW8_SC01.CF_a_runmean30, dfDHW8_SC3.CF_a_runmean30)
print('Statistics Mann-Whitney U test dfDHW8_SC01.CF_a_runmean30,dfDHW8_SC3.CF_a_runmean30 =%.3f, p=%.3f' % (stat, p))

In [None]:
'''
        ===    CF's anomaly SEVERITY_CODE 0+1+2 vs 3 and DHW_class = DHW8 ===
    Violin plot of CF_a_runmean90 by SEVERITY_CODE5 ("012" vs "3"),
    restricted to rows whose DHW_class is 'DHW>8'.
'''
# NOTE(review): despite the "SC123vs3" name, the plot groups by SEVERITY_CODE5,
# i.e. "012" vs "3". The name is kept because other cells may reference it.
plotDHW8_SC123vs3 = data[data['DHW_class'] == 'DHW>8']

# The boxplot variants that used to be commented out here plotted y='CF_a',
# 'CF_a_runmean7' and 'CF_a_runmean30'. The plt.show() left behind by the last
# of them had no active plot before it and was removed.
pal = sb.color_palette('PuBu')
DHWboxplot = sb.violinplot(y='CF_a_runmean90', x='SEVERITY_CODE5',
                           data=plotDHW8_SC123vs3,
                           palette=pal,
                           hue='DHW_class')
plt.show()

'''
                        Mann Whitney U-Test
            CF's SEVERITY_CODE 0+1+2 vs 3, one test per DHW class
'''
# Each label now names exactly the frames and column passed to mannwhitneyu;
# the old labels all claimed CF_a_runmean90 and the first one named
# dfDHW1_4_SC012.
# NOTE(review): the first test pairs dfDHW0_4_SC012 with dfDHW1_4_SC3 -- confirm
# the frame names. The violin plot above uses CF_a_runmean90 while these tests
# use CF_a_runmean30 -- confirm which column is intended.
stat, p = mannwhitneyu(dfDHW0_4_SC012.CF_a_runmean30, dfDHW1_4_SC3.CF_a_runmean30)
print('Statistics Mann-Whitney U test dfDHW0_4_SC012.CF_a_runmean30,dfDHW1_4_SC3.CF_a_runmean30 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW4_8_SC012.CF_a_runmean30, dfDHW4_8_SC3.CF_a_runmean30)
print('Statistics Mann-Whitney U test dfDHW4_8_SC012.CF_a_runmean30,dfDHW4_8_SC3.CF_a_runmean30 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW8_SC012.CF_a_runmean30, dfDHW8_SC3.CF_a_runmean30)
print('Statistics Mann-Whitney U test dfDHW8_SC012.CF_a_runmean30,dfDHW8_SC3.CF_a_runmean30 =%.3f, p=%.3f' % (stat, p))

In [None]:
'''
        ===    CF's anomaly SEVERITY_CODE 0 vs 2 + 3 and DHW_class = DHW8 ===

    Violin plot of CF_a_runmean90 by the merged severity label SEVERITY_CODE6
    ("23" vs "0"), restricted to rows whose DHW_class is 'DHW>8', followed by
    one Mann-Whitney U test per DHW class.
'''
dfSC2and3 = data[data['SEVERITY_CODE3'] == '23']
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames row-wise.
dfSC2and3vsSC0 = pd.concat([dfSC2and3, dfSC0])
# .copy() gives an independent frame so the column assignment below does not
# write into a slice (avoids pandas' SettingWithCopyWarning).
dfSC2and3vsSC0 = dfSC2and3vsSC0[dfSC2and3vsSC0['DHW_class'] == 'DHW>8'].copy()
dfSC2and3vsSC0['SEVERITY_CODE6'] = np.where(dfSC2and3vsSC0['SEVERITY_CODE'] >= 2, "23", "0")

# Grouped violin plot.
pal = sb.color_palette('Paired')
DHWboxplot = sb.violinplot(y='CF_a_runmean90', x='SEVERITY_CODE6',
                           data=dfSC2and3vsSC0,
                           palette=pal,
                           hue='DHW_class')

# Each label now names exactly the frames and column passed to mannwhitneyu;
# the old code printed the same "dfDHW8_SC23.CF_a,dfDHW8_SC0.CF_a" label for
# all three tests. The DHW8 test previously used CF_a_runmean30 while the plot
# and the other two tests used CF_a_runmean90; it now uses CF_a_runmean90 so
# the three DHW classes are compared on the same variable.
# NOTE(review): the first test pairs dfDHW0_4_SC23 with dfDHW1_4_SC0 -- confirm
# the frame names.
stat, p = mannwhitneyu(dfDHW0_4_SC23.CF_a_runmean90, dfDHW1_4_SC0.CF_a_runmean90)
print('Statistics Mann-Whitney U test dfDHW0_4_SC23.CF_a_runmean90,dfDHW1_4_SC0.CF_a_runmean90 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW4_8_SC23.CF_a_runmean90, dfDHW4_8_SC0.CF_a_runmean90)
print('Statistics Mann-Whitney U test dfDHW4_8_SC23.CF_a_runmean90,dfDHW4_8_SC0.CF_a_runmean90 =%.3f, p=%.3f' % (stat, p))
stat, p = mannwhitneyu(dfDHW8_SC23.CF_a_runmean90, dfDHW8_SC0.CF_a_runmean90)
print('Statistics Mann-Whitney U test dfDHW8_SC23.CF_a_runmean90,dfDHW8_SC0.CF_a_runmean90 =%.3f, p=%.3f' % (stat, p))

In [None]:
'''
         ===    CF's anomaly SEVERITY_CODE 0 vs 2 and DHW_class = DHW8 ===

    Violin plot of CF_a_runmean90 by SEVERITY_CODE (hue=DHW_class) for the
    dfDHW8_SC0 and dfDHW8_SC2 frames, followed by a Mann-Whitney U test
    between the two.
'''
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# row-wise in the same order.
plot_DHW8_SC0vs2 = pd.concat([dfDHW8_SC0, dfDHW8_SC2])

# Grouped violin plot.
pal = sb.color_palette('PuBu')
DHWboxplot = sb.violinplot(y='CF_a_runmean90', x='SEVERITY_CODE',
                           data=plot_DHW8_SC0vs2,
                           palette=pal,
                           hue='DHW_class')

# Two-sample test on the same column that is plotted above.
stat, p = mannwhitneyu(dfDHW8_SC0.CF_a_runmean90, dfDHW8_SC2.CF_a_runmean90)
print('Statistics Mann-Whitney U test dfDHW8_SC0.CF_a_runmean90,dfDHW8_SC2.CF_a_runmean90 =%.3f, p=%.3f' % (stat, p))