# Machine learning algorithms for coral bleaching classification 

## Load dataset

In [None]:
'''
    Import libraries
'''
from sklearn import datasets
import pandas as pd
import numpy as np
from numpy import mean
from numpy import std
import sklearn
import seaborn as sb
import matplotlib.pyplot as plt
from numpy import mean 
from numpy import std
from scipy import stats
import scipy.stats as scipystats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats.stats import linregress
from scipy.stats.stats import pearsonr
# Print the installed scikit-learn version so the run can be reproduced later
print(sklearn.__version__)

In [None]:
'''
    Read the complete dataset from its CSV file
'''
data = pd.read_csv(filepath_or_buffer='df_sst_clouds.csv')
# Number of records loaded (shown as the cell output)
len(data.index)

In [None]:
'''
    Keep only records whose SEVERITY_CODE is 0, 1, 2 or 3,
    then discard every row that still contains a NaN
'''
# Optional extra filter, currently disabled:
#   data = data[data.YEAR >= 2005]  # first year with more than 100 records
valid_severity = data['SEVERITY_CODE'].isin([0, 1, 2, 3])
data = data[valid_severity].dropna()
# Number of records left after filtering (shown as the cell output)
len(data)

In [None]:
# Display the column labels of the DataFrame
data.columns

# Calculate a linear least-squares regression for two sets of measurements

In [None]:
# Simple linear regressions with SEVERITY_CODE as x and one variable as y.
# Each result is kept under its own name so it can be inspected afterwards.
severity = data['SEVERITY_CODE']

reglinSEVERITY_CODE_WD = linregress(severity, data['WD'])
reglinSEVERITY_CODE_DHW = linregress(severity, data['DHW'])
reglinSEVERITY_CODE_DHW_9 = linregress(severity, data['DHW_9'])

# Report the three fits, separated by blank lines
print('SEVERITY_CODE vs WD:', reglinSEVERITY_CODE_WD)
print()
print('SEVERITY_CODE vs DHW:', reglinSEVERITY_CODE_DHW)
print()
print('SEVERITY_CODE vs DHW_9:', reglinSEVERITY_CODE_DHW_9)


In [None]:
# Coefficient of determination (R-squared) is the square of the
# correlation coefficient returned by linregress
r_value_dhw = reglinSEVERITY_CODE_DHW.rvalue
r_value_dhw ** 2

In [None]:
# Ordinary least squares via the statsmodels formula API:
# DHW is the response, SEVERITY_CODE the single predictor
ols_spec = smf.ols("DHW ~ SEVERITY_CODE", data=data)
reg = ols_spec.fit()
reg.summary()

In [None]:
# Multiple linear regression of SEVERITY_CODE on DHW_9 and CF_a_runmean90,
# fitted first with scikit-learn and then with statsmodels
from sklearn import linear_model

predictor_columns = ['DHW_9', 'CF_a_runmean90']
X = data[predictor_columns]
y = data['SEVERITY_CODE']

# scikit-learn: fit() returns the estimator itself
regr = linear_model.LinearRegression().fit(X, y)
print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)

# statsmodels: OLS has no implicit intercept, so add a constant column
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
predictions = model.predict(X)

print_model = model.summary()
print(print_model)

# Regplot

In [None]:
# Scatter plot of DHW against SEVERITY_CODE with a fitted regression line.
Cf_a_DHW = data[['CF_a_runmean90','DHW','SEVERITY_CODE']]
# x and y are passed by keyword: recent seaborn releases no longer accept
# them positionally (only `data` may be positional).
scatter = sb.regplot(
    x='SEVERITY_CODE',
    y='DHW',
    data=Cf_a_DHW,
    marker='+',
    scatter_kws={'s': 1},
    line_kws={'lw': 1, 'color': 'darkred'},
)
#plt.savefig('regplotDHW_CF_a_90.pdf', dpi=300)

# Pairplots

In [None]:
 
# Pairwise regression plots of CF_a_runmean90 and DHW, with one series
# (colour) per SEVERITY_CODE value.
Cf_DHW = data[['CF_a_runmean90','DHW','SEVERITY_CODE']]
# `height` replaces the deprecated `size` keyword (renamed in seaborn 0.9);
# it sets the height of each facet in inches.
pairgrid = sb.pairplot(
    Cf_DHW,
    hue='SEVERITY_CODE',
    palette="hls",
    kind='reg',
    markers='.',
    corner=True,
    plot_kws={'scatter_kws': {'alpha': 0.5, 'edgecolor': 'none'}},
    height=10,
)
plt.show()
#plt.savefig('pairplotDHW_CF_a_90.pdf', dpi=300)