# Machine learning algorithms for coral bleaching classification 

## Load dataset

In [None]:
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sb
import sklearn
from numpy import mean
from numpy import std
from scipy import stats
# linregress and pearsonr are public members of scipy.stats; the
# scipy.stats.stats namespace is deprecated, so import from the public module.
from scipy.stats import linregress
from scipy.stats import pearsonr
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# check scikit-learn version
print(sklearn.__version__)

In [None]:
# Load full dataset from 'df_sst_clouds.csv' and show how many rows were read.
data = pd.read_csv('df_sst_clouds.csv')
data.shape[0]

In [None]:
# Subset DF by SEVERITY_CODE [0,1,2,3] and by YEAR.
# Keep a row only when its severity code is one of 0-3 AND its year is 2005 or
# later (2005 is the first year with more than 100 records).
data = data[data['SEVERITY_CODE'].isin([0, 1, 2, 3]) & (data['YEAR'] >= 2005)]
data = data.dropna()  # drop rows that contain NaNs
len(data)

In [None]:
# Define dataset(s) for DAY of REPORT ("dor").
# Keep only the columns whose name contains neither 'adj' nor 'std'.
# Further optional filters, currently disabled: drop columns containing 'run'
# ("CF_a..." variables) and columns containing 'mean'.
data_dor = data.loc[:, ~data.columns.str.contains('adj|std')]

# X: feature table, i.e. everything left after removing identifiers, the
# label and the date/location columns listed here.
X = data_dor.drop(
    columns=[
        'ITEM_ID', 'DHW_class', 'SEVERITY_CODE', 'COUNTRY',
        'DAY', 'MONTH', 'YEAR', 'JD', 'lat', 'lon', 'full_date',
    ]
)
# y: the labels to classify (target variable).
y = data_dor['SEVERITY_CODE']

In [None]:
# Display the column labels of the feature table X.
X.columns

In [None]:
# Rebuild y as a DataFrame from its raw values. Building from `.values`
# discards the row index y carried over from the filtered data, so y ends up
# with a fresh default index.
y = pd.DataFrame(y.values)

In [None]:
# Standardizing the features: fit a StandardScaler on X and replace X with
# the transformed result.
# NOTE(review): with scikit-learn's default output settings the result is a
# NumPy array, so X's column labels are no longer available after this cell
# - confirm no later cell relies on them.
X = StandardScaler().fit_transform(X)

In [None]:
# Fit a 2-component PCA on X and store the projected coordinates in a
# DataFrame with one named column per component.
# (When raising n_components, extend the column list accordingly, e.g.
# 'principal component 3', 'principal component 4', 'principal component 5'.)
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2'],
)

In [None]:
# Add the severity labels as a new column next to the principal components.
# NOTE(review): pandas aligns this assignment on the row index, so y needs the
# same default index as principalDf - confirm y's index was reset beforehand.
principalDf['SEVERITY_CODE'] = y
# finalDf is a second name for the same DataFrame object; no copy is made.
finalDf = principalDf

In [None]:
# Scatter plot of the two principal components, one colour per severity code.
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_xlabel('Principal Component 1', fontsize=15)
ax.set_ylabel('Principal Component 2', fontsize=15)
ax.set_title('2 component PCA', fontsize=20)

targets = [0, 1, 2, 3]
colors = ['r', 'b', 'c', 'g']
for severity, colour in zip(targets, colors):
    # Rows of finalDf that carry this severity code.
    subset = finalDf[finalDf['SEVERITY_CODE'] == severity]
    ax.scatter(
        subset['principal component 1'],
        subset['principal component 2'],
        c=colour,
        s=0.25,
    )
# One legend entry per scatter call, labelled in the same order as `targets`.
ax.legend(targets)
ax.grid()

In [None]:
# Print the variance ratio explained by each component, then the combined
# share of both components expressed as a percentage.
print(pca.explained_variance_ratio_)
total_var = 100 * pca.explained_variance_ratio_.sum()
print(total_var)

In [None]:
# Load the 'CI.csv' table. This rebinds `data`, so the DataFrame previously
# held under that name is no longer reachable through it.
data = pd.read_csv('CI.csv')
data.shape[0]

In [None]:
# plt.scatter(data['ID'], data['INDEX_eq'], s=data['SEVERITY_CODE'], c=data['SEVERITY_CODE'], alpha=0.5)
# plt.show()

In [None]:
import seaborn as sns

# Scatter plot of INDEX_eq against ID for the records in `data`.
plt.figure(figsize=(12, 8))
# Optional pre-filter (disabled): data = data[(data['SEVERITY_CODE'] < 2)]
sns.scatterplot(
    data=data,
    # Columns are referenced by name because `data=` is supplied.
    x='ID',
    y='INDEX_eq',
    # A palette only takes effect together with a hue variable; passing it
    # alone is ignored (and recent seaborn versions warn about it), so both
    # stay disabled as a pair. Enable them together to colour by severity:
    # hue='SEVERITY_CODE',
    # palette='Set1',
    # Other disabled options: style="Reported", size="DHW_class",
    # sizes=(25, 50)
    alpha=0.50,
)
plt.show()

In [None]:
# One scatter panel per SEVERITY_CODE value (INDEX_eq against ID), with the
# points also coloured by SEVERITY_CODE.
sns.relplot(
    kind="scatter",
    data=data,
    x="ID",
    y="INDEX_eq",
    hue="SEVERITY_CODE",
    col="SEVERITY_CODE",
    palette="viridis",
    alpha=0.5,
)