### Importing MVCO data

In [33]:
import numpy as np
import scipy as sp
from scipy.spatial import ConvexHull, convex_hull_plot_2d
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('/D/MIT-WHOI/data/2021/biovol_concentration_by_class_time_seriesCNN_hourly.csv')
df["id"] = df.index

print(df.head())
# print(list(df.columns))

df_filtered =pd.melt(df,id_vars = ['datetime'], var_name = 'species',value_name='biovol/mL')

               datetime  Acanthoica_quattrospina  Akashiwo  \
0  06-Jun-2006 18:56:16                 0.000000       0.0   
1  06-Jun-2006 19:18:01                 0.000000       0.0   
2  06-Jun-2006 20:23:16                 0.000000       0.0   
3  06-Jun-2006 21:17:32                 0.000000       0.0   
4  06-Jun-2006 22:25:43                34.815354       0.0   

   Alexandrium_catenella  Amphidinium  Amylax  Apedinella  \
0                    0.0          0.0     0.0    0.000000   
1                    0.0          0.0     0.0    0.000000   
2                    0.0          0.0     0.0    0.000000   
3                    0.0          0.0     0.0    0.000000   
4                    0.0          0.0     0.0   96.448108   

   Asterionellopsis_glacialis  Bacillaria  Bacillariophyceae  ...     pennate  \
0                  190.513016         0.0         311.643280  ...    0.000000   
1                    0.000000         0.0          56.404548  ...  361.366141   
2                

In [34]:
clist = pd.read_csv('/D/MIT-WHOI/data/2021/IFCB_classlist_type.csv')
# print(clist.head())
# print(list(clist.columns)[0])

clist = pd.melt(clist,id_vars = ['CNN_classlist'], var_name = 'group',value_name='in_out')

clist_filtered = clist[clist['in_out']==1]

clist_filtered = clist_filtered.rename(columns = {"CNN_classlist":'species','in_out':'present'})

print(clist_filtered.head())

merged = df_filtered.join(clist_filtered.set_index('species'),on = 'species')

print(merged.head())

                                        species   group  present
0                       Chaetoceros_tenuissimus  Diatom      1.0
1                             Cylindrotheca_min  Diatom      1.0
2                          Eucampia_morphytype1  Diatom      1.0
3  Guinardia_delicatula_TAG_Amphidinium_grazing  Diatom      1.0
4                        Hemiaulus_membranaceus  Diatom      1.0
               datetime                  species  biovol/mL            group  \
0  06-Jun-2006 18:56:16  Acanthoica_quattrospina        0.0  Coccolithophore   
0  06-Jun-2006 18:56:16  Acanthoica_quattrospina        0.0             Nano   
1  06-Jun-2006 19:18:01  Acanthoica_quattrospina        0.0  Coccolithophore   
1  06-Jun-2006 19:18:01  Acanthoica_quattrospina        0.0             Nano   
2  06-Jun-2006 20:23:16  Acanthoica_quattrospina        0.0  Coccolithophore   

   present  
0      1.0  
0      1.0  
1      1.0  
1      1.0  
2      1.0  


In [37]:
df_long = merged[['datetime','group','biovol/mL']]

df_short = df_long.pivot_table(index=["datetime"], 
                    columns='group', 
                    values='biovol/mL')

In [52]:
df_short.head()

df_short.index.name = 'datetime'
df_short.reset_index(inplace=True)

ValueError: cannot insert datetime, already exists

In [73]:

df_short['datetime'].head()

df_short['datetime'] =  pd.to_datetime(df_short['datetime'], format="%d-%b-%Y %X")



year = pd.to_numeric(df_short["datetime"].dt.year)
doy = pd.to_numeric(df_short["datetime"].dt.dayofyear).div(365)

df_short["year"] = year + doy

print(df_short.head())


df_dd = df_short[["year","Diatom","Dinoflagellate"]]

df_dd.to_csv('/D/MIT-WHOI/github_repos/plankton-index/df_dd.txt',sep= " ",index = False)
# from datetime import datetime
# day_of_year = datetime.now().timetuple().tm_yday 
# print(day_of_year)

# df_short['datetime'].dtypes


group               datetime      Ciliate  Coccolithophore        Diatom  \
datetime                                                                   
0        2007-04-01 00:28:14  1952.162094       128.998273  30550.985713   
1        2007-04-01 01:22:34  1888.968813        89.118841  27507.200308   
2        2007-04-01 02:27:47  2734.251767       219.056383  25738.830229   
3        2007-04-01 03:33:02  1506.973968        73.365809  31687.382353   
4        2007-04-01 04:27:21  2232.721254        82.716600  29088.064968   

group     Dinoflagellate  IFCB artifact         Nano    Other live  \
datetime                                                             
0            2599.249141     670.644834  7367.429339    453.713189   
1            3102.500260     565.436912  8360.338310  25927.564471   
2            2365.043237     897.389025  8509.690338    832.300632   
3            2894.457052    2653.760872  8237.828202   8212.188449   
4            2810.684279     485.804170  8505.7

In [None]:
#for nice latex fonts
from matplotlib import rc
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)


from matplotlib.pyplot import figure
from scipy.stats import gaussian_kde #req'd for density dot plot

figure(figsize=(10, 10), dpi=80) 

plt.rc('text', usetex=True)
plt.rc('font', family='serif')

xy = np.vstack([df_dd['Diatom'],df_dd['Dinoflagellate']])
z = gaussian_kde(xy)(xy)

plt.scatter(np.log(df_dd['Diatom']),np.log(df_dd['Dinoflagellate']),c = z, s = 100)
plt.xlabel(r'\textbf{time} (s)')
plt.ylabel(r'\textit{voltage} (mV)',fontsize=16)

In [None]:
import pylab

fig = plt.figure()
ax = fig.add_subplot(2, 1, 1)

line, = ax.plot(df_dd["Diatom"],df_dd["Dinoflagellate"], color='blue', lw=2)

ax.set_yscale('log')
ax.set_xscale('log')


pylab.show()