<a href="https://colab.research.google.com/github/kevin-eschbach/algae_experiments/blob/main/experiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime at /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import numpy as np
import pandas as pd # maybe use dask
import sklearn
import plotly.express as px
import glob, os
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
%matplotlib inline

In [None]:
# Folder on the mounted Google Drive that contains the CSV files read below.
path = r'/content/gdrive/MyDrive/[09] Master Thesis/data/'

# Load each table and convert its timestamp column to datetime.
data = pd.read_csv(f"{path}ATP3-UFS-Instrumentation.csv").assign(
    DateTime=lambda frame: pd.to_datetime(frame['DateTime']))

harvest_data = pd.read_csv(f"{path}ATP3-UFS-HarvestData.csv").assign(
    Date=lambda frame: pd.to_datetime(frame["Date"]))

composition_data = pd.read_csv(f"{path}ATP3-UFS-Composition.csv").assign(
    DATETIME=lambda frame: pd.to_datetime(frame["DATETIME"]))

weather_data = pd.read_csv(f"{path}ATP3-UFS-Weather.csv").assign(
    Date=lambda frame: pd.to_datetime(frame["Date"]))

## outlier removal
# Column by column, keep only the rows lying strictly between that column's
# 0.01 and 0.99 quantiles. The quantiles are taken on the frame as already
# trimmed by the previous columns, so the trimming compounds. Rows with NaN in
# one of these columns are dropped as well, since comparisons with NaN are False.
for col in ['pH', 'Temp (C)', 'Cond (mS.cm)', 'DO (mg.L)', 'DO (%sat)', 'Sal (g.L)']:
  q_low, q_high = (data[col].quantile(q) for q in (0.01, 0.99))
  data = data.loc[data[col].gt(q_low) & data[col].lt(q_high)]

In [None]:
# Identifier columns: their distinct values are listed once instead of being plotted.
_ID_COLUMNS = ('ExperimentID', 'SiteID', 'PondID', 'StrainID')
# Columns of the daily instrumentation frame that get no summary and no plots.
_SKIPPED_COLUMNS = ('DateTime', 'Date', 'PAR (umol.m2.s)')


def _select_experiment(frame, experimentID, siteID, pondID=None):
  """Return the rows of ``frame`` for one experiment at one site (and one pond if ``pondID`` is given)."""
  mask = (frame['ExperimentID'] == experimentID) & (frame['SiteID'] == siteID)
  if pondID is not None:
    mask &= frame['PondID'] == pondID
  return frame.loc[mask]


def _daily_mean(frame, time_col):
  """Average the numeric columns of ``frame`` per calendar day of ``time_col``.

  Only numeric columns are aggregated: pandas >= 2.0 raises a TypeError when
  asked for the mean of text columns (e.g. the ID columns) instead of silently
  dropping them. Days on which every averaged value is NaN are removed, and
  ``time_col`` is returned as a regular column.
  """
  numeric_cols = list(frame.select_dtypes(include='number').columns)
  daily = frame.set_index(time_col)[numeric_cols].resample('D').mean()
  return daily.dropna(how='all').reset_index()


def show_experiment(instrumentation_data, harvest_data, composition_data, weather_data, experimentID, pondID, siteID):
  """Print a summary and show plotly figures for one experiment in one pond.

  Args:
    instrumentation_data: frame with 'ExperimentID', 'PondID', 'SiteID', a
      datetime 'DateTime' column and the sensor columns.
    harvest_data: frame with 'ExperimentID', 'PondID', 'SiteID' and 'Date'.
    composition_data: frame with 'ExperimentID', 'PondID', 'SiteID',
      'DATETIME' and the columns used by the 3D scatter plot.
    weather_data: frame with 'ExperimentID', 'SiteID', a datetime 'Date'
      column and 'GlobalLightEnergy(W.m2)'.
    experimentID: value matched against the 'ExperimentID' columns.
    pondID: value matched against the 'PondID' columns.
    siteID: value matched against the 'SiteID' columns.

  Returns:
    None. Everything is printed or rendered through ``fig.show()``.

  Raises:
    IndexError: if no instrumentation rows match the requested experiment,
      pond and site.
  """
  # ================== INSTRUMENTATION DATA =======================
  instrumentation = _select_experiment(instrumentation_data, experimentID, siteID, pondID)
  instrumentation = instrumentation.sort_values("DateTime")
  if instrumentation.empty:
    # Without this guard the first-timestamp lookup below fails with an
    # opaque "index 0 is out of bounds" message.
    raise IndexError(
        f"No instrumentation rows for ExperimentID={experimentID!r}, "
        f"PondID={pondID!r}, SiteID={siteID!r}")
  time = np.asarray(instrumentation['DateTime'])
  start, end = time[0], time[-1]
  duration = end - start
  # Whole days, plus the whole hours left over after removing those days.
  duration_days = int(duration // np.timedelta64(1, 'D'))
  duration_hours = int(duration // np.timedelta64(1, 'h') % 24)

  # =================== HARVEST DATA =============================
  # Not used further yet; kept for the pending harvest/composition merge below.
  harvest = _select_experiment(harvest_data, experimentID, siteID, pondID).sort_values("Date")

  # =================== COMPOSITION DATA =======================
  composition = _select_experiment(composition_data, experimentID, siteID, pondID).sort_values('DATETIME')

  # ================== WEATHER DATA =========================
  # NOTE: same for all the ponds at the same site, hence no pond filter.
  weather = _select_experiment(weather_data, experimentID, siteID).sort_values("Date")

  instrumentation_daily = _daily_mean(instrumentation, 'DateTime')
  weather_daily = _daily_mean(weather, 'Date')

  # TODO: Merge weather and instrumentation data

  # TODO: Merge harvest and composition data - But how???

  print("Harvest Information:")
  fig = px.scatter_3d(composition, x='Protein.AF', y='FAME.Lipids.AF', z='Carbohydrates.AF', color='Duration.days', symbol='StrainID') # size = AFDW.g.L
  fig.show()

  print("Global Light Energy:")
  fig = px.histogram(weather_daily, x='GlobalLightEnergy(W.m2)')
  fig.show()
  fig = px.line(weather_daily, x='Date', y='GlobalLightEnergy(W.m2)')
  fig.show()

  print(f"START: {start}\nEND: {end}\nDURATION: {duration_days}d {duration_hours}h")

  # The ID columns are text and do not survive the daily averaging, so their
  # distinct values are read from the un-resampled selection.
  for col in _ID_COLUMNS:
    if col in instrumentation.columns:
      print(f"{col}: {instrumentation[col].unique()}")

  for col in instrumentation_daily.columns:
    if col in _ID_COLUMNS or col in _SKIPPED_COLUMNS:
      continue
    print(f"============{col}=============")
    print(f"max {col}: {instrumentation_daily[col].max()}")
    print(f"min {col}: {instrumentation_daily[col].min()}")
    print(f"mean {col}: {instrumentation_daily[col].mean()}")

    fig = px.histogram(instrumentation_daily, x=col)
    fig.show()
    fig = px.line(instrumentation_daily, x='DateTime', y=col)
    fig.show()

In [None]:
# 0 <= light energy W.m2 <= 1200
# 5 <= pH <= 9
# 0 <= temp (C) <= 50
# 0 <= DO mg.L <= 20
# 5 <= Sal g.L <= 50

In [None]:
# Summarise and plot experiment 'SEP102014' for pond 'P3' at site 'ASU'.
show_experiment(
    instrumentation_data=data,
    harvest_data=harvest_data,
    composition_data=composition_data,
    weather_data=weather_data,
    experimentID='SEP102014',
    pondID='P3',
    siteID='ASU',
)