In [18]:
import sys
import psycopg2
sys.path.append('../')
from helper import db
import pandas as pd
import numpy as np
import bokeh

In [41]:
# Based on https://www.usna.edu/Users/oceano/pguth/md_help/html/approx_equivalents.htm rounding latitude and longitude 
# to .01 degree approximates by 11.1km which is ok at our scale
query = """
SELECT
extract(year from data_date) as year,
depth,
ROUND(latitude) as latitude, 
ROUND(longitude) as longitude,
ROUND(AVG(temperature), 3) as temperature,
ROUND(AVG(salinity), 3) as salinity
FROM OCEAN_DATA
WHERE data_date BETWEEN '2009-01-01' AND '2011-12-31'
AND to_char(data_date,'Mon') in ('Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep')
GROUP BY year, depth, latitude, longitude
ORDER BY year, temperature;
"""
df = db.run_query(query)

In [42]:
df.head()

Unnamed: 0,year,depth,latitude,longitude,temperature,salinity
0,2009.0,"(0, 100]",55,-55,-1.609,32.72
1,2009.0,"(0, 100]",55,-55,-1.537,33.389
2,2009.0,"(0, 100]",55,-55,-1.534,33.227
3,2009.0,"(0, 100]",55,-54,-1.487,32.855
4,2009.0,"(0, 100]",55,-54,-1.485,32.839


In [43]:
len(df)

68172

In [44]:
temp_var = df.copy()

In [51]:
temp_var["temperature"] = temp_var["temperature"].astype(float)
temp_var["salinity"] = temp_var["salinity"].astype(float)
temp_var.sort_values(by=["depth", "latitude", "longitude", "year"]).head(15)
temp_var.groupby(["depth", "latitude", "longitude", "year"])[["temperature", "salinity"]].mean().reset_index()

Unnamed: 0,depth,latitude,longitude,year,temperature,salinity
0,"(0, 100]",38,-70,2011.0,20.405000,35.6510
1,"(0, 100]",38,-69,2010.0,12.590000,35.2700
2,"(0, 100]",38,-68,2010.0,16.997500,35.4315
3,"(0, 100]",38,-68,2011.0,16.741000,35.1930
4,"(0, 100]",38,-67,2010.0,23.496000,36.4320
...,...,...,...,...,...,...
20344,"(900, 1000]",59,-37,2011.0,3.787333,34.8970
20345,"(900, 1000]",59,-36,2009.0,3.822000,34.8915
20346,"(900, 1000]",59,-36,2010.0,3.696000,34.8530
20347,"(900, 1000]",59,-36,2011.0,4.014000,34.9380


In [338]:
# Taking only from April to Sept as fishes are no in the area the whole year
query = """
SELECT
extract(year from data_date) as year,
depth,
ROUND(AVG(temperature), 3) as temperature,
ROUND(AVG(salinity), 3) as salinity
FROM OCEAN_DATA
WHERE data_date BETWEEN '2009-01-01' AND '2019-12-31'
AND salinity BETWEEN 30 and 41
AND temperature BETWEEN -2.5 and 40
AND to_char(data_date,'Mon') in ('Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep')
GROUP BY year, depth
ORDER BY year, temperature;
"""
df1 = db.run_query(query)

In [340]:
df1.head()

Unnamed: 0,year,depth,temperature,salinity
0,2009.0,"(2000, 2100]",3.467,34.926
1,2009.0,"(1900, 2000]",3.474,34.918
2,2009.0,"(1800, 1900]",3.557,34.93
3,2009.0,"(1700, 1800]",3.604,34.925
4,2009.0,"(1600, 1700]",3.685,34.925


In [341]:
#Clean data

from pandas.api.types import CategoricalDtype
#turn string back into categories... yes you have to do it manually apprarently
cat = ['(0, 100]', '(100, 200]', '(200, 300]', '(300, 400]', '(400, 500]', '(500, 600]', '(600, 700]', '(700, 800]', '(800, 900]', '(900, 1000]',
        '(1000, 1100]', '(1100, 1200]', '(1200, 1300]', '(1300, 1400]', '(1400, 1500]', '(1500, 1600]', '(1600, 1700]', '(1700, 1800]', '(1800, 1900]', '(1900, 2000]', 
        '(2000, 2100]', '(2100, 2200]', '(2200, 2300]', '(2300, 2400]', '(2400, 2500]', '(2500, 2600]', '(2600, 2700]', '(2700, 2800]', '(2800, 2900]', '(2900, 3000]',
        '(3000, 3100]', '(3100, 3200]', '(3200, 3300]', '(3300, 3400]', '(3400, 3500]', '(3500, 3600]', '(3600, 3700]', '(3700, 3800]', '(3800, 3900]', '(3900, 4000]', 
        '(4000, 4100]', '(4100, 4200]', '(4200, 4300]', '(4300, 4400]', '(4400, 4500]', '(4500, 4600]', '(4600, 4700]', '(4700, 4800]', '(4800, 4900]', '(4900, 5000]',
        '(5000, 5100]', '(5100, 5200]', '(5200, 5300]', '(5300, 5400]', '(5400, 5500]', '(5500, 5600]', '(5600, 5700]', '(5700, 5800]', '(5800, 5900]', '(5900, 6000]',
       ]

cat_type = CategoricalDtype(categories=cat, ordered=True)
df1["depth"] = df1["depth"].astype(cat_type)

# Change temp and salinity to floats
df1["temperature"] = df1["temperature"].astype(float)
df1["salinity"] = df1["salinity"].astype(float)
df1 = df1.sort_values(by=["year", "depth"])

df1.head()

Unnamed: 0,year,depth,temperature,salinity
32,2009.0,"(0, 100]",10.632,35.102
31,2009.0,"(100, 200]",9.138,35.296
30,2009.0,"(200, 300]",8.509,35.256
29,2009.0,"(300, 400]",8.037,35.231
28,2009.0,"(400, 500]",7.234,35.15


In [342]:
df2 = df1.copy()
df2["temperature"] = df2["temperature"].astype(float)
df2["salinity"] = df2["salinity"].astype(float)
df2 = df2.sort_values(by=["year", "depth"])

def variation(df):
    # Be sure to only input df[df.depth < "(2100, 2200]"]
    for depth_range in df["depth"]:
        # Get the baseline value of the parameter in 2009
        depth = (df.depth == depth_range)
        y2009 = (df.year == 2009)
        baseline_temp = df.loc[depth & y2009]["temperature"].item()
        baseline_sal = df.loc[depth & y2009]["salinity"].item()
        # Use subset to update values
        df.loc[depth, "temp_variation"] = df.loc[depth, "temperature"]- baseline_temp
        df.loc[depth, "sal_variation"] = df.loc[depth, "salinity"]- baseline_sal
    return df
            
        
df3 = variation(df2[df2.depth < "(2100, 2200]"])
df3.head(50)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


Unnamed: 0,year,depth,temperature,salinity,temp_variation,sal_variation
32,2009.0,"(0, 100]",10.632,35.102,0.0,0.0
31,2009.0,"(100, 200]",9.138,35.296,0.0,0.0
30,2009.0,"(200, 300]",8.509,35.256,0.0,0.0
29,2009.0,"(300, 400]",8.037,35.231,0.0,0.0
28,2009.0,"(400, 500]",7.234,35.15,0.0,0.0
27,2009.0,"(500, 600]",6.567,35.086,0.0,0.0
26,2009.0,"(600, 700]",5.929,35.026,0.0,0.0
25,2009.0,"(700, 800]",5.361,34.982,0.0,0.0
23,2009.0,"(800, 900]",4.914,34.958,0.0,0.0
22,2009.0,"(900, 1000]",4.572,34.945,0.0,0.0


In [351]:
df3[df3.year == 2019].groupby(["depth", "year"]).mean().head(21)

Unnamed: 0_level_0,Unnamed: 1_level_0,temperature,salinity,temp_variation,sal_variation
depth,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(0, 100]",2019.0,10.753,34.947,0.121,-0.155
"(100, 200]",2019.0,8.759,35.218,-0.379,-0.078
"(200, 300]",2019.0,8.088,35.203,-0.421,-0.053
"(300, 400]",2019.0,7.427,35.155,-0.61,-0.076
"(400, 500]",2019.0,6.754,35.104,-0.48,-0.046
"(500, 600]",2019.0,6.129,35.055,-0.438,-0.031
"(600, 700]",2019.0,5.566,35.011,-0.363,-0.015
"(700, 800]",2019.0,5.071,34.975,-0.29,-0.007
"(800, 900]",2019.0,4.681,34.95,-0.233,-0.008
"(900, 1000]",2019.0,4.377,34.935,-0.195,-0.01
