In [1]:
#Given a year 2009
# we have N cells 
# Each cell contains information [{sst, wind_dir, chlorophyll, index, month, month_index}] (array is sorted by month)


In [2]:
import math
import json
from pprint import pprint
import pandas as pd
from datetime import datetime

In [3]:
def drange(start, stop, step):
    """Yield start, start + step, start + 2*step, ... while the value is < stop.

    A float-capable counterpart of range(): `stop` itself is never yielded.

    The running value is built by repeated addition, so with float arguments
    rounding error accumulates; when (stop - start) is close to a whole
    multiple of `step` the sequence may contain one element more or fewer
    than the exact arithmetic would suggest.

    Raises:
        ValueError: when `step` is not positive while `start < stop`. The
            running value could then never reach `stop` and the generator
            would never finish. Because this is a generator, the error
            surfaces on the first iteration, not at call time.
    """
    # Only reject the combination that cannot terminate; an already-empty
    # range (start >= stop) keeps yielding nothing, whatever the step is.
    if step <= 0 and start < stop:
        raise ValueError("drange() step must be positive when start < stop")
    r = start
    while r < stop:
        yield r
        r += step

In [4]:
def readSSTData(year):
    """Load and return the parsed JSON from ../data/sstmay<year>.json.

    `year` is converted with str() and spliced into the file name. The path
    is relative to the current working directory. The decoded JSON value is
    returned unchanged.
    """
    sstPath = "../data/sstmay" + str(year) + '.json'
    with open(sstPath) as handle:
        return json.load(handle)

In [5]:
def readCholorData(year):
    """Load and return the parsed JSON from ../data/chlorophyllmay<year>.json.

    `year` is converted with str() and spliced into the file name. The path
    is relative to the current working directory. The decoded JSON value is
    returned unchanged.
    """
    chloroPath = "../data/chlorophyllmay" + str(year) + '.json'
    with open(chloroPath) as handle:
        return json.load(handle)

In [6]:
def readWindData():
    """Load and return the parsed JSON from ../data/wind.json.

    The path is relative to the current working directory. The decoded JSON
    value is returned unchanged.
    """
    windPath = "../data/wind.json"
    with open(windPath) as handle:
        return json.load(handle)
    

In [32]:
def initMatrix(yearRange):
    """Build the list of empty grid cells covering the study area.

    The latitude span [35.775, 39.15] and longitude span [235.5625, 238.9375]
    are each divided into 11 equal steps. The first 11 edges on each axis
    (rounded to 4 decimals) are paired up into a 10 x 10 grid of cells,
    emitted latitude-row by latitude-row.

    Each cell is a dict with:
        'latRange': [lower, upper] latitude edges,
        'lonRange': [lower, upper] longitude edges,
        <year>:     {'sst': -9999, 'windDegree': -9999, 'chloro': -9999}
                    for every year in `yearRange` (-9999 marks "no value yet").

    Args:
        yearRange: iterable of year keys. It is materialised once, so a
            one-shot iterable (e.g. a generator) populates every cell.

    Returns:
        list of cell dicts (100 entries).

    NOTE(review): the final 1/11 strip on each axis (up to largestLat /
    largestLon) is not covered by any cell. This matches the grid the
    previous implementation produced -- confirm it is intended.
    """
    smallestLat = 35.775
    largestLat = 39.15
    smallestLon = 235.5625
    largestLon = 238.9375

    steps = 11
    latStep = (largestLat - smallestLat) / steps
    lonStep = (largestLon - smallestLon) / steps

    # Derive each edge from its index instead of by repeated addition.
    # Accumulated float error previously decided how many edges each axis
    # got (one axis needed a manual [:-1] trim, the other did not); indexing
    # makes both axes deterministic and symmetric.
    latInc = [float("{0:.4f}".format(smallestLat + i * latStep)) for i in range(steps)]
    lonInc = [float("{0:.4f}".format(smallestLon + i * lonStep)) for i in range(steps)]
    # Progress/debug output retained so the cell's printed output is unchanged.
    print(lonInc)

    # Snapshot the years so every cell gets the same keys even when the
    # caller passes an iterator that can only be consumed once.
    yearKeys = list(yearRange)

    matrix = []
    for lat_i in range(len(latInc) - 1):
        print(lat_i)
        for lon_i in range(len(lonInc) - 1):
            cell = {
                'latRange': [latInc[lat_i], latInc[lat_i + 1]],
                'lonRange': [lonInc[lon_i], lonInc[lon_i + 1]],
            }
            for year in yearKeys:
                # A fresh dict per cell and year: values are filled in place later.
                cell[year] = {'sst': -9999, 'windDegree': -9999, 'chloro': -9999}
            matrix.append(cell)

    return matrix
    

In [33]:
# TODO: extend this to handle the individual days of that month,
# e.g. sst: float -> sst: [float, float, ...] (sorted by day)
def processFile(years,matrix):
    """Fill each cell's 'sst' entry with the mean SST of the rows inside it.

    For every year in `years` the SST file is loaded with readSSTData(year)
    and its ["table"]["rows"] list is scanned once per cell. In each row,
    index 2 is read as latitude, index 3 as longitude and index 4 as the SST
    value. A row counts towards a cell when both coordinates lie inside the
    cell's closed lat/lon ranges and the value is neither None nor 0.

    When a cell has at least one such row, cell[year]['sst'] is set to the
    mean rounded to 4 decimals; otherwise the existing value is left alone.

    `matrix` is modified in place and also returned.
    """
    def covers(cell, lat, lon):
        # Closed-interval test on both axes, so a point lying exactly on a
        # shared edge is counted by both neighbouring cells.
        return (cell['latRange'][0] <= lat <= cell['latRange'][1] ) and (cell['lonRange'][0] <= lon <= cell['lonRange'][1])

    for year in years:
        rows = readSSTData(year)["table"]["rows"]
        for cell in matrix:
            runningTotal = 0
            sampleCount = 0
            for row in rows:
                lat, lon, sst = row[2], row[3], row[4]
                if not covers(cell, lat, lon):
                    continue
                # Missing readings and exact zeros are both skipped.
                if sst is None or sst == 0:
                    continue
                sampleCount += 1
                runningTotal += sst
            if sampleCount == 0:
                continue
            meanSst = runningTotal/sampleCount
            cell[year]['sst'] = float("{0:.4f}".format(meanSst))

    return matrix

In [34]:
def processWindData(matrix):
    """Write a wind angle into each cell for the years found in the wind file.

    Rows come from readWindData()['table']['rows']. In each row, index 0 is an
    ISO timestamp ('%Y-%m-%dT%H:%M:%SZ') whose year selects the cell[year]
    entry, index 2 is read as latitude, index 3 as longitude, and indices 4
    and 5 as the x and y wind components. Rows with a missing component are
    ignored.

    For every cell whose closed lat/lon ranges contain the row's position,
    cell[year]['windDegree'] is set to the angle of the (x, y) vector in
    degrees, computed with atan2. The result lies in [-180, 180] and is
    well defined when xWind is 0.

    Changes from the previous atan(yWind / xWind) formula:
      * xWind == 0 no longer raises ZeroDivisionError;
      * vectors pointing in opposite directions no longer map to the same
        angle. Results are identical for xWind > 0 and differ by 180 degrees
        for xWind < 0.

    NOTE(review): when several rows fall in the same cell and year, the last
    row in file order wins -- no averaging is done here.

    `matrix` is modified in place and also returned. A KeyError is raised if
    a matching row's year has no entry in the cell.
    """
    windData = readWindData()
    windRows = windData['table']['rows']
    for wRow in windRows:
        # Parse the timestamp once per row; it does not depend on the cell.
        year = datetime.strptime(wRow[0], '%Y-%m-%dT%H:%M:%SZ').year
        lat = wRow[2]
        lon = wRow[3]
        xWind = wRow[4]
        yWind = wRow[5]
        if xWind is None or yWind is None:
            continue
        # atan2 keeps the quadrant and tolerates a zero x component.
        windDegree = math.degrees(math.atan2(yWind, xWind))
        for cell in matrix:
            if((cell['latRange'][0] <= lat <= cell['latRange'][1] ) and (cell['lonRange'][0] <= lon <= cell['lonRange'][1])):
                cell[year]['windDegree'] = windDegree
    return matrix

In [35]:
def processChlorophyll(years,matrix):
    """Fill each cell's 'chloro' entry with the mean chlorophyll value inside it.

    For every year in `years` the chlorophyll file is loaded with
    readCholorData(year) and its ["table"]["rows"] list is scanned once per
    cell. In each row, index 2 is read as latitude, index 3 as longitude and
    index 4 as the chlorophyll value. A row counts towards a cell when both
    coordinates lie inside the cell's closed lat/lon ranges and the value is
    not None (zeros are kept).

    When a cell has at least one such row, cell[year]['chloro'] is set to the
    mean rounded to 4 decimals; otherwise the existing value is left alone.

    `matrix` is modified in place and also returned.
    """
    def covers(cell, lat, lon):
        # Closed-interval test on both axes, so a point lying exactly on a
        # shared edge is counted by both neighbouring cells.
        return (cell['latRange'][0] <= lat <= cell['latRange'][1] ) and (cell['lonRange'][0] <= lon <= cell['lonRange'][1])

    for year in years:
        rows = readCholorData(year)["table"]["rows"]
        for cell in matrix:
            runningTotal = 0
            sampleCount = 0
            for row in rows:
                lat, lon, chloro = row[2], row[3], row[4]
                if not covers(cell, lat, lon):
                    continue
                # Only missing readings are skipped.
                if chloro is None:
                    continue
                sampleCount += 1
                runningTotal += chloro
            if sampleCount == 0:
                continue
            meanChloro = runningTotal/sampleCount
            cell[year]['chloro'] = float("{0:.4f}".format(meanChloro))

    return matrix

In [36]:
# Build the empty grid for every year of interest, then fill in the measurements.
# SST and chlorophyll are only loaded for the years listed in sstYears; the wind
# step takes its years from the timestamps inside the wind file.
years = list(range(2009, 2019))
sstYears = [2011, 2015]

matrix = initMatrix(years)
matrix = processFile(sstYears, matrix)
matrix = processWindData(matrix)
matrix = processChlorophyll(sstYears, matrix)

[235.5625, 235.8693, 236.1761, 236.483, 236.7898, 237.0966, 237.4034, 237.7102, 238.017, 238.3239, 238.6307]
0
1
2
3
4
5
6
7
8
9


In [38]:
# Persist the populated grid to data.json in the current working directory.
# JSON object keys are always strings, so the integer year keys are written
# as "2009", "2010", ... in the file.
with open('data.json', 'w') as sink:
    json.dump(matrix, sink)

In [39]:
# Show the populated grid as this cell's output (the whole list is rendered).
matrix

[{2016: {'chloro': -9999, 'sst': -9999, 'windDegree': -82.12673472493668},
  2017: {'chloro': -9999, 'sst': -9999, 'windDegree': -88.52408595720108},
  2018: {'chloro': -9999, 'sst': -9999, 'windDegree': -69.94635230819601},
  'lonRange': [235.5625, 235.8693],
  'latRange': [35.775, 36.0818],
  2009: {'chloro': -9999, 'sst': -9999, 'windDegree': -49.719220258227914},
  2010: {'chloro': -9999, 'sst': -9999, 'windDegree': 46.16051282090083},
  2011: {'chloro': 0.2566, 'sst': 13.3366, 'windDegree': -76.93208748518201},
  2012: {'chloro': -9999, 'sst': -9999, 'windDegree': -50.052108988163155},
  2013: {'chloro': -9999, 'sst': -9999, 'windDegree': -78.01428210489628},
  2014: {'chloro': -9999, 'sst': -9999, 'windDegree': -78.59122351344351},
  2015: {'chloro': 0.2347, 'sst': 12.9796, 'windDegree': -51.106840220889666}},
 {2016: {'chloro': -9999, 'sst': -9999, 'windDegree': -83.33159572042065},
  2017: {'chloro': -9999, 'sst': -9999, 'windDegree': -83.62590882454896},
  2018: {'chloro': -99