In [78]:
# Given a year (e.g. 2009),
# we have N cells.
# Each cell contains information [{sst, wind_dir, chlorophyll, index, month, month_index}] (array is sorted by month)


In [79]:
import math
import json
import csv
import re
import ast
from pprint import pprint
import pandas as pd
from datetime import datetime

In [80]:
def drange(start, stop, step):
    """Float-friendly counterpart of ``range``.

    Yields ``start``, then keeps adding ``step`` to the previous value for as
    long as the value stays strictly below ``stop``.  Because each value is
    built from the previous one, floating-point rounding error accumulates
    along the sequence.  When ``start < stop`` and ``step`` is zero or
    negative the generator never terminates.
    """
    value = start
    while True:
        if not value < stop:
            return
        yield value
        value = value + step

In [81]:
def speciesMap():
    """Build a lookup from species name to its lower-cased group name.

    Reads ``../data/species_codes.csv``, skips the header row, and maps
    column 1 of every remaining row to the lower-cased value of column 3.
    Five names are then added (or overridden) by hand.

    Returns:
        dict[str, str]: species name -> group name in lower case.

    Raises:
        OSError: if the CSV file cannot be opened.
        IndexError: if a non-empty data row has fewer than four columns.
    """
    speciesmap = {}
    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open("../data/species_codes.csv", newline="") as codefile:
        codereader = csv.reader(codefile, delimiter=",")
        next(codereader, None)  # skip the header row (no-op on an empty file)
        for row in codereader:
            if not row:
                # csv.reader yields [] for a blank line; nothing to map.
                continue
            speciesmap[str(row[1])] = str(row[3]).lower()

    # Manual entries, applied last so they win over anything read from the file.
    speciesmap.update({
        "OTHER ROCKFISH YOY": "other rockfish",
        'WOLF EEL YOY': "other rockfish",
        'PAINTED GREENLING YOY': "other rockfish",
        'CURLFIN SOLE': "flatfish",
        'PACIFIC ARGENTINE': "fish",
    })
    return speciesmap

In [82]:
def readTopLevelCodes():
    """Return the decoded JSON content of ``../data/speciesClass.json``."""
    with open("../data/speciesClass.json") as handle:
        return json.loads(handle.read())

In [83]:
def readSSTData(year):
    """Return the decoded JSON content of ``../data/sstmay<year>.json``.

    ``year`` is converted with ``str()`` to build the file name.
    """
    path = "".join(["../data/sstmay", str(year), '.json'])
    with open(path) as handle:
        return json.loads(handle.read())

In [84]:
def readCholorData(year):
    """Return the decoded JSON content of ``../data/chlorophyllmay<year>.json``.

    ``year`` is converted with ``str()`` to build the file name.
    """
    path = "".join(["../data/chlorophyllmay", str(year), '.json'])
    with open(path) as handle:
        return json.loads(handle.read())

In [85]:
def readWindData():
    """Return the decoded JSON content of ``../data/wind.json``."""
    with open("../data/wind.json") as handle:
        return json.loads(handle.read())
    

In [86]:
def initMatrix(yearRange):
    """Build the empty 10 x 10 grid of lat/lon cells that later steps fill in.

    The grid spans latitude 35.775..39.15 and longitude 235.5625..238.9375 in
    steps of 0.0125 * 27 = 0.3375 degrees.  Cells are emitted band by band:
    latitude bands from lowest to highest and, inside each band, longitude
    bins from lowest to highest.

    Args:
        yearRange: iterable of years (one-shot iterators are fine).  Every
            cell gets one entry per year, initialised with the placeholder
            value -9999 for each measurement.

    Returns:
        list[dict]: one dict per cell with keys ``'latRange'`` and
        ``'lonRange'`` (``[lower, upper]`` edge pairs rounded to 4 decimals)
        plus, for each year, its own ``{'sst', 'windDegree', 'chloro'}`` dict.
    """
    smallestLat = 35.775
    largestLat = 39.15
    smallestLon = 235.5625
    largestLon = 238.9375
    delta = 0.0125 * 27

    def binEdges(lowest, highest):
        # Derive every edge from an integer multiple of delta.  Building the
        # edges by repeatedly adding delta makes the number of edges depend on
        # which way the accumulated rounding error happens to fall at the
        # upper bound.
        steps = int(round((highest - lowest) / delta))
        return [float("{0:.4f}".format(lowest + k * delta)) for k in range(steps + 1)]

    latInc = binEdges(smallestLat, largestLat)
    lonInc = binEdges(smallestLon, largestLon)
    # Materialise once so an iterator is not exhausted by the first cell.
    years = list(yearRange)

    matrix = []
    for latLow, latHigh in zip(latInc, latInc[1:]):
        for lonLow, lonHigh in zip(lonInc, lonInc[1:]):
            cell = {'latRange': [latLow, latHigh], 'lonRange': [lonLow, lonHigh]}
            for year in years:
                # A fresh dict per cell and year so later updates do not alias.
                cell[year] = {'sst': -9999, 'windDegree': -9999, 'chloro': -9999}
            matrix.append(cell)
    return matrix
    

In [87]:
# latRange and lonRange match the merged data
# map concatenated data to new species
# map to matrixObject (saves time for the processing step)
def populationForCell(sol):
    """Merge per-haul population records into one record per grid cell.

    Args:
        sol: list of JSON strings, each decoding to a dict that holds
            ``cellID``, ``latRange``, ``lonRange``, ``lat``, ``lon``,
            ``uniqueSpecies`` and one counter per species group.

    Returns:
        list[dict]: one dict per distinct ``cellID`` in first-seen order.
        Records sharing a ``cellID`` are combined by adding every value
        except the positional fields, which keep the first record's values.
        Each dict additionally gets ``uniqueSpecies`` (de-duplicated),
        ``uniqueSpeciesCount``, ``levelTwo``, ``levelOne`` and
        ``levelOneMap`` (group counters summed per top-level class).
        An empty ``sol`` yields an empty list.

    Raises:
        KeyError: if a group from ``speciesMap()`` is missing from
            ``readTopLevelCodes()`` or from a merged record.
    """
    if not sol:
        return []

    positionKeys = ("latRange", "lonRange", "cellID", "lat", "lon")
    # The keys of the first record decide which fields are summed on merge.
    mergeKeys = [key for key in json.loads(sol[0]) if key not in positionKeys]

    cellIDs = {}
    for row in sol:
        pops = json.loads(row)
        merged = cellIDs.setdefault(pops["cellID"], pops)
        if merged is not pops:
            # Cell already seen: add counters and concatenate species lists.
            for key in mergeKeys:
                merged[key] = merged[key] + pops[key]

    # Sort the de-duplicated names: iterating a set of strings directly gives
    # an order that can differ between interpreter runs, which would make the
    # generated output non-reproducible.
    rTLC = readTopLevelCodes()
    # key=str keeps the sort safe should the class values not all be strings.
    levelOne = sorted(set(rTLC.values()), key=str)
    levelTwo = sorted(set(speciesMap().values()))

    species = list(cellIDs.values())
    for val in species:
        val['uniqueSpecies'] = sorted(set(val['uniqueSpecies']))
        val['uniqueSpeciesCount'] = len(val['uniqueSpecies'])
        val['levelTwo'] = levelTwo
        val['levelOne'] = levelOne
        levelOneMap = {}
        for spec in levelTwo:
            topLevel = rTLC[spec]
            levelOneMap[topLevel] = levelOneMap.get(topLevel, 0) + val[spec]
        val['levelOneMap'] = levelOneMap
    return species

In [88]:
def processPopulation(year):
    """Bin the haul catches of May of ``year`` into grid cells.

    Reads ``../data/haul_catch_years_clean.csv``.  The first row is the
    header; in every other row column 0 is parsed as the latitude, column 1
    as the longitude (stored as ``360 - value``), column 2 holds the date and
    columns 3 and up hold per-species values whose header entries are looked
    up in ``speciesMap()``.

    Args:
        year: year to select; compared as ``str(year)`` against the first
            four-digit run found in the date column.

    Returns:
        The result of ``populationForCell`` for the matching rows, or ``[]``
        when no row matched.

    Raises:
        KeyError: if a header entry from column 3 on is not in ``speciesMap()``.
        AttributeError: if a date contains no digits / no four-digit run
            (``re.search`` returns None).
    """
    # Same bounds, step and edge construction as in initMatrix.
    smallestLat = 35.775
    largestLat = 39.15
    smallestLon = 235.5625
    largestLon = 238.9375
    # NOTE(review): latRange and lonRange are never read below.
    latRange = largestLat - smallestLat
    lonRange = largestLon - smallestLon
    delta = 0.0125
    delta = delta * 27
    latInterval = drange(smallestLat, largestLat, delta)
    latInc = [float("{0:.4f}".format(x)) for x in latInterval]

    lonInterval = drange(smallestLon, largestLon, delta)
    lonInc = [float("{0:.4f}".format(x)) for x in lonInterval]
    # The upper longitude bound is appended explicitly; latitude has no such step.
    lonInc.append(largestLon)
    
    speciesmap = speciesMap()
    firsthaulrow = ""
    # One dict reused for every row; each match is snapshotted with json.dumps.
    datadict = {}
    sol = []
    with open("../data/haul_catch_years_clean.csv") as haulfile:
        reader = csv.reader(haulfile, delimiter=",")
        firsthaulrow = next(reader, None)
        for row in reader:
            date = row[2]
            # Month = first run of digits in the date, year = first run of four digits.
            # NOTE(review): presumably dates look like M/D/YYYY — confirm against the file.
            monthmatch = re.search(r'\d+', date)
            yearmatch = re.search(r'\d{4}', date)
            monthvalue = monthmatch.group()
            yearvalue = yearmatch.group()
            # Reset every species-group counter before handling this row.
            for i in range(3, len(row)):
                speciescodes = speciesmap[firsthaulrow[i]]
                datadict[speciescodes] = 0

            # Only May rows of the requested year; a zero-padded "05" would not match.
            if monthvalue == "5" and yearvalue == str(year):
                 for i, lat_ in enumerate(latInc):
                    lat = ast.literal_eval(row[0])
                    if(i+1 < len(latInc)):
                        # Half-open band: lower edge inclusive, upper edge exclusive.
                        if lat >= lat_ and lat < latInc[i+1]:
                            for j,lon_ in enumerate(lonInc):
                                # NOTE(review): 360 - value looks like a degrees-west to
                                # degrees-east conversion — confirm.
                                lon = 360-ast.literal_eval(row[1])
                                if(j+1 < len(lonInc)):
                                    if lon >= lon_ and lon < lonInc[j+1]:
                                        latRnge =[lat_,latInc[i+1]]
                                        lonRnge =[lon_,lonInc[j+1]]
                                        datadict['latRange'] = latRnge
                                        datadict['lonRange'] = lonRnge
                                        # Band index and bin index concatenated as text;
                                        # processPops turns this into a list index with int().
                                        datadict['cellID'] = str(i) + str(j)
                                        datadict["lat"] = float("{0:.4f}".format(lat))
                                        datadict["lon"] = float("{0:.4f}".format(lon))
                                        datadict["uniqueSpecies"] = []
                                        for k in range(3, len(row)):
                                            # Empty fields are skipped; only positive values count.
                                            if row[k] != '':
                                                value = ast.literal_eval(row[k])
                                                if value > 0:
                                                    datadict["uniqueSpecies"].append(firsthaulrow[k])
                                                    datadict[speciesmap[firsthaulrow[k]]] += value
                                        # Serialise now because datadict is overwritten by the next row.
                                        datadictjson = json.dumps(datadict)
                                        sol.append(datadictjson)
    if len(sol) > 0:
        return populationForCell(sol)
    else:
        return []

In [98]:
#extend this to handle days for that month.
# e.g sst:(float) -> sst:[float,float,...] (sorted by day)
def processFile(years,matrix):
    """Fill every cell with the sea-surface-temperature readings of each year.

    For each year the rows of ``readSSTData(year)["table"]["rows"]`` are
    scanned; a row belongs to a cell when ``row[2]`` (lat) and ``row[3]``
    (lon) fall inside the cell's half-open ranges.  ``row[4]`` is the reading.

    Per cell and year:
      * ``'sstHD'`` becomes the list of readings in row order, with missing
        readings (``None`` or ``0``) stored as -9999;
      * ``'sst'`` becomes the mean of the non-missing readings rounded to
        4 decimals, and is left untouched when there are none.

    The cells are updated in place and ``matrix`` is returned.
    """
    for year in years:
        rows = readSSTData(year)["table"]["rows"]
        for cell in matrix:
            running = 0
            valid = 0
            temps = []
            for row in rows:
                lat, lon, sst = row[2], row[3], row[4]
                if not (cell['latRange'][0] <= lat < cell['latRange'][1]) or not (cell['lonRange'][0] <= lon < cell['lonRange'][1]):
                    continue
                if sst is None or sst == 0:
                    # Missing reading: keep its slot in the list, skip it in the mean.
                    sst = -9999
                else:
                    valid += 1
                    running = running + sst
                temps.append(sst)
            if valid:
                cell[year]['sst'] = float(format(running / valid, ".4f"))
            cell[year]['sstHD'] = temps

    return matrix

In [90]:
def processWindData(matrix):
    """Store a wind angle per cell and year from ``readWindData()``.

    Each row of ``readWindData()['table']['rows']`` supplies a timestamp
    (``row[0]``, format ``%Y-%m-%dT%H:%M:%SZ``), lat (``row[2]``), lon
    (``row[3]``) and the wind components x (``row[4]``) and y (``row[5]``).
    Rows with a missing component are ignored.  A row belongs to a cell when
    its lat/lon fall inside the cell's half-open ranges; its angle is written
    to ``cell[year]['windDegree']``.  When several rows hit the same cell and
    year, the last one in file order wins.

    The angle is ``atan2(y, x)`` in degrees, in the range (-180, 180].
    ``atan(y / x)`` would divide by zero for x == 0 and would give opposite
    winds such as (1, 1) and (-1, -1) the same angle.
    NOTE(review): this is the mathematical angle of the (x, y) vector, not
    necessarily a compass bearing — confirm what consumers expect.

    Args:
        matrix: list of cell dicts; updated in place.

    Returns:
        The same ``matrix`` object.

    Raises:
        KeyError: if a row inside a cell belongs to a year the cell has no
            entry for.
        ValueError: if a timestamp does not match the expected format.
    """
    windRows = readWindData()['table']['rows']

    # Decode every row once instead of once per cell.
    samples = []
    for wRow in windRows:
        year = datetime.strptime(wRow[0], '%Y-%m-%dT%H:%M:%SZ').year
        xWind, yWind = wRow[4], wRow[5]
        if xWind is None or yWind is None:
            continue
        samples.append((year, wRow[2], wRow[3], math.degrees(math.atan2(yWind, xWind))))

    for cell in matrix:
        for year, lat, lon, degree in samples:
            if (cell['latRange'][0] <= lat < cell['latRange'][1]) and (cell['lonRange'][0] <= lon < cell['lonRange'][1]):
                cell[year]['windDegree'] = degree
    return matrix

In [91]:
def processChlorophyll(years,matrix):
    """Store the mean chlorophyll reading per cell and year.

    For each year the rows of ``readCholorData(year)["table"]["rows"]`` are
    scanned; a row belongs to a cell when ``row[2]`` (lat) and ``row[3]``
    (lon) fall inside the cell's half-open ranges.  Readings (``row[4]``)
    that are ``None`` are skipped; a reading of 0 does count.  The mean,
    rounded to 4 decimals, is written to ``cell[year]['chloro']``; cells
    without any reading keep their previous value.

    The cells are updated in place and ``matrix`` is returned.
    """
    for year in years:
        readings = readCholorData(year)["table"]["rows"]
        for cell in matrix:
            running = 0
            hits = 0
            for reading in readings:
                lat, lon, chloro = reading[2], reading[3], reading[4]
                if not (cell['latRange'][0] <= lat < cell['latRange'][1]) or not (cell['lonRange'][0] <= lon < cell['lonRange'][1]):
                    continue
                if chloro is None:
                    continue
                hits += 1
                running = running + chloro
            if hits:
                cell[year]['chloro'] = float(format(running / hits, ".4f"))

    return matrix

In [92]:
def _addToRunningAverage(totals, name, amount):
    """Fold one cell's amount into the sum/count/average entry kept under name."""
    entry = totals.setdefault(name, {'sum': 0, 'count': 0, 'average': 0})
    entry['sum'] = entry['sum'] + amount
    entry['count'] = entry['count'] + 1
    entry['average'] = entry['sum'] / entry['count']


def calculateAverage(data):
    """Compute per-name totals and averages over all cells of one year.

    Args:
        data: list of cell dicts.  Each one lists its level-one names in
            ``'levelOne'`` (values read from ``'levelOneMap'``) and its
            level-two names in ``'levelTwo'`` (values read from the cell
            dict itself).

    Returns:
        list: ``[level1Totals, level2Totals]``; each maps a name to
        ``{'sum': ..., 'count': ..., 'average': ...}`` where ``count`` is the
        number of cells that listed the name.  Both are empty for empty input.

    Raises:
        KeyError: if a listed name has no value in the cell.
    """
    level1Totals = {}
    level2Totals = {}

    for d in data:
        # Every cell contributes, including the first one seen for a name.
        # Creating a zeroed entry on first sight without adding that cell's
        # value would leave it out of the sum and the count.
        for l1_name in d['levelOne']:
            _addToRunningAverage(level1Totals, l1_name, d['levelOneMap'][l1_name])
        for l2_name in d['levelTwo']:
            _addToRunningAverage(level2Totals, l2_name, d[l2_name])

    return [level1Totals, level2Totals]

In [93]:
def processPops(matrix,years):
    """Attach population records to grid cells and collect yearly averages.

    For every year, ``processPopulation(year)`` supplies the per-cell
    records and ``calculateAverage`` their level-one / level-two summaries.
    Each record is stored under ``'popInfo'`` in the matrix entry whose list
    index is ``int(record['cellID'])``.

    Returns:
        list: ``[matrix, yearAvg]`` where ``matrix`` is the same object that
        was passed in (updated in place) and ``yearAvg`` maps each year to
        ``{'L1': ..., 'L2': ...}``.
    """
    yearAvg = {}
    for year in years:
        records = processPopulation(year)
        averages = calculateAverage(records)
        yearAvg[year] = dict(L1=averages[0], L2=averages[1])
        for record in records:
            cell = matrix[int(record['cellID'])]
            cell[year]['popInfo'] = record
    return [matrix, yearAvg]

In [94]:
# Pipeline configuration.
# `years` is used for the grid itself and for the population step;
# `sstYears` is the subset passed to the SST and chlorophyll steps
# (presumably the only years with input files — confirm).
years = list(range(2009, 2019))
sstYears = list(range(2009, 2016))
matrix = initMatrix(years)

In [95]:
# Sanity check: number of grid cells produced by initMatrix.
len(matrix)

100

In [99]:
# Fill in 'sst' / 'sstHD' for the SST years; the cells are updated in place.
matrix = processFile(sstYears,matrix)

In [101]:
# Wind is read from a single file and keyed by the year in each timestamp;
# chlorophyll is read per year, for the same years as SST.
matrix = processWindData(matrix) # covers every year found in the wind file
matrix = processChlorophyll(sstYears,matrix)

In [102]:
# Alias, not a copy: newMatrix and matrix refer to the same list of cells.
newMatrix = matrix

In [103]:
# temp[0] is the matrix with 'popInfo' attached, temp[1] the per-year averages.
temp = processPops(newMatrix,years)

In [62]:
# NOTE(review): in the cells visible here `lonInc` is only assigned inside
# initMatrix / processPopulation, so this lookup appears to rely on a
# notebook-level variable left over from earlier kernel state (this cell's
# execution count is also out of order) — confirm or remove.
len(lonInc)
# First nine [lower, upper] longitude bins, for reference:
# [235.5625, 235.9]
# [235.9, 236.2375]
# [236.2375, 236.575]
# [236.575, 236.9125]
# [236.9125, 237.25]
# [237.25, 237.5875]
# [237.5875, 237.925]
# [237.925, 238.2625]
# [238.2625, 238.6]
11

In [63]:
# NOTE(review): `latInc` is likewise not defined at notebook level in the
# cells visible here; this appears to depend on leftover kernel state — confirm or remove.
len(latInc)

11

In [106]:
# Inspect the first grid cell; the 'sstHD' list makes this output very long.
newMatrix[0]

{2009: {'chloro': 0.8782,
  'sst': 12.7459,
  'sstHD': [13.065,
   12.973637,
   13.064546,
   12.9118185,
   12.948,
   12.889999,
   12.895,
   12.856817,
   12.924091,
   12.962275,
   12.985909,
   12.949091,
   12.955001,
   12.959091,
   12.933636,
   12.863636,
   12.986,
   13.032499,
   13.002501,
   13.005002,
   13.158181,
   13.096499,
   13.2095,
   13.132001,
   13.106999,
   13.2235,
   13.1735,
   13.031817,
   13.030909,
   12.941364,
   12.965453,
   12.919089,
   12.921363,
   12.857272,
   12.895455,
   12.883636,
   13.023,
   12.948181,
   12.940909,
   12.929544,
   12.889999,
   12.937728,
   12.814091,
   12.926362,
   13.035998,
   13.1315,
   13.144499,
   13.185,
   13.125999,
   13.1085005,
   13.171499,
   13.097499,
   13.191,
   13.1935,
   12.959,
   12.992,
   12.973636,
   12.824546,
   12.800909,
   12.845001,
   12.781667,
   12.832916,
   12.777917,
   12.876365,
   12.8945465,
   12.95409,
   12.829999,
   12.883182,
   12.85409,
   12.901818,
   

In [104]:
# Persist the populated grid (temp[0]); the per-year averages in temp[1] are
# not written here.  The integer year keys become strings in the JSON output.
with open('data.json', 'w') as outfile:
    json.dump(temp[0], outfile)

In [107]:
# NOTE(review): scratch arithmetic; nothing else in the visible cells reads
# `x` — consider deleting this cell.
x = 575 * 4
x

2300