In [2]:
import sys
sys.path.append('../')
import pandas as pd
import numpy as np
import zipfile
from helper import spkly

In [5]:
# Open the pelagic-abundance archive; later cells read its members through `zip`.
zip = zipfile.ZipFile('../data/Teleost_Especes_Pelagiques_Abondance_2009-2018.zip')
# Not the cell's final expression, so this listing is evaluated but not displayed.
zip.namelist()

# Species code -> English common name lookup. The data dictionary needs the
# cp437 encoding to be read, and its first 10 rows are skipped before the header.
species_table = pd.read_csv(
    '../data/DataDictionary_DictionnaireDonnees.csv',
    encoding='cp437',
    skiprows=10,
)
FISH_MAP = species_table.set_index('Species')['Name _EN'].to_dict()
FISH_MAP

{'AMSPP': 'Sand Lances (Ammodytes sp.)',
 'ARRIS': 'White Barracudina',
 'ARSIL': 'Atlantic Argentine',
 'BOSAI': 'Arctic Cod',
 'CLHAR': 'Atlantic Herring',
 'CYLUM': 'Lumpfish',
 'GAACU': 'Threespine Stickleback',
 'MAVIL': 'Capelin (M. villosus)',
 'MEATL': 'Atlantic Soft Pout',
 'MEBIL': 'Silver Hake',
 'OSMOR': 'Rainbow Smelt',
 'POVIR': 'Pollock',
 'SCSCO': 'Atlantic Mackerel (S. scombrus)'}

In [10]:
import datetime

# Read the first two archive members; the first is labelled NORTH and the
# second SOUTH (assumes namelist() order matches that convention -- TODO confirm).
# The member handles are closed as soon as each CSV has been parsed.
member_names = zip.namelist()
with zip.open(member_names[0]) as north_handle:
    north_fish = pd.read_csv(north_handle)
with zip.open(member_names[1]) as south_handle:
    south_fish = pd.read_csv(south_handle)
north_fish['region'] = 'NORTH'
south_fish['region'] = 'SOUTH'

# let's just append the two datasets together and rename the fish mappings to something slightly more human readable
# (rename already returns a new frame, so no extra copy is needed)
fish_set = pd.concat([north_fish, south_fish]).rename(columns=FISH_MAP)
fish_set['Date'] = pd.to_datetime(fish_set['Date'])
display(fish_set.shape)

# these sparkline image volumes don't look very promising for some species... e.g., sand lances. Also see that this data is only collected 3
# months out of a year!
# numeric_only=True keeps the datetime `Date` and string `region` columns out of
# the sum explicitly, instead of relying on pandas silently dropping them.
monthly_totals = fish_set.groupby(
    [fish_set['Date'].dt.year, fish_set['Date'].dt.month]
).sum(numeric_only=True)
spkly.display(monthly_totals)

(3165, 19)

Unnamed: 0_level_0,Unnamed: 1_level_0,Station,Longitude,Latitude,Profondeur/Depth,Sand Lances (Ammodytes sp.),White Barracudina,Atlantic Argentine,Arctic Cod,Atlantic Herring,Lumpfish,Threespine Stickleback,Capelin (M. villosus),Atlantic Soft Pout,Silver Hake,Rainbow Smelt,Pollock,Atlantic Mackerel (S. scombrus)
Date,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2009,8,18475,-11056.761083,8706.707667,41789.5,3.3,4218.1,3.8,98.6,905.8,20.2,185.9,232218.9,2825.9,34.2,0.0,0.9,2.9
2009,9,13470,-9912.839833,7488.690167,12790.0,0.0,264.3,0.0,2.1,21543.8,11.0,3093.6,27834.2,206.2,6.7,8683.4,1.0,404.8
2010,8,15592,-9060.587833,7103.932583,31658.5,0.0,930.9,1.0,241.1,220.9,37.8,119.0,258929.8,2138.3,88.4,0.0,0.0,5.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018,8,16383,-10282.677500,8181.549000,38571.5,59.6,489.9,107.2,605.1,9768.5,52.5,2.3,22612.3,396.0,210.4,0.0,2.7,121.0
2018,9,5623,-6401.170167,4840.246583,9824.0,0.0,13.9,0.0,1.0,12829.4,3.3,91.9,15438.6,9.8,23.9,6295.0,223.0,5435.5
2018,10,639,-364.932667,284.021833,544.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,3.0


In [14]:
# Sanity-check the parsed column types of the combined frame (e.g. that `Date`
# is a datetime column and `region` is the only object column).
fish_set.dtypes

Date                               datetime64[ns]
Station                                     int64
Longitude                                 float64
Latitude                                  float64
Profondeur/Depth                          float64
Sand Lances (Ammodytes sp.)               float64
White Barracudina                         float64
Atlantic Argentine                        float64
Arctic Cod                                float64
Atlantic Herring                          float64
Lumpfish                                  float64
Threespine Stickleback                    float64
Capelin (M. villosus)                     float64
Atlantic Soft Pout                        float64
Silver Hake                               float64
Rainbow Smelt                             float64
Pollock                                   float64
Atlantic Mackerel (S. scombrus)           float64
region                                     object
dtype: object

In [None]:
import folium

# Base map; the location and zoom values are hard-coded for this dataset.
# NOTE(review): newer folium releases may no longer ship the built-in
# 'Stamen Terrain' tile set -- confirm against the installed version.
m = folium.Map(location=[50, -60], zoom_start=4.5, tiles='Stamen Terrain')

# Drop a marker for the first ten rows, selected by position. The previous loop
# passed index *labels* to .iloc, which only worked because the leading labels
# of the concatenated frame happen to equal their positions.
first_positions = fish_set[['Latitude', 'Longitude']].head(10)
for latitude, longitude in first_positions.itertuples(index=False, name=None):
    folium.Marker(location=[latitude, longitude]).add_to(m)
m