# Installation of the library

In [None]:
pip install mecoda-nat

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mecoda-nat
  Downloading mecoda_nat-0.5.8-py3-none-any.whl (22 kB)
Collecting flat-table
  Downloading flat_table-1.1.1-py3-none-any.whl (6.8 kB)
Collecting pydantic
  Downloading pydantic-1.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.1 MB)
[K     |████████████████████████████████| 11.1 MB 25.7 MB/s 
Installing collected packages: pydantic, flat-table, mecoda-nat
Successfully installed flat-table-1.1.1 mecoda-nat-0.5.8 pydantic-1.9.1


# Import libraries

In [None]:
from mecoda_nat import get_obs, get_dfs, get_count_by_taxon, download_photos
import os

import pandas as pd 

# Connection with Google Drive

In [None]:
# Mount Google Drive at /content/drive; the paths used by the cells below live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Variables

In [None]:
# Root Drive folder for the downloaded photos. Note the trailing '/': sub-folders
# joined onto it should not add another leading separator.
basic_path = '/content/drive/MyDrive/natusphera/'

In [None]:
# Taxon names passed one by one to get_obs(taxon=...) by the download loop.
taxon_unique = [
    'chromista',
    'protozoa',
    'animalia',
    'mollusca',
    'arachnida',
    'insecta',
    'aves',
    'mammalia',
    'amphibia',
    'reptilia',
    'actinopterygii',
    'fungi',
    'plantae',
    'unknown',
]

# Downloading images

# Taxa

In [None]:
# For every taxon: fetch its 2018 observations, keep the 200 photo rows with the
# smallest `id`, and download them into a per-taxon folder under basic_path.
for taxon in taxon_unique:

  print('Obs: ' + taxon)

  obs = get_obs(year=2018, taxon=taxon)

  print('Dataframes: ' + taxon)

  df_obs, df_photos = get_dfs(obs)

  print('Sorted & filtered dataframes: ' + taxon)

  # Ascending sort + head(200) caps the download at 200 rows per taxon.
  df_photos = df_photos.sort_values(by='id').head(200)

  print('Downloading: ' + taxon)

  # os.path.join avoids the doubled separator that `basic_path + '/2018_'` produces
  # when basic_path already ends in '/'; the trailing '' keeps the final '/'.
  download_photos(df_photos, os.path.join(basic_path, '2018_' + taxon, ''))

Obs: chromista
Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 298
Dataframes: chromista
Sorted & filtered dataframes: chromista
Downloading: chromista
Obs: protozoa
Generando lista de observaciones:
Número de elementos: 44
Dataframes: protozoa
Sorted & filtered dataframes: protozoa
Downloading: protozoa
Obs: animalia
Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 800
Número de elementos: 1000
Número de elementos: 1072
Dataframes: animalia
Sorted & filtered dataframes: animalia
Downloading: animalia
Obs: mollusca
Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 629
Dataframes: mollusca
Sorted & filtered dataframes: mollusca
Downloading: mollusca
Obs: arachnida
Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 496
Dataframes: arachnida
Sort

# Single taxon

In [None]:
# Restrict the taxon list to a single entry for the full 2021 download below.
taxon_unique = ['actinopterygii']

In [None]:
# For every taxon in taxon_unique: fetch all 2021 observations and download every
# photo (no row cap here) into an 'all_2021_<taxon>' folder under basic_path.
for taxon in taxon_unique:

  print('Obs: ' + taxon)

  obs = get_obs(year=2021, taxon=taxon)

  print('Dataframes: ' + taxon)

  df_obs, df_photos = get_dfs(obs)

  print('Sorted & filtered dataframes: ' + taxon)

  df_photos = df_photos.sort_values(by='id')

  print('Downloading: ' + taxon)

  # os.path.join avoids the doubled separator that `basic_path + '/all_2021_'` produces
  # when basic_path already ends in '/'; the trailing '' keeps the final '/'.
  download_photos(df_photos, os.path.join(basic_path, 'all_2021_' + taxon, ''))

  print('Downloaded')

Obs: actinopterygii
Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 800
Número de elementos: 1000
Número de elementos: 1200
Número de elementos: 1400
Número de elementos: 1600
Número de elementos: 1800
Número de elementos: 2000
Número de elementos: 2200
Número de elementos: 2400
Número de elementos: 2600
Número de elementos: 2800
Número de elementos: 3000
Número de elementos: 3200
Número de elementos: 3400
Número de elementos: 3600
Número de elementos: 3800
Número de elementos: 4000
Número de elementos: 4185
Dataframes: actinopterygii
Sorted & filtered dataframes: actinopterygii
Downloading: actinopterygii
Downloaded


# Studying taxa

In [None]:
# Fetch the 2021 actinopterygii observations, then show the 15 taxon_name groups
# with the highest non-null `id` count.
obs_2021 = get_obs(year=2021, taxon='actinopterygii')
df_obs_2021, df_photos_2021 = get_dfs(obs_2021)
(
    df_photos_2021
    .groupby(by='taxon_name')
    .count()
    .sort_values(by='id', ascending=False)
    .head(15)
)

Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 800
Número de elementos: 1000
Número de elementos: 1200
Número de elementos: 1400
Número de elementos: 1600
Número de elementos: 1800
Número de elementos: 2000
Número de elementos: 2200
Número de elementos: 2400
Número de elementos: 2600
Número de elementos: 2800
Número de elementos: 3000
Número de elementos: 3200
Número de elementos: 3400
Número de elementos: 3600
Número de elementos: 3800
Número de elementos: 4000
Número de elementos: 4194


Unnamed: 0_level_0,id,photos.id,iconic_taxon,photos.medium_url,user_login,latitude,longitude,path
taxon_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Parablennius pilicornis,253,253,253,253,253,250,250,253
Tripterygion tripteronotum,207,207,207,207,207,205,205,207
Diplodus vulgaris,199,199,199,199,199,198,198,199
Thalassoma pavo,182,182,182,182,182,178,178,182
Chromis chromis,179,179,179,179,179,176,176,179
Coris julis,170,170,170,170,170,170,170,170
Sarpa salpa,160,160,160,160,160,158,158,160
Diplodus sargus,160,160,160,160,160,157,157,160
Mullus surmuletus,145,145,145,145,145,143,143,145
Gobius incognitus,123,123,123,123,123,123,123,123


In [None]:
# Fetch the 2020 actinopterygii observations, then show the 15 taxon_name groups
# with the highest non-null `id` count.
obs_2020 = get_obs(year=2020, taxon='actinopterygii')
df_obs_2020, df_photos_2020 = get_dfs(obs_2020)
(
    df_photos_2020
    .groupby(by='taxon_name')
    .count()
    .sort_values(by='id', ascending=False)
    .head(15)
)

Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 800
Número de elementos: 1000
Número de elementos: 1200
Número de elementos: 1400
Número de elementos: 1600
Número de elementos: 1800
Número de elementos: 2000
Número de elementos: 2200
Número de elementos: 2400
Número de elementos: 2600
Número de elementos: 2715


Unnamed: 0_level_0,id,photos.id,iconic_taxon,photos.medium_url,user_login,latitude,longitude,path
taxon_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Mullus surmuletus,399,399,399,399,399,397,397,399
Sarpa salpa,365,365,365,365,365,364,364,365
Parablennius pilicornis,357,357,357,357,357,357,357,357
Diplodus vulgaris,305,305,305,305,305,302,302,305
Thalassoma pavo,277,277,277,277,277,275,275,277
Coris julis,273,273,273,273,273,273,273,273
Chromis chromis,239,239,239,239,239,239,239,239
Diplodus sargus,215,215,215,215,215,215,215,215
Oblada melanura,187,187,187,187,187,185,185,187
Diplodus cervinus,160,160,160,160,160,160,160,160


In [None]:
# Fetch the 2019 actinopterygii observations and split them into the two frames
# returned by get_dfs.
# NOTE(review): the saved output under this cell shows a per-taxon count table that
# these two lines do not display — presumably a summary expression was removed later.
obs_2019 = get_obs(year=2019, taxon='actinopterygii')
df_obs_2019, df_photos_2019 = get_dfs(obs_2019)

Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 800
Número de elementos: 934


Unnamed: 0,taxon_name,id,photos.id,iconic_taxon,photos.medium_url,user_login,latitude,longitude,path
0,Oblada melanura,106,106,106,106,106,106,106,106
1,Parablennius pilicornis,91,91,91,91,91,91,91,91
2,Thalassoma pavo,81,81,81,81,81,81,81,81
3,Coris julis,70,70,70,70,70,70,70,70
4,Sarpa salpa,59,59,59,59,59,59,59,59
5,Diplodus vulgaris,49,49,49,49,49,49,49,49
6,Chromis chromis,42,42,42,42,42,41,41,42
7,Gobius incognitus,33,33,33,33,33,33,33,33
8,Mullus surmuletus,31,31,31,31,31,31,31,31
9,Diplodus sargus,23,23,23,23,23,23,23,23


In [None]:
# Fetch the 2018 actinopterygii observations, then show the 15 taxon_name groups
# with the highest non-null `id` count.
obs_2018 = get_obs(year=2018, taxon='actinopterygii')
df_obs_2018, df_photos_2018 = get_dfs(obs_2018)
(
    df_photos_2018
    .groupby(by='taxon_name')
    .count()
    .sort_values(by='id', ascending=False)
    .head(15)
)

Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 679


Unnamed: 0_level_0,id,photos.id,iconic_taxon,photos.medium_url,user_login,latitude,longitude,path
taxon_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Serranus cabrilla,61,61,61,61,61,61,61,61
Tripterygion tripteronotum,52,52,52,52,52,52,52,52
Parablennius pilicornis,51,51,51,51,51,51,51,51
Thalassoma pavo,49,49,49,49,49,49,49,49
Coris julis,47,47,47,47,47,47,47,47
Diplodus sargus,44,44,44,44,44,44,44,44
Sarpa salpa,32,32,32,32,32,32,32,32
Chromis chromis,30,30,30,30,30,30,30,30
Diplodus vulgaris,29,29,29,29,29,28,28,29
Mullus surmuletus,20,20,20,20,20,20,20,20


In [None]:
def _top_taxa(df_photos, n=15):
  """Return the `n` taxon_name groups with the highest non-null `id` count.

  The result has exactly two columns, 'taxon_name' and 'id' (the count), ordered
  from most to least frequent.
  """
  return (
      df_photos
      .groupby(by='taxon_name')
      .count()
      .sort_values(by='id', ascending=False)
      .head(n)
      .reset_index()[['taxon_name', 'id']]
  )


# NOTE: each name is overwritten with its aggregated table, so the per-photo rows are
# no longer reachable through it. Re-running this cell without re-fetching would
# aggregate the aggregates (every taxon would then count as 1).
df_photos_2021 = _top_taxa(df_photos_2021)
df_photos_2020 = _top_taxa(df_photos_2020)
df_photos_2019 = _top_taxa(df_photos_2019)
df_photos_2018 = _top_taxa(df_photos_2018)

In [None]:
# Rows of df_photos_2021 whose taxon_name is 'Diplodus sargus', highest `id` first.
# NOTE(review): the saved output lists per-photo columns, so it appears to have been
# produced before the cell above replaced df_photos_2021 with a two-column summary;
# on a top-to-bottom run this filters the summary instead.
df_photos_2021[df_photos_2021['taxon_name'] == 'Diplodus sargus'].sort_values(by='id', ascending = False)

Unnamed: 0,id,photos.id,iconic_taxon,taxon_name,photos.medium_url,user_login,latitude,longitude,path
0,327418,418714,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835000,-6.350000,327418_418714.jpg
1,327418,418715,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835000,-6.350000,327418_418715.jpg
2,327418,418716,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835000,-6.350000,327418_418716.jpg
27,325764,416592,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,laurabiomar,41.183495,1.555558,325764_416592.jpg
47,325341,416176,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,andrea,41.369901,2.191172,325341_416176.jpg
...,...,...,...,...,...,...,...,...,...
4386,300076,386654,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,salvatorecoco,37.556781,15.153637,300076_386654.jpg
4429,299883,386426,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,andrea,41.376706,2.193674,299883_386426.jpg
4489,299662,386178,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,victoria_christine,41.376893,2.193387,299662_386178.jpg
4490,299662,386179,actinopterygii,Diplodus sargus,http://natusfera.gbif.es/attachments/local_pho...,victoria_christine,41.376893,2.193387,299662_386179.jpg


In [None]:
# Outer-join the four yearly tables on taxon_name into one wide table.
# Resulting count columns: id_2021, id_2020 (explicit suffixes), then id_x for 2019
# and id_y for 2018 (pandas' default suffixes, applied when the last merge sees two
# plain `id` columns).
# NOTE(review): a taxon missing from one year's table is filled with 0 here, which
# is not necessarily its true count for that year — confirm this is intended.
df = (
    df_photos_2021
    .merge(df_photos_2020, on='taxon_name', how='outer', suffixes=('_2021', '_2020'))
    .merge(df_photos_2019, on='taxon_name', how='outer')
    .merge(df_photos_2018, on='taxon_name', how='outer')
    .fillna(0)
)

In [None]:
# Row-wise total of the four yearly count columns (id_x / id_y come from the
# un-suffixed merges); skipna=False keeps NaN propagation identical to `+`.
df['sum_total'] = df[['id_2021', 'id_2020', 'id_x', 'id_y']].sum(axis=1, skipna=False)

In [None]:
# Display the merged table of yearly counts together with sum_total.
df

Unnamed: 0,taxon_name,id_2021,id_2020,id_x,id_y,sum_total
0,Parablennius pilicornis,253.0,357.0,91.0,51.0,752.0
1,Tripterygion tripteronotum,207.0,0.0,18.0,52.0,277.0
2,Diplodus vulgaris,199.0,305.0,49.0,29.0,582.0
3,Thalassoma pavo,182.0,277.0,81.0,49.0,589.0
4,Chromis chromis,179.0,239.0,42.0,30.0,490.0
5,Coris julis,170.0,273.0,70.0,47.0,560.0
6,Sarpa salpa,160.0,365.0,59.0,32.0,616.0
7,Diplodus sargus,160.0,215.0,23.0,44.0,442.0
8,Mullus surmuletus,145.0,399.0,31.0,20.0,595.0
9,Gobius incognitus,123.0,96.0,33.0,20.0,272.0


In [None]:
# The 15 rows with the largest sum_total, in descending order.
df.sort_values(by='sum_total', ascending=False).iloc[:15]

Unnamed: 0,taxon_name,id_2021,id_2020,id_x,id_y,sum_total
0,Parablennius pilicornis,253.0,357.0,91.0,51.0,752.0
6,Sarpa salpa,160.0,365.0,59.0,32.0,616.0
8,Mullus surmuletus,145.0,399.0,31.0,20.0,595.0
3,Thalassoma pavo,182.0,277.0,81.0,49.0,589.0
2,Diplodus vulgaris,199.0,305.0,49.0,29.0,582.0
5,Coris julis,170.0,273.0,70.0,47.0,560.0
4,Chromis chromis,179.0,239.0,42.0,30.0,490.0
7,Diplodus sargus,160.0,215.0,23.0,44.0,442.0
15,Oblada melanura,0.0,187.0,106.0,0.0,293.0
1,Tripterygion tripteronotum,207.0,0.0,18.0,52.0,277.0


In [None]:
# Distinct taxon names among the 15 rows with the largest sum_total.
df.sort_values(by='sum_total', ascending=False)['taxon_name'].head(15).unique()

array(['Parablennius pilicornis', 'Sarpa salpa', 'Mullus surmuletus',
       'Thalassoma pavo', 'Diplodus vulgaris', 'Coris julis',
       'Chromis chromis', 'Diplodus sargus', 'Oblada melanura',
       'Tripterygion tripteronotum', 'Gobius incognitus',
       'Serranus cabrilla', 'Symphodus roissali', 'Symphodus tinca',
       'Diplodus cervinus'], dtype=object)

In [None]:
# Rows of df_photos whose taxon_name equals `esp`.
# NOTE(review): `esp` is only assigned as a loop variable in cells further down, and
# df_photos is whatever frame was fetched last, so this cell relies on kernel state
# left by out-of-order execution.
df_photos[df_photos['taxon_name'] == esp]

Unnamed: 0,id,photos.id,iconic_taxon,taxon_name,photos.medium_url,user_login,latitude,longitude,path
58,260236,335991,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,okeanoslife,41.417496,2.236596,260236_335991.jpg
59,260236,335992,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,okeanoslife,41.417496,2.236596,260236_335992.jpg
144,101168,132483,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,anellides,41.607314,2.647511,101168_132483.jpg
146,101166,132481,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,anellides,41.607314,2.647511,101166_132481.jpg
175,95810,124334,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,anellides,41.376828,2.193171,95810_124334.jpg
183,95786,124308,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,anellides,41.376828,2.193171,95786_124308.jpg
206,92562,119732,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,anellides,41.607127,2.645525,92562_119732.jpg
210,92534,119698,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,anellides,41.376411,2.192131,92534_119698.jpg
236,87691,114230,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,jaume-piera,40.903596,0.826967,87691_114230.jpg
237,87480,113958,actinopterygii,Parablennius pilicornis,http://natusfera.gbif.es/attachments/local_pho...,jaume-piera,40.910484,0.830697,87480_113958.jpg


In [None]:
# Years to load and, position by position, the Drive folder holding that year's photos.
years = [2021, 2020, 2019, 2018]

folders = ['all_2021_actinopterygii',
           'all_2020_actinopterygii',
           'all_2019_actinopterygii',
           'all_2018_actinopterygii']

# Species (taxon_name values) to collect images for.
especies = ['Parablennius pilicornis',
            'Sarpa salpa',
            'Mullus surmuletus',
            'Thalassoma pavo',
            'Diplodus vulgaris',
            'Coris julis',
            'Chromis chromis',
            'Diplodus sargus',
            'Oblada melanura',
            'Tripterygion tripteronotum',
            'Gobius incognitus',
            'Serranus cabrilla',
            'Symphodus roissali',
            'Symphodus tinca',
            'Diplodus cervinus']

for fld, year in zip(folders, years):

  # Fetch once per year: the query does not depend on the species, so repeating it
  # inside the species loop only re-ran the same request for every entry of `especies`.
  obs = get_obs(year=year, taxon='actinopterygii')
  df_obs, df_photos = get_dfs(obs)

  for esp in especies:

    # File names (the `path` column) of the rows identified as this species.
    photos = df_photos[df_photos['taxon_name'] == esp]['path']

    for photo in photos:

      path_img = os.path.join('/content/drive/MyDrive/natusphera', fld, str(photo))

      # NOTE(review): get_im_cv2, X_train, X_train_id and y_train are not defined in
      # the cells visible here; they must exist in the kernel before this cell runs.
      img = get_im_cv2(path_img)
      X_train.append(img)
      X_train_id.append(esp)
      # The species name is used both as the sample id and as the label.
      y_train.append(esp)

In [None]:
# Dry run of the loading loop for one year and one species: print the image paths
# that would be read instead of loading them.
years = [2018]

folders = ['all_2018_actinopterygii']

especies = ['Parablennius pilicornis']

for fld, year in zip(folders, years):

  # Fetch once per year; the query does not depend on the species.
  obs = get_obs(year=year, taxon='actinopterygii')
  df_obs, df_photos = get_dfs(obs)

  for esp in especies:

    photos = df_photos[df_photos['taxon_name'] == esp]['path']

    for photo in photos:

      path_img = os.path.join('/content/drive/MyDrive/natusphera', fld, str(photo))

      print(path_img)

Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 679
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/260236_335991.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/260236_335992.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/101168_132483.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/101166_132481.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/95810_124334.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/95786_124308.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/92562_119732.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/92534_119698.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/87691_114230.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/87480_113958.jpg
/content/drive/MyDrive/natusphera/all_2018_actinopterygii/83453_109031.jpg
/content/drive/MyDrive/natusphera/all

In [None]:
      # NOTE(review): orphaned, still-indented fragment. path_img, esp, get_im_cv2 and
      # the X_train / X_train_id / y_train lists are not defined in this cell and must
      # already exist in the kernel. TODO confirm whether this cell is still needed.
      img = get_im_cv2(path_img)
      X_train.append(img)
      X_train_id.append(esp)
      y_train.append(esp)

In [None]:
# Re-fetch the 2018 actinopterygii observations into the generic obs / df_obs /
# df_photos names (overwriting whatever earlier cells left in them).
obs = get_obs(year=2018, taxon='actinopterygii')
df_obs, df_photos = get_dfs(obs)

Generando lista de observaciones:
Número de elementos: 200
Número de elementos: 400
Número de elementos: 600
Número de elementos: 679


In [None]:
# Preview the first rows of the photo-level frame.
df_photos.head()

Unnamed: 0,id,photos.id,iconic_taxon,taxon_name,photos.medium_url,user_login,latitude,longitude,path
0,316012,406145,actinopterygii,Pomatomus saltatrix,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835,-6.35,316012_406145.jpg
1,313906,403768,actinopterygii,Scophthalmus rhombus,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835,-6.35,313906_403768.jpg
2,313906,403769,actinopterygii,Scophthalmus rhombus,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835,-6.35,313906_403769.jpg
3,313906,403770,actinopterygii,Scophthalmus rhombus,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835,-6.35,313906_403770.jpg
4,313906,403771,actinopterygii,Scophthalmus rhombus,http://natusfera.gbif.es/attachments/local_pho...,reyes33,36.835,-6.35,313906_403771.jpg


In [None]:
# photos.id values of the df_photos rows whose taxon_name is 'Sarpa salpa'.
df_photos.loc[df_photos['taxon_name'] == 'Sarpa salpa', 'photos.id']

117    134530
126    132576
142    132485
174    124336
209    119725
242    111707
263    108686
268    108409
290    107881
291    107882
308    107849
309    107850
316    107841
317    107842
328    107809
329    107808
330    107810
331    107811
377    105766
378    105767
379    105768
380    105769
381    105770
382    105771
383    105772
384    105773
385    105774
386    105775
493    101004
494    101177
498    100998
647     76048
Name: photos.id, dtype: Int64