# FLICKR image scraping with API key

In [2]:
# Flickr API key, specific for this project
API_KEY = "key_string_goes_here"


In [3]:
import requests
import json
import os

def search_and_download(search_terms, num_images_per_folder):
    # Create a directory to store the images
    if not os.path.exists("image_scraping"):
        os.makedirs("image_scraping")
 
    # Search for images with a Creative Commons license on Flickr
    for search_term in search_terms:
        search_term_formatted = search_term.replace('_', '+')
        search_url = f"https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={API_KEY}&text={search_term_formatted}&per_page={num_images_per_folder}&format=json&nojsoncallback=1&sort=relevance&license=4,5,6,7"
        response = requests.get(search_url)
        data = json.loads(response.text)
        if data["stat"] == "ok":
            photos = data["photos"]["photo"]
            if not os.path.exists(f"image_scraping/{search_term}"):
                os.makedirs(f"image_scraping/{search_term}")
            for i, photo in enumerate(photos):
                photo_url = f"https://farm{photo['farm']}.staticflickr.com/{photo['server']}/{photo['id']}_{photo['secret']}.jpg"
                response = requests.get(photo_url)
                with open(f"image_scraping/{search_term}/{i}.jpg", "wb") as f:
                    f.write(response.content)
        else:
            print("An error occurred while searching for images on Flickr.")

# Example usage
search_terms = ["dog", "cat", "bird"]
#search_terms = class_names[13:]

num_images_per_folder = 50
search_and_download(search_terms, num_images_per_folder)

# IUCN red list API

In [1]:
class_names = ['A73EGS-P', 'CUNWCB-Y', 'Istiophorus_platypterus', 'P1ROZC-Z', 'PQV7DP-S', 'acanthaluteres_brownii', 'acanthaluteres_spilomelanurus', 'acanthaluteres_vittiger', 'acanthistius_cinctus', 'acanthopagrus_australis', 'acanthopagrus_berda', 'acanthopagrus_latus', 'achoerodus_gouldii', 'achoerodus_viridis', 'acreichthys_tomentosus', 'aesopia_cornuta', 'aethaloperca_rogaa', 'alectis_ciliaris', 'alectis_indica', 'alepes_kleinii', 'aluterus_monoceros', 'aluterus_scriptus', 'amanses_scopas', 'anampses_caeruleopunctatus', 'anampses_elegans', 'anampses_femininus', 'anampses_geographicus', 'anampses_lennardi', 'anampses_melanurus', 'anampses_meleagrides', 'anampses_neoguinaicus', 'anampses_twistii', 'anodontostoma_chacunda', 'anyperodon_leucogrammicus', 'aphareus_furca', 'aphareus_rutilans', 'aprion_virescens', 'argyrops_spinifer', 'aseraggodes_melanostictus', 'atractoscion_aequidens', 'atule_mate', 'auxis_rochei', 'auxis_thazard', 'bathylagichthys_greyae', 'beryx_decadactylus', 'bodianus_anthioides', 'bodianus_axillaris', 'bodianus_bilunulatus', 'bodianus_bimaculatus', 'bodianus_diana', 'bodianus_loxozonus', 'bodianus_mesothorax', 'bodianus_perditio', 'bodianus_unimaculatus', 'bodianus_vulpinus', 'bothus_mancus', 'bothus_myriaster', 'bothus_pantherinus', 'brachaluteres_jacksonianus', 'brachirus_orientalis', 'caesioperca_lepidopterus', 'cantherhines_dumerilii', 'cantherhines_fronticinctus', 'cantherhines_pardalis', 'cantheschenia_grandisquamis', 'caprodon_longimanus', 'caprodon_schlegelii', 'carangoides_caeruleopinnatus', 'carangoides_chrysophrys', 'carangoides_equula', 'carangoides_ferdau', 'carangoides_fulvoguttatus', 'carangoides_hedlandensis', 'carangoides_malabaricus', 'carangoides_orthogrammus', 'carangoides_plagiotaenia', 'caranx_ignobilis', 'caranx_lugubris', 'caranx_melampygus', 'caranx_sexfasciatus', 'carcharhinus_albimarginatus', 'carcharhinus_amblyrhynchos', 'carcharhinus_falciformis', 'carcharhinus_galapagensis', 'carcharhinus_limbatus', 'carcharhinus_melanopterus', 'carcharhinus_obscurus', 'carcharhinus_plumbeus', 'carcharhinus_sorrah', 'centroberyx_affinis', 'centrogenys_vaigiensis', 'centroscymnus_coelolepis', 'cephalopholis_argus', 'cephalopholis_boenak', 'cephalopholis_cyanostigma', 'cephalopholis_formosa', 'cephalopholis_igarashiensis', 'cephalopholis_leopardus', 'cephalopholis_microprion', 'cephalopholis_miniata', 'cephalopholis_sexmaculata', 'cephalopholis_sonnerati', 'cephalopholis_spiloparaea', 'chascanopsetta_lugubris', 'cheilinus_chlorourus', 'cheilinus_fasciatus', 'cheilinus_oxycephalus', 'cheilinus_trilobatus', 'cheilinus_undulatus', 'cheilio_inermis', 'cheilodactylus_ephippium', 'cheilodactylus_fuscus', 'cheilodactylus_spectabilis', 'cheilodactylus_vestitus', 'chelidonichthys_kumu', 'chirocentrus_dorab', 'chirocentrus_nudus', 'choerodon_anchorago', 'choerodon_cauteroma', 'choerodon_cyanodus', 'choerodon_fasciatus', 'choerodon_graphicus', 'choerodon_jordani', 'choerodon_rubescens', 'choerodon_schoenleinii', 'choerodon_venustus', 'choerodon_vitta', 'choerodon_zamboangae', 'chromileptes_altivelis', 'cirrhilabrus_bathyphilus', 'cirrhilabrus_condei', 'cirrhilabrus_cyanopleura', 'cirrhilabrus_exquisitus', 'cirrhilabrus_laboutei', 'cirrhilabrus_punctatus', 'cirrhilabrus_scottorum', 'cirrhilabrus_temminckii', 'coris_aygula', 'coris_batuensis', 'coris_bulbifrons', 'coris_caudimacula', 'coris_dorsomacula', 'coris_gaimard', 'coris_picta', 'coris_pictoides', 'coris_sandeyeri', 'crenimugil_crenilabis', 'cymbacephalus_nematophthalmus', 'cymolutes_praetextatus', 'cymolutes_torquatus', 'cynoglossus_puncticeps', 'cyttopsis_rosea', 'dactylophora_nigricans', 'decapterus_macrosoma', 'decapterus_russelli', 'diproctacanthus_xanthurus', 'dotalabrus_aurantiacus', 'elagatis_bipinnulata', 'epibulus_insidiator', 'epinephelus_areolatus', 'epinephelus_bleekeri', 'epinephelus_chlorostigma', 'epinephelus_coeruleopunctatus', 'epinephelus_coioides', 'epinephelus_corallicola', 'epinephelus_cyanopodus', 'epinephelus_epistictus', 'epinephelus_fasciatus', 'epinephelus_fuscoguttatus', 'epinephelus_hexagonatus', 'epinephelus_howlandi', 'epinephelus_lanceolatus', 'epinephelus_latifasciatus', 'epinephelus_macrospilos', 'epinephelus_maculatus', 'epinephelus_melanostigma', 'epinephelus_merra', 'epinephelus_morrhua', 'epinephelus_multinotatus', 'epinephelus_ongus', 'epinephelus_polyphekadion', 'epinephelus_quoyanus', 'epinephelus_radiatus', 'epinephelus_retouti', 'epinephelus_rivulatus', 'epinephelus_sexfasciatus', 'epinephelus_spilotoceps', 'epinephelus_tauvina', 'epinephelus_undulatostriatus', 'etelis_carbunculus', 'etelis_coruscans', 'eubalichthys_cyanoura', 'eubalichthys_mosaicus', 'eupetrichthys_angustipes', 'euthynnus_affinis', 'evistias_acutirostris', 'gempylus_serpens', 'gnathanodon_speciosus', 'gnathodentex_aureolineatus', 'gracila_albomarginata', 'gymnocranius_audleyi', 'gymnocranius_euanus', 'gymnocranius_grandoculis', 'gymnocranius_microdon', 'gymnosarda_unicolor', 'halichoeres_argus', 'halichoeres_biocellatus', 'halichoeres_chloropterus', 'halichoeres_chrysus', 'halichoeres_hartzfeldii', 'halichoeres_hortulanus', 'halichoeres_leucurus', 'halichoeres_margaritaceus', 'halichoeres_marginatus', 'halichoeres_melanochir', 'halichoeres_melanurus', 'halichoeres_melasmapomus', 'halichoeres_miniatus', 'halichoeres_nebulosus', 'halichoeres_nigrescens', 'halichoeres_scapularis', 'halichoeres_trimaculatus', 'harriotta_raleighana', 'hemigymnus_fasciatus', 'hemigymnus_melapterus', 'hemiramphus_far', 'herklotsichthys_quadrimaculatus', 'hologymnosus_annulatus', 'hologymnosus_doliatus', 'hyporhamphus_affinis', 'hyporhamphus_dussumieri', 'inegocia_japonica', 'johnius_borneensis', 'katsuwonus_pelamis', 'labrichthys_unilineatus', 'labroides_bicolor', 'labroides_dimidiatus', 'labroides_pectoralis', 'labropsis_australis', 'labropsis_manabei', 'labropsis_xanthonota', 'latridopsis_forsteri', 'lepidocybium_flavobrunneum', 'leptojulis_cyanopleura', 'lethrinus_amboinensis', 'lethrinus_atkinsoni', 'lethrinus_erythracanthus', 'lethrinus_genivittatus', 'lethrinus_harak', 'lethrinus_lentjan', 'lethrinus_microdon', 'lethrinus_miniatus', 'lethrinus_nebulosus', 'lethrinus_obsoletus', 'lethrinus_olivaceus', 'lethrinus_ornatus', 'lethrinus_rubrioperculatus', 'lethrinus_semicinctus', 'lethrinus_variegatus', 'lethrinus_xanthochilus', 'liopropoma_mitratum', 'liopropoma_susumi', 'liza_subviridis', 'liza_vaigiensis', 'lniistius_aneitensis', 'lniistius_pavo', 'lutjanus_adetii', 'lutjanus_argentimaculatus', 'lutjanus_biguttatus', 'lutjanus_bohar', 'lutjanus_carponotatus', 'lutjanus_decussatus', 'lutjanus_ehrenbergii', 'lutjanus_erythropterus', 'lutjanus_fulviflamma', 'lutjanus_fulvus', 'lutjanus_gibbus', 'lutjanus_johnii', 'lutjanus_kasmira', 'lutjanus_lemniscatus', 'lutjanus_lutjanus', 'lutjanus_malabaricus', 'lutjanus_monostigma', 'lutjanus_quinquelineatus', 'lutjanus_rivulatus', 'lutjanus_russellii', 'lutjanus_sebae', 'lutjanus_semicinctus', 'lutjanus_semicinctus_quoy', 'lutjanus_timoriensis', 'lutjanus_vitta', 'macolor_macularis', 'macolor_niger', 'macropharyngodon_choati', 'macropharyngodon_kuiteri', 'macropharyngodon_meleagris', 'macropharyngodon_negrosensis', 'macropharyngodon_ornatus', 'megalaspis_cordyla', 'meuschenia_australis', 'meuschenia_freycineti', 'meuschenia_galii', 'meuschenia_hippocrepis', 'meuschenia_scaber', 'meuschenia_trachylepis', 'monacanthus_chinensis', 'monotaxis_grandoculis', 'mugim_cephalus', 'naucrates_ductor', 'negaprion_acutidens', 'nemadactylus_douglasii', 'nemipterus_furcosus', 'nemipterus_hexodon', 'nemipterus_peronii', 'netuma_thalassina', 'nibea_soldado', 'notolabrus_fucicola', 'notolabrus_gymnogenis', 'notolabrus_tetricus', 'notorynchus_cepedianus', 'novaculichthys_taeniourus', 'novaculoides_macrolepidotus', 'oedalechilus_labiosus', 'ophthalmolepis_lineolatus', 'otolithes_ruber', 'oxycheilinus_bimaculatus', 'oxycheilinus_celebicus', 'oxycheilinus_digrammus', 'oxycheilinus_unifasciatus', 'oxymonacanthus_longirostris', 'pagrus_auratus', 'paracaesio_kusakarii', 'paracheilinus_filamentosus', 'paraluteres_prionurus', 'paramonacanthus_choirocephalus', 'paraplagusia_bilineata', 'parastromateus_niger', 'pardachirus_hedleyi', 'pardachirus_pavoninus', 'pentapodus_aureofasciatus', 'pentapodus_paradiseus', 'pentapodus_vitta_quoy', 'pervagor_alternans', 'pervagor_aspricaudus', 'pervagor_janthinosoma', 'pervagor_melanocephalus', 'pervagor_nigrolineatus', 'pinjalo_lewisi', 'platycephalus_indicus', 'plectranthias_longimanus', 'plectranthias_nanus', 'plectranthias_winniensis', 'plectropomus_areolatus', 'plectropomus_laevis', 'plectropomus_leopardus', 'plectropomus_maculatus', 'plectropomus_oligacanthus', 'plotosus_lineatus', 'pristipomoides_argyrogrammicus', 'pristipomoides_auricilla', 'pristipomoides_filamentosus', 'pristipomoides_flavipinnis', 'pristipomoides_sieboldii', 'pristipomoides_zonatus', 'promethichthys_prometheus', 'protonibea_diacanthus', 'psettodes_erumei', 'pseudalutarius_nasicornis', 'pseudanthias_bicolor', 'pseudanthias_cooperi', 'pseudanthias_dispar', 'pseudanthias_fasciatus', 'pseudanthias_huchtii', 'pseudanthias_hypselosoma', 'pseudanthias_lori', 'pseudanthias_luzonensis', 'pseudanthias_pictilis', 'pseudanthias_pleurotaenia', 'pseudanthias_rubrizonatus', 'pseudanthias_sheni', 'pseudanthias_smithvanizi', 'pseudanthias_squamipinnis', 'pseudanthias_tuka', 'pseudanthias_ventralis', 'pseudocaranx_dentex', 'pseudocarcharias_kamoharai', 'pseudocheilinus_evanidus', 'pseudocheilinus_hexataenia', 'pseudocheilinus_ocellatus', 'pseudocheilinus_octotaenia', 'pseudodax_moluccanus', 'pseudojuloides_cerasinus', 'pseudolabrus_biserialis', 'pseudolabrus_guentheri', 'pseudolabrus_luculentus', 'pseudorhombus_argus', 'pseudorhombus_arsius', 'pseudorhombus_elevatus', 'pteragogus_cryptus', 'pteragogus_enneacanthus', 'pteragogus_flagellifer', 'rastrelliger_kanagurta', 'retropinna_semoni', 'rhabdosargus_sarba', 'rhincodon_typus', 'rhizoprionodon_acutus', 'ruvettus_pretiosus', 'samaris_cristatus', 'samariscus_triocellatus', 'sarda_orientalis', 'sardinella_albella', 'sardinella_gibbosa', 'sardinops_sagax', 'scaevius_milii', 'scolopsis_affinis', 'scolopsis_bilineata', 'scolopsis_lineata', 'scolopsis_margaritifer', 'scolopsis_monogramma', 'scolopsis_trilineata', 'scolopsis_vosmeri', 'scolopsis_xenochrous', 'scomberoides_commersonnianus', 'scomberoides_lysan', 'scomberomorus_commerson', 'selar_crumenophthalmus', 'selaroides_leptolepis', 'seriola_dumerili', 'seriola_hippos', 'seriola_rivoliana', 'seriolina_nigrofasciata', 'serranocirrhitus_latus', 'sillago_ciliata', 'sillago_sihama', 'soleichthys_heterorhinos', 'sphyraena_barracuda', 'sphyraena_forsteri', 'sphyraena_jello', 'sphyraena_obtusata', 'stegostoma_fasciatum', 'stethojulis_bandanensis', 'stethojulis_interrupta', 'stethojulis_strigiventer', 'stethojulis_trilineata', 'stolephorus_waitei', 'suezichthys_arquatus', 'suezichthys_cyanolaemus', 'suezichthys_gracilis', 'symphorichthys_spilurus', 'symphorus_nematophorus', 'thalassoma_amblycephalum', 'thalassoma_hardwicke', 'thalassoma_jansenii', 'thalassoma_lunare', 'thalassoma_lutescens', 'thalassoma_nigrofasciatum', 'thalassoma_purpureum', 'thalassoma_quinquevittatum', 'thalassoma_trilobatum', 'thryssa_baelama', 'thryssa_hamiltonii', 'thunnus_alalunga', 'thunnus_albacares', 'thysanophrys_celebica', 'thysanophrys_chiltonae', 'trachichthys_australis', 'trachinotus_baillonii', 'trachinotus_blochii', 'trachinotus_botla', 'trachypoma_macracanthus', 'triaenodon_obesus', 'uraspis_secunda', 'valamugil_cunnesius', 'valamugil_engeli', 'valamugil_seheli', 'variola_albimarginata', 'variola_louti', 'wattsia_mossambica', 'wetmorella_albofasciata', 'wetmorella_nigropinnata', 'xiphocheilus_typus', 'zenarchopterus_dispar', 'zeus_faber']




In [267]:
# IUCN red list API KEY
token = "key_string_goes_here"

In [301]:
# import species information from IUCN red list website via API

import requests

def get_species_info(species_name, token):
    search_term_formatted = species_name.replace('_', ' ')
    species_url = f"http://apiv3.iucnredlist.org/api/v3/species/{search_term_formatted}?token={token}"
    response = requests.get(species_url)
    if response.status_code == 200:
        data = response.json()
        result = data.get("result")
        if result:
            return result[0]
        else:
            return "Information not found"
    else:
        return "Error retrieving information"
    

species_list = class_names

species_info = [get_species_info(species.replace(" ", "_"), token) for species in species_list]

df = pd.DataFrame(species_info, columns=['JSON_column'])


Unnamed: 0,JSON_column
0,Information not found
1,Information not found


In [303]:
df.head(5)


Unnamed: 0,JSON_column
0,Information not found
1,Information not found
2,"{'taxonid': 170338, 'scientific_name': 'Istiop..."
3,Information not found
4,Information not found


In [306]:
df.to_csv('data/fish_data/red_list_species.csv')

In [302]:
# import text information form the IUCN red list
import requests

def get_species_info(species_name, token):
    search_term_formatted = species_name.replace('_', ' ')
    species_url = f"http://apiv3.iucnredlist.org/api/v3/species/narrative/{search_term_formatted}?token={token}"
    response = requests.get(species_url)
    if response.status_code == 200:
        data = response.json()
        result = data.get("result")
        if result:
            return result[0]
        else:
            return "Information not found"
    else:
        return "Error retrieving information"
    

species_list = class_names

species_info = [get_species_info(species.replace(" ", "_"), token) for species in species_list]

df_2 = pd.DataFrame(species_info, columns=['JSON_column'])

Unnamed: 0,JSON_column
0,Information not found
1,Information not found


In [305]:
df_2.head(5)


Unnamed: 0,JSON_column
0,Information not found
1,Information not found
2,"{'species_id': 170338, 'taxonomicnotes': 'Ther..."
3,Information not found
4,Information not found


In [307]:
df_2.to_csv('data/fish_data/red_list_species_text.csv')

### IUCN JSON exploding - species information

In [None]:
# Formatting the column display in jupyter labs
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", 200)
pd.set_option('display.max_colwidth', None)

In [61]:
df_testi_1 = pd.read_csv('data/fish_data/red_list_species.csv')
df_testi_2 = pd.read_csv('data/fish_data/red_list_species_text.csv')

In [62]:
df_testi_1['JSON_column'][0]

'Information not found'

In [63]:
# replace errors with JSON dummies
empty_json = "{'taxonid': 0, 'scientific_name': 'NA', 'kingdom': 'NA', 'phylum': 'NA', 'class': 'NA', 'order': 'NA', 'family': 'NA', 'genus': 'NA', 'main_common_name': 'NA', 'authority': 'NA', 'published_year': 'NA', 'assessment_date': 'NA', 'category': 'NA', 'criteria': 'NA', 'population_trend': 'NA', 'marine_system': 'NA', 'freshwater_system': 'NA', 'terrestrial_system': 'NA', 'assessor': 'NA', 'reviewer': 'NA', 'aoo_km2': 'NA', 'eoo_km2': 'NA', 'elevation_upper': 'NA', 'elevation_lower': 'NA', 'depth_upper': 'NA', 'depth_lower': 'NA', 'errata_flag': 'NA', 'errata_reason': 'NA', 'amended_flag': 'NA', 'amended_reason': 'NA'}"
empty_json_2 ="{'species_id': 0, 'taxonomicnotes': 'NA', 'rationale': 'NA', 'geographicrange':'NA', 'population':'NA', 'populationtrend':'NA', 'habitat\': 'NA', 'threats': 'NA', 'conservationmeasures': 'NA', 'usetrade': 'NA'}"


error_1 = "Information not found"
error_2 = "Error retrieving information"

df_testi_1.JSON_column.replace([error_1, error_2], [empty_json, empty_json], inplace=True)
df_testi_2.JSON_column.replace([error_1, error_2], [empty_json_2,empty_json_2], inplace=True)

In [64]:
df_testi_1['JSON_column'][49]

'{\'taxonid\': 187587, \'scientific_name\': \'Bodianus diana\', \'kingdom\': \'ANIMALIA\', \'phylum\': \'CHORDATA\', \'class\': \'ACTINOPTERYGII\', \'order\': \'PERCIFORMES\', \'family\': \'LABRIDAE\', \'genus\': \'Bodianus\', \'main_common_name\': "Indian Diana\'s Hogfish", \'authority\': \'(Lacepède, 1801)\', \'published_year\': 2010, \'assessment_date\': \'2008-03-12\', \'category\': \'LC\', \'criteria\': None, \'population_trend\': \'Unknown\', \'marine_system\': True, \'freshwater_system\': False, \'terrestrial_system\': False, \'assessor\': \'Russell, B.\', \'reviewer\': \'Sadovy, Y. & Carpenter, K.E.\', \'aoo_km2\': None, \'eoo_km2\': None, \'elevation_upper\': None, \'elevation_lower\': None, \'depth_upper\': 9, \'depth_lower\': 30, \'errata_flag\': None, \'errata_reason\': None, \'amended_flag\': None, \'amended_reason\': None}'

In [65]:
# workaround for inconsistent JSON formatting


df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace(r'\'\: \"',r"\'\: \'", regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace(r'", \'', r"', \'", regex=True)

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace(r'"','öadfnvöjadfn', regex = True)


df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace(r'\'',r'\"', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace(r"\\'",r'"', regex = True)
 

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('öadfnvöjadfn','\'', regex = True)

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace( r"\\" ,'', regex = True)

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('\\\'',r"'", regex = True)


df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('None','"None"', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('True',r'"True"', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('False','"False"', regex = True)

# replace specific context "double" colons with placeholders, then change outermost colons to single colons and replace placeholders again 

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('\"\, \"','aksFDBVÖsdbvöksdjbnv', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('\"\: \"','öoaerhgvöaoejnvnö', regex = True)

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('\{\"','äonväladfnvdLFKnbv#aldfnbäaldf', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('\"\:','ydfökvamdnfäljbhdkfnvakjsd', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('\, \"','öakdjfbvökadfbvydfkjbnvadk', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('\"\}','äadlfjbnvadäofbnadälfbv', regex = True)

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace(r'\"', r"'", regex = True)



df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('aksFDBVÖsdbvöksdjbnv', r'", "', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('öoaerhgvöaoejnvnö',r'": "', regex = True)

df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('äonväladfnvdLFKnbv#aldfnbäaldf',r'{"', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('ydfökvamdnfäljbhdkfnvakjsd', r'":', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('öakdjfbvökadfbvydfkjbnvadk', r', "', regex = True)
df_testi_1['JSON_column'] = df_testi_1['JSON_column'].str.replace('äadlfjbnvadäofbnadälfbv', r'"}', regex = True)



In [66]:
df_testi_1['JSON_column'][49]

'{"taxonid": 187587, "scientific_name": "Bodianus diana", "kingdom": "ANIMALIA", "phylum": "CHORDATA", "class": "ACTINOPTERYGII", "order": "PERCIFORMES", "family": "LABRIDAE", "genus": "Bodianus", "main_common_name": "Indian Diana\'s Hogfish", "authority": "(Lacepède, 1801)", "published_year": 2010, "assessment_date": "2008-03-12", "category": "LC", "criteria": "None", "population_trend": "Unknown", "marine_system": "True", "freshwater_system": "False", "terrestrial_system": "False", "assessor": "Russell, B.", "reviewer": "Sadovy, Y. & Carpenter, K.E.", "aoo_km2": "None", "eoo_km2": "None", "elevation_upper": "None", "elevation_lower": "None", "depth_upper": 9, "depth_lower": 30, "errata_flag": "None", "errata_reason": "None", "amended_flag": "None", "amended_reason": "None"}'

In [67]:
# define function to explode json columns

def explode_json(df, col_name):
    ''' explodes json columns'''
    exploded = pd.json_normalize(df[col_name].apply(json.loads))
    return exploded


In [68]:
type(df_testi_1['JSON_column'][2])

str

In [37]:
df_testi_1['JSON_column'][44]

'{"taxonid": 198578, "scientific_name": "Beryx decadactylus", "kingdom": "ANIMALIA", "phylum": "CHORDATA", "class": "ACTINOPTERYGII", "order": "BERYCIFORMES", "family": "BERYCIDAE", "genus": "Beryx", "main_common_name": "Alfonsino", "authority": "Cuvier, 1829", "published_year": 2015, "assessment_date": "2013-01-29", "category": "LC", "criteria": "None", "population_trend": "Unknown", "marine_system": "True", "freshwater_system": "False", "terrestrial_system": "False", "assessor": "Iwamoto, T., Russell, B., Polanco Fernandez, A., McEachran, J.D. & Moore, J.", "reviewer": "Weller, S., Strongin, K., Carpenter, K.E. & Polidoro, B.", "aoo_km2": "None", "eoo_km2": "None", "elevation_upper": "None", "elevation_lower": "None", "depth_upper": 150, "depth_lower": 1000, "errata_flag": "None", "errata_reason": "None", "amended_flag": "None", "amended_reason": "None"}'

In [38]:
df_testi_1['JSON_column'][50]

'{"taxonid": 187744, "scientific_name": "Bodianus loxozonus", "kingdom": "ANIMALIA", "phylum": "CHORDATA", "class": "ACTINOPTERYGII", "order": "PERCIFORMES", "family": "LABRIDAE", "genus": "Bodianus", "main_common_name": "Blackfin Hogfish", "authority": "(Snyder, 1908)", "published_year": 2010, "assessment_date": "2008-04-12", "category": "LC", "criteria": "None", "population_trend": "Unknown", "marine_system": "True", "freshwater_system": "False", "terrestrial_system": "False", "assessor": "Russell, B.", "reviewer": "Sadovy, Y. & Carpenter, K.E.", "aoo_km2": "None", "eoo_km2": "None", "elevation_upper": "None", "elevation_lower": "None", "depth_upper": 3, "depth_lower": 50, "errata_flag": "None", "errata_reason": "None", "amended_flag": "None", "amended_reason": "None"}'

In [39]:
df_testi_1.dtypes

Unnamed: 0      int64
JSON_column    object
dtype: object

In [2]:
#df_testi_1['JSON_column'] = df_testi_1['JSON_column'].fillna(empty_json)

df_species_exploded = explode_json(df_testi_1, 'JSON_column')


In [3]:
df_species_exploded.to_csv('data/fish_data/red_list_species_exploded.csv')

In [4]:
df_species_exploded

### IUCN JSON exploding - description texts

In [None]:
# Formatting the column display in jupyter labs
pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", 200)
pd.set_option('display.max_colwidth', None)

In [44]:
df_testi_2 = pd.read_csv('data/fish_data/red_list_species_text.csv')

In [45]:
# define function to explode json columns

def explode_json(df, col_name):
    ''' explodes json columns'''
    exploded = pd.json_normalize(df[col_name].apply(json.loads))
    return exploded


In [46]:
df_testi_2['JSON_column'][40]

'{\'species_id\': 20256729, \'taxonomicnotes\': None, \'rationale\': \'This species<em>&#160;</em>ranges from South Africa to Japan and the Hawaiian Islands. This species&#160;occurs in coastal waters throughout its range. Although this species<em>&#160;</em>is not considered a highly commercial species, it is heavily exploited in some parts of its range and is also taken&#160;as bycatch of shrimp trawl fisheries. While this species may be overexploited in parts of its range, global declines are not suspected. Given its widespread distribution and relative abundance in many parts of its range, it is listed as Least Concern.\', \'geographicrange\': \'This species<em> </em>is broadly distributed throughout the Indo-West Pacific from South Africa to the Persian Gulf, including Europa, Madagascar, Reunion and the Seychelles, east to the Hawaiian Islands (Smith-Vaniz 1984) and French Polynesia, north to Japan (Gushiken 1984) and south to Australia (Western Australia to New South Wales) and 

In [47]:
# getting rid of some HTML code leftover in the description texts

html_leftovers = ['<span style="font-style: italic;">','</span>','<strong>', '</strong>','<u>', '</u>','<br/>','</em>','<em>','</p>','<p>','&#160;']

def delete_HTML(df,html_leftover,replacement):
        df['JSON_column'] = df['JSON_column'].str.replace(html_leftover,replacement, regex = True)
        return df

for html_leftover in html_leftovers:
    replacement = " "
    df_testi_2 = delete_HTML(df_testi_2, html_leftover, replacement)


In [48]:
df_testi_2['JSON_column'][40]

'{\'species_id\': 20256729, \'taxonomicnotes\': None, \'rationale\': \'This species<em>&#160;</em>ranges from South Africa to Japan and the Hawaiian Islands. This species&#160;occurs in coastal waters throughout its range. Although this species<em>&#160;</em>is not considered a highly commercial species, it is heavily exploited in some parts of its range and is also taken&#160;as bycatch of shrimp trawl fisheries. While this species may be overexploited in parts of its range, global declines are not suspected. Given its widespread distribution and relative abundance in many parts of its range, it is listed as Least Concern.\', \'geographicrange\': \'This species<em> </em>is broadly distributed throughout the Indo-West Pacific from South Africa to the Persian Gulf, including Europa, Madagascar, Reunion and the Seychelles, east to the Hawaiian Islands (Smith-Vaniz 1984) and French Polynesia, north to Japan (Gushiken 1984) and south to Australia (Western Australia to New South Wales) and 

In [49]:
# replace errors with JSON dummies
empty_json_2 = "{'species_id': 0, 'taxonomicnotes': 'NA', 'rationale': 'NA', 'geographicrange': 'NA', 'population': 'NA', 'populationtrend': 'NA', 'habitat': 'NA', 'threats': 'NA', 'conservationmeasures': 'NA', 'usetrade': 'NA'}"

error_1 = "Information not found"
error_2 = "Error retrieving information"

df_testi_2.JSON_column.replace([error_1, error_2], [empty_json_2,empty_json_2], inplace=True)

In [50]:
df_testi_2['JSON_column'][40]

'{\'species_id\': 20256729, \'taxonomicnotes\': None, \'rationale\': \'This species<em>&#160;</em>ranges from South Africa to Japan and the Hawaiian Islands. This species&#160;occurs in coastal waters throughout its range. Although this species<em>&#160;</em>is not considered a highly commercial species, it is heavily exploited in some parts of its range and is also taken&#160;as bycatch of shrimp trawl fisheries. While this species may be overexploited in parts of its range, global declines are not suspected. Given its widespread distribution and relative abundance in many parts of its range, it is listed as Least Concern.\', \'geographicrange\': \'This species<em> </em>is broadly distributed throughout the Indo-West Pacific from South Africa to the Persian Gulf, including Europa, Madagascar, Reunion and the Seychelles, east to the Hawaiian Islands (Smith-Vaniz 1984) and French Polynesia, north to Japan (Gushiken 1984) and south to Australia (Western Australia to New South Wales) and 

In [51]:
# format incosistent JSON column

# change special cases:   ': " to ': ' and  ", ' to ', ' and ."} to .'}
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r'\'\: \"',r"\'\: \'", regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r'", \'', r"', \'", regex=True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r'."}', r".'}", regex=True)




# replace outermost qoutes " with placeholder
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r'"','öadfnvöjadfn', regex = True)

# change all  ' to " and all \' to "
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r'\'',r'\"', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r"\\'",r'"', regex = True)

#change placeholder outermost quotes to '
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('öadfnvöjadfn',r'\'', regex = True)



df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace( r"\\" ,'', regex = True)

df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r'\\\'',r"'", regex = True)

df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('None','"None"', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('True',r'"True"', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('False','"False"', regex = True)


# slightly different replacement colons from JSON formatting

# replace all valid cases of " with placeholders -> replace rest of " with ' -> change placeholders back to "


df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('\"\, \"','aksFDBVÖsdbvöksdjbnv', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('\"\: \"','öoaerhgvöaoejnvnö', regex = True)

df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('\{\"','äonväladfnvdLFKnbv#aldfnbäaldf', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('\"\:','ydfökvamdnfäljbhdkfnvakjsd', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('\, \"','öakdjfbvökadfbvydfkjbnvadk', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('\"\}','äadlfjbnvadäofbnadälfbv', regex = True)


df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace(r'\"', r"'", regex = True)

df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('aksFDBVÖsdbvöksdjbnv', r'", "', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('öoaerhgvöaoejnvnö',r'": "', regex = True)

df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('äonväladfnvdLFKnbv#aldfnbäaldf',r'{"', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('ydfökvamdnfäljbhdkfnvakjsd', r'":', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('öakdjfbvökadfbvydfkjbnvadk', r', "', regex = True)
df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('äadlfjbnvadäofbnadälfbv', r'"}', regex = True)

#df_testi_2['JSON_column'] = df_testi_2['JSON_column'].str.replace('övioadnfrvjlkandföov',r'."', regex = True)



In [52]:
df_testi_2['JSON_column'][40]

'{"species_id": 20256729, "taxonomicnotes": "None", "rationale": "This species<em>&#160;</em>ranges from South Africa to Japan and the Hawaiian Islands. This species&#160;occurs in coastal waters throughout its range. Although this species<em>&#160;</em>is not considered a highly commercial species, it is heavily exploited in some parts of its range and is also taken&#160;as bycatch of shrimp trawl fisheries. While this species may be overexploited in parts of its range, global declines are not suspected. Given its widespread distribution and relative abundance in many parts of its range, it is listed as Least Concern.", "geographicrange": "This species<em> </em>is broadly distributed throughout the Indo-West Pacific from South Africa to the Persian Gulf, including Europa, Madagascar, Reunion and the Seychelles, east to the Hawaiian Islands (Smith-Vaniz 1984) and French Polynesia, north to Japan (Gushiken 1984) and south to Australia (Western Australia to New South Wales) and New Caled

In [5]:
# helper code to check ranges of the JSON for formatting errors

SDFSGF['JSON_column'] = df_testi_2['JSON_column'][40:41]

df_text_exploded = explode_json(SDFSGF, 'JSON_column')
df_text_exploded.head(3)


In [6]:
df_text_exploded = explode_json(df_testi_2, 'JSON_column')


In [7]:
df_text_exploded.to_csv('data/fish_data/red_list_text_exploded.csv')

### Add class names to the IUCN DataFrames

In [1169]:
class_names = ['A73EGS-P', 'CUNWCB-Y', 'Istiophorus_platypterus', 'P1ROZC-Z', 'PQV7DP-S', 'acanthaluteres_brownii', 'acanthaluteres_spilomelanurus', 'acanthaluteres_vittiger', 'acanthistius_cinctus', 'acanthopagrus_australis', 'acanthopagrus_berda', 'acanthopagrus_latus', 'achoerodus_gouldii', 'achoerodus_viridis', 'acreichthys_tomentosus', 'aesopia_cornuta', 'aethaloperca_rogaa', 'alectis_ciliaris', 'alectis_indica', 'alepes_kleinii', 'aluterus_monoceros', 'aluterus_scriptus', 'amanses_scopas', 'anampses_caeruleopunctatus', 'anampses_elegans', 'anampses_femininus', 'anampses_geographicus', 'anampses_lennardi', 'anampses_melanurus', 'anampses_meleagrides', 'anampses_neoguinaicus', 'anampses_twistii', 'anodontostoma_chacunda', 'anyperodon_leucogrammicus', 'aphareus_furca', 'aphareus_rutilans', 'aprion_virescens', 'argyrops_spinifer', 'aseraggodes_melanostictus', 'atractoscion_aequidens', 'atule_mate', 'auxis_rochei', 'auxis_thazard', 'bathylagichthys_greyae', 'beryx_decadactylus', 'bodianus_anthioides', 'bodianus_axillaris', 'bodianus_bilunulatus', 'bodianus_bimaculatus', 'bodianus_diana', 'bodianus_loxozonus', 'bodianus_mesothorax', 'bodianus_perditio', 'bodianus_unimaculatus', 'bodianus_vulpinus', 'bothus_mancus', 'bothus_myriaster', 'bothus_pantherinus', 'brachaluteres_jacksonianus', 'brachirus_orientalis', 'caesioperca_lepidopterus', 'cantherhines_dumerilii', 'cantherhines_fronticinctus', 'cantherhines_pardalis', 'cantheschenia_grandisquamis', 'caprodon_longimanus', 'caprodon_schlegelii', 'carangoides_caeruleopinnatus', 'carangoides_chrysophrys', 'carangoides_equula', 'carangoides_ferdau', 'carangoides_fulvoguttatus', 'carangoides_hedlandensis', 'carangoides_malabaricus', 'carangoides_orthogrammus', 'carangoides_plagiotaenia', 'caranx_ignobilis', 'caranx_lugubris', 'caranx_melampygus', 'caranx_sexfasciatus', 'carcharhinus_albimarginatus', 'carcharhinus_amblyrhynchos', 'carcharhinus_falciformis', 'carcharhinus_galapagensis', 'carcharhinus_limbatus', 'carcharhinus_melanopterus', 'carcharhinus_obscurus', 'carcharhinus_plumbeus', 'carcharhinus_sorrah', 'centroberyx_affinis', 'centrogenys_vaigiensis', 'centroscymnus_coelolepis', 'cephalopholis_argus', 'cephalopholis_boenak', 'cephalopholis_cyanostigma', 'cephalopholis_formosa', 'cephalopholis_igarashiensis', 'cephalopholis_leopardus', 'cephalopholis_microprion', 'cephalopholis_miniata', 'cephalopholis_sexmaculata', 'cephalopholis_sonnerati', 'cephalopholis_spiloparaea', 'chascanopsetta_lugubris', 'cheilinus_chlorourus', 'cheilinus_fasciatus', 'cheilinus_oxycephalus', 'cheilinus_trilobatus', 'cheilinus_undulatus', 'cheilio_inermis', 'cheilodactylus_ephippium', 'cheilodactylus_fuscus', 'cheilodactylus_spectabilis', 'cheilodactylus_vestitus', 'chelidonichthys_kumu', 'chirocentrus_dorab', 'chirocentrus_nudus', 'choerodon_anchorago', 'choerodon_cauteroma', 'choerodon_cyanodus', 'choerodon_fasciatus', 'choerodon_graphicus', 'choerodon_jordani', 'choerodon_rubescens', 'choerodon_schoenleinii', 'choerodon_venustus', 'choerodon_vitta', 'choerodon_zamboangae', 'chromileptes_altivelis', 'cirrhilabrus_bathyphilus', 'cirrhilabrus_condei', 'cirrhilabrus_cyanopleura', 'cirrhilabrus_exquisitus', 'cirrhilabrus_laboutei', 'cirrhilabrus_punctatus', 'cirrhilabrus_scottorum', 'cirrhilabrus_temminckii', 'coris_aygula', 'coris_batuensis', 'coris_bulbifrons', 'coris_caudimacula', 'coris_dorsomacula', 'coris_gaimard', 'coris_picta', 'coris_pictoides', 'coris_sandeyeri', 'crenimugil_crenilabis', 'cymbacephalus_nematophthalmus', 'cymolutes_praetextatus', 'cymolutes_torquatus', 'cynoglossus_puncticeps', 'cyttopsis_rosea', 'dactylophora_nigricans', 'decapterus_macrosoma', 'decapterus_russelli', 'diproctacanthus_xanthurus', 'dotalabrus_aurantiacus', 'elagatis_bipinnulata', 'epibulus_insidiator', 'epinephelus_areolatus', 'epinephelus_bleekeri', 'epinephelus_chlorostigma', 'epinephelus_coeruleopunctatus', 'epinephelus_coioides', 'epinephelus_corallicola', 'epinephelus_cyanopodus', 'epinephelus_epistictus', 'epinephelus_fasciatus', 'epinephelus_fuscoguttatus', 'epinephelus_hexagonatus', 'epinephelus_howlandi', 'epinephelus_lanceolatus', 'epinephelus_latifasciatus', 'epinephelus_macrospilos', 'epinephelus_maculatus', 'epinephelus_melanostigma', 'epinephelus_merra', 'epinephelus_morrhua', 'epinephelus_multinotatus', 'epinephelus_ongus', 'epinephelus_polyphekadion', 'epinephelus_quoyanus', 'epinephelus_radiatus', 'epinephelus_retouti', 'epinephelus_rivulatus', 'epinephelus_sexfasciatus', 'epinephelus_spilotoceps', 'epinephelus_tauvina', 'epinephelus_undulatostriatus', 'etelis_carbunculus', 'etelis_coruscans', 'eubalichthys_cyanoura', 'eubalichthys_mosaicus', 'eupetrichthys_angustipes', 'euthynnus_affinis', 'evistias_acutirostris', 'gempylus_serpens', 'gnathanodon_speciosus', 'gnathodentex_aureolineatus', 'gracila_albomarginata', 'gymnocranius_audleyi', 'gymnocranius_euanus', 'gymnocranius_grandoculis', 'gymnocranius_microdon', 'gymnosarda_unicolor', 'halichoeres_argus', 'halichoeres_biocellatus', 'halichoeres_chloropterus', 'halichoeres_chrysus', 'halichoeres_hartzfeldii', 'halichoeres_hortulanus', 'halichoeres_leucurus', 'halichoeres_margaritaceus', 'halichoeres_marginatus', 'halichoeres_melanochir', 'halichoeres_melanurus', 'halichoeres_melasmapomus', 'halichoeres_miniatus', 'halichoeres_nebulosus', 'halichoeres_nigrescens', 'halichoeres_scapularis', 'halichoeres_trimaculatus', 'harriotta_raleighana', 'hemigymnus_fasciatus', 'hemigymnus_melapterus', 'hemiramphus_far', 'herklotsichthys_quadrimaculatus', 'hologymnosus_annulatus', 'hologymnosus_doliatus', 'hyporhamphus_affinis', 'hyporhamphus_dussumieri', 'inegocia_japonica', 'johnius_borneensis', 'katsuwonus_pelamis', 'labrichthys_unilineatus', 'labroides_bicolor', 'labroides_dimidiatus', 'labroides_pectoralis', 'labropsis_australis', 'labropsis_manabei', 'labropsis_xanthonota', 'latridopsis_forsteri', 'lepidocybium_flavobrunneum', 'leptojulis_cyanopleura', 'lethrinus_amboinensis', 'lethrinus_atkinsoni', 'lethrinus_erythracanthus', 'lethrinus_genivittatus', 'lethrinus_harak', 'lethrinus_lentjan', 'lethrinus_microdon', 'lethrinus_miniatus', 'lethrinus_nebulosus', 'lethrinus_obsoletus', 'lethrinus_olivaceus', 'lethrinus_ornatus', 'lethrinus_rubrioperculatus', 'lethrinus_semicinctus', 'lethrinus_variegatus', 'lethrinus_xanthochilus', 'liopropoma_mitratum', 'liopropoma_susumi', 'liza_subviridis', 'liza_vaigiensis', 'lniistius_aneitensis', 'lniistius_pavo', 'lutjanus_adetii', 'lutjanus_argentimaculatus', 'lutjanus_biguttatus', 'lutjanus_bohar', 'lutjanus_carponotatus', 'lutjanus_decussatus', 'lutjanus_ehrenbergii', 'lutjanus_erythropterus', 'lutjanus_fulviflamma', 'lutjanus_fulvus', 'lutjanus_gibbus', 'lutjanus_johnii', 'lutjanus_kasmira', 'lutjanus_lemniscatus', 'lutjanus_lutjanus', 'lutjanus_malabaricus', 'lutjanus_monostigma', 'lutjanus_quinquelineatus', 'lutjanus_rivulatus', 'lutjanus_russellii', 'lutjanus_sebae', 'lutjanus_semicinctus', 'lutjanus_semicinctus_quoy', 'lutjanus_timoriensis', 'lutjanus_vitta', 'macolor_macularis', 'macolor_niger', 'macropharyngodon_choati', 'macropharyngodon_kuiteri', 'macropharyngodon_meleagris', 'macropharyngodon_negrosensis', 'macropharyngodon_ornatus', 'megalaspis_cordyla', 'meuschenia_australis', 'meuschenia_freycineti', 'meuschenia_galii', 'meuschenia_hippocrepis', 'meuschenia_scaber', 'meuschenia_trachylepis', 'monacanthus_chinensis', 'monotaxis_grandoculis', 'mugim_cephalus', 'naucrates_ductor', 'negaprion_acutidens', 'nemadactylus_douglasii', 'nemipterus_furcosus', 'nemipterus_hexodon', 'nemipterus_peronii', 'netuma_thalassina', 'nibea_soldado', 'notolabrus_fucicola', 'notolabrus_gymnogenis', 'notolabrus_tetricus', 'notorynchus_cepedianus', 'novaculichthys_taeniourus', 'novaculoides_macrolepidotus', 'oedalechilus_labiosus', 'ophthalmolepis_lineolatus', 'otolithes_ruber', 'oxycheilinus_bimaculatus', 'oxycheilinus_celebicus', 'oxycheilinus_digrammus', 'oxycheilinus_unifasciatus', 'oxymonacanthus_longirostris', 'pagrus_auratus', 'paracaesio_kusakarii', 'paracheilinus_filamentosus', 'paraluteres_prionurus', 'paramonacanthus_choirocephalus', 'paraplagusia_bilineata', 'parastromateus_niger', 'pardachirus_hedleyi', 'pardachirus_pavoninus', 'pentapodus_aureofasciatus', 'pentapodus_paradiseus', 'pentapodus_vitta_quoy', 'pervagor_alternans', 'pervagor_aspricaudus', 'pervagor_janthinosoma', 'pervagor_melanocephalus', 'pervagor_nigrolineatus', 'pinjalo_lewisi', 'platycephalus_indicus', 'plectranthias_longimanus', 'plectranthias_nanus', 'plectranthias_winniensis', 'plectropomus_areolatus', 'plectropomus_laevis', 'plectropomus_leopardus', 'plectropomus_maculatus', 'plectropomus_oligacanthus', 'plotosus_lineatus', 'pristipomoides_argyrogrammicus', 'pristipomoides_auricilla', 'pristipomoides_filamentosus', 'pristipomoides_flavipinnis', 'pristipomoides_sieboldii', 'pristipomoides_zonatus', 'promethichthys_prometheus', 'protonibea_diacanthus', 'psettodes_erumei', 'pseudalutarius_nasicornis', 'pseudanthias_bicolor', 'pseudanthias_cooperi', 'pseudanthias_dispar', 'pseudanthias_fasciatus', 'pseudanthias_huchtii', 'pseudanthias_hypselosoma', 'pseudanthias_lori', 'pseudanthias_luzonensis', 'pseudanthias_pictilis', 'pseudanthias_pleurotaenia', 'pseudanthias_rubrizonatus', 'pseudanthias_sheni', 'pseudanthias_smithvanizi', 'pseudanthias_squamipinnis', 'pseudanthias_tuka', 'pseudanthias_ventralis', 'pseudocaranx_dentex', 'pseudocarcharias_kamoharai', 'pseudocheilinus_evanidus', 'pseudocheilinus_hexataenia', 'pseudocheilinus_ocellatus', 'pseudocheilinus_octotaenia', 'pseudodax_moluccanus', 'pseudojuloides_cerasinus', 'pseudolabrus_biserialis', 'pseudolabrus_guentheri', 'pseudolabrus_luculentus', 'pseudorhombus_argus', 'pseudorhombus_arsius', 'pseudorhombus_elevatus', 'pteragogus_cryptus', 'pteragogus_enneacanthus', 'pteragogus_flagellifer', 'rastrelliger_kanagurta', 'retropinna_semoni', 'rhabdosargus_sarba', 'rhincodon_typus', 'rhizoprionodon_acutus', 'ruvettus_pretiosus', 'samaris_cristatus', 'samariscus_triocellatus', 'sarda_orientalis', 'sardinella_albella', 'sardinella_gibbosa', 'sardinops_sagax', 'scaevius_milii', 'scolopsis_affinis', 'scolopsis_bilineata', 'scolopsis_lineata', 'scolopsis_margaritifer', 'scolopsis_monogramma', 'scolopsis_trilineata', 'scolopsis_vosmeri', 'scolopsis_xenochrous', 'scomberoides_commersonnianus', 'scomberoides_lysan', 'scomberomorus_commerson', 'selar_crumenophthalmus', 'selaroides_leptolepis', 'seriola_dumerili', 'seriola_hippos', 'seriola_rivoliana', 'seriolina_nigrofasciata', 'serranocirrhitus_latus', 'sillago_ciliata', 'sillago_sihama', 'soleichthys_heterorhinos', 'sphyraena_barracuda', 'sphyraena_forsteri', 'sphyraena_jello', 'sphyraena_obtusata', 'stegostoma_fasciatum', 'stethojulis_bandanensis', 'stethojulis_interrupta', 'stethojulis_strigiventer', 'stethojulis_trilineata', 'stolephorus_waitei', 'suezichthys_arquatus', 'suezichthys_cyanolaemus', 'suezichthys_gracilis', 'symphorichthys_spilurus', 'symphorus_nematophorus', 'thalassoma_amblycephalum', 'thalassoma_hardwicke', 'thalassoma_jansenii', 'thalassoma_lunare', 'thalassoma_lutescens', 'thalassoma_nigrofasciatum', 'thalassoma_purpureum', 'thalassoma_quinquevittatum', 'thalassoma_trilobatum', 'thryssa_baelama', 'thryssa_hamiltonii', 'thunnus_alalunga', 'thunnus_albacares', 'thysanophrys_celebica', 'thysanophrys_chiltonae', 'trachichthys_australis', 'trachinotus_baillonii', 'trachinotus_blochii', 'trachinotus_botla', 'trachypoma_macracanthus', 'triaenodon_obesus', 'uraspis_secunda', 'valamugil_cunnesius', 'valamugil_engeli', 'valamugil_seheli', 'variola_albimarginata', 'variola_louti', 'wattsia_mossambica', 'wetmorella_albofasciata', 'wetmorella_nigropinnata', 'xiphocheilus_typus', 'zenarchopterus_dispar', 'zeus_faber']

In [1222]:
# Class names as the model will output
class_names_transformed = ['A73egs-p', 'Cunwcb-y', 'Istiophorus platypterus', 'P1rozc-z', 'Pqv7dp-s', 'Acanthaluteres brownii', 'Acanthaluteres spilomelanurus', 'Acanthaluteres vittiger', 'Acanthistius cinctus', 'Acanthopagrus australis', 'Acanthopagrus berda', 'Acanthopagrus latus', 'Achoerodus gouldii', 'Achoerodus viridis', 'Acreichthys tomentosus', 'Aesopia cornuta', 'Aethaloperca rogaa', 'Alectis ciliaris', 'Alectis indica', 'Alepes kleinii', 'Aluterus monoceros', 'Aluterus scriptus', 'Amanses scopas', 'Anampses caeruleopunctatus', 'Anampses elegans', 'Anampses femininus', 'Anampses geographicus', 'Anampses lennardi', 'Anampses melanurus', 'Anampses meleagrides', 'Anampses neoguinaicus', 'Anampses twistii', 'Anodontostoma chacunda', 'Anyperodon leucogrammicus', 'Aphareus furca', 'Aphareus rutilans', 'Aprion virescens', 'Argyrops spinifer', 'Aseraggodes melanostictus', 'Atractoscion aequidens', 'Atule mate', 'Auxis rochei', 'Auxis thazard', 'Bathylagichthys greyae', 'Beryx decadactylus', 'Bodianus anthioides', 'Bodianus axillaris', 'Bodianus bilunulatus', 'Bodianus bimaculatus', 'Bodianus diana', 'Bodianus loxozonus', 'Bodianus mesothorax', 'Bodianus perditio', 'Bodianus unimaculatus', 'Bodianus vulpinus', 'Bothus mancus', 'Bothus myriaster', 'Bothus pantherinus', 'Brachaluteres jacksonianus', 'Brachirus orientalis', 'Caesioperca lepidopterus', 'Cantherhines dumerilii', 'Cantherhines fronticinctus', 'Cantherhines pardalis', 'Cantheschenia grandisquamis', 'Caprodon longimanus', 'Caprodon schlegelii', 'Carangoides caeruleopinnatus', 'Carangoides chrysophrys', 'Carangoides equula', 'Carangoides ferdau', 'Carangoides fulvoguttatus', 'Carangoides hedlandensis', 'Carangoides malabaricus', 'Carangoides orthogrammus', 'Carangoides plagiotaenia', 'Caranx ignobilis', 'Caranx lugubris', 'Caranx melampygus', 'Caranx sexfasciatus', 'Carcharhinus albimarginatus', 'Carcharhinus amblyrhynchos', 'Carcharhinus falciformis', 'Carcharhinus galapagensis', 'Carcharhinus limbatus', 'Carcharhinus melanopterus', 'Carcharhinus obscurus', 'Carcharhinus plumbeus', 'Carcharhinus sorrah', 'Centroberyx affinis', 'Centrogenys vaigiensis', 'Centroscymnus coelolepis', 'Cephalopholis argus', 'Cephalopholis boenak', 'Cephalopholis cyanostigma', 'Cephalopholis formosa', 'Cephalopholis igarashiensis', 'Cephalopholis leopardus', 'Cephalopholis microprion', 'Cephalopholis miniata', 'Cephalopholis sexmaculata', 'Cephalopholis sonnerati', 'Cephalopholis spiloparaea', 'Chascanopsetta lugubris', 'Cheilinus chlorourus', 'Cheilinus fasciatus', 'Cheilinus oxycephalus', 'Cheilinus trilobatus', 'Cheilinus undulatus', 'Cheilio inermis', 'Cheilodactylus ephippium', 'Cheilodactylus fuscus', 'Cheilodactylus spectabilis', 'Cheilodactylus vestitus', 'Chelidonichthys kumu', 'Chirocentrus dorab', 'Chirocentrus nudus', 'Choerodon anchorago', 'Choerodon cauteroma', 'Choerodon cyanodus', 'Choerodon fasciatus', 'Choerodon graphicus', 'Choerodon jordani', 'Choerodon rubescens', 'Choerodon schoenleinii', 'Choerodon venustus', 'Choerodon vitta', 'Choerodon zamboangae', 'Chromileptes altivelis', 'Cirrhilabrus bathyphilus', 'Cirrhilabrus condei', 'Cirrhilabrus cyanopleura', 'Cirrhilabrus exquisitus', 'Cirrhilabrus laboutei', 'Cirrhilabrus punctatus', 'Cirrhilabrus scottorum', 'Cirrhilabrus temminckii', 'Coris aygula', 'Coris batuensis', 'Coris bulbifrons', 'Coris caudimacula', 'Coris dorsomacula', 'Coris gaimard', 'Coris picta', 'Coris pictoides', 'Coris sandeyeri', 'Crenimugil crenilabis', 'Cymbacephalus nematophthalmus', 'Cymolutes praetextatus', 'Cymolutes torquatus', 'Cynoglossus puncticeps', 'Cyttopsis rosea', 'Dactylophora nigricans', 'Decapterus macrosoma', 'Decapterus russelli', 'Diproctacanthus xanthurus', 'Dotalabrus aurantiacus', 'Elagatis bipinnulata', 'Epibulus insidiator', 'Epinephelus areolatus', 'Epinephelus bleekeri', 'Epinephelus chlorostigma', 'Epinephelus coeruleopunctatus', 'Epinephelus coioides', 'Epinephelus corallicola', 'Epinephelus cyanopodus', 'Epinephelus epistictus', 'Epinephelus fasciatus', 'Epinephelus fuscoguttatus', 'Epinephelus hexagonatus', 'Epinephelus howlandi', 'Epinephelus lanceolatus', 'Epinephelus latifasciatus', 'Epinephelus macrospilos', 'Epinephelus maculatus', 'Epinephelus melanostigma', 'Epinephelus merra', 'Epinephelus morrhua', 'Epinephelus multinotatus', 'Epinephelus ongus', 'Epinephelus polyphekadion', 'Epinephelus quoyanus', 'Epinephelus radiatus', 'Epinephelus retouti', 'Epinephelus rivulatus', 'Epinephelus sexfasciatus', 'Epinephelus spilotoceps', 'Epinephelus tauvina', 'Epinephelus undulatostriatus', 'Etelis carbunculus', 'Etelis coruscans', 'Eubalichthys cyanoura', 'Eubalichthys mosaicus', 'Eupetrichthys angustipes', 'Euthynnus affinis', 'Evistias acutirostris', 'Gempylus serpens', 'Gnathanodon speciosus', 'Gnathodentex aureolineatus', 'Gracila albomarginata', 'Gymnocranius audleyi', 'Gymnocranius euanus', 'Gymnocranius grandoculis', 'Gymnocranius microdon', 'Gymnosarda unicolor', 'Halichoeres argus', 'Halichoeres biocellatus', 'Halichoeres chloropterus', 'Halichoeres chrysus', 'Halichoeres hartzfeldii', 'Halichoeres hortulanus', 'Halichoeres leucurus', 'Halichoeres margaritaceus', 'Halichoeres marginatus', 'Halichoeres melanochir', 'Halichoeres melanurus', 'Halichoeres melasmapomus', 'Halichoeres miniatus', 'Halichoeres nebulosus', 'Halichoeres nigrescens', 'Halichoeres scapularis', 'Halichoeres trimaculatus', 'Harriotta raleighana', 'Hemigymnus fasciatus', 'Hemigymnus melapterus', 'Hemiramphus far', 'Herklotsichthys quadrimaculatus', 'Hologymnosus annulatus', 'Hologymnosus doliatus', 'Hyporhamphus affinis', 'Hyporhamphus dussumieri', 'Inegocia japonica', 'Johnius borneensis', 'Katsuwonus pelamis', 'Labrichthys unilineatus', 'Labroides bicolor', 'Labroides dimidiatus', 'Labroides pectoralis', 'Labropsis australis', 'Labropsis manabei', 'Labropsis xanthonota', 'Latridopsis forsteri', 'Lepidocybium flavobrunneum', 'Leptojulis cyanopleura', 'Lethrinus amboinensis', 'Lethrinus atkinsoni', 'Lethrinus erythracanthus', 'Lethrinus genivittatus', 'Lethrinus harak', 'Lethrinus lentjan', 'Lethrinus microdon', 'Lethrinus miniatus', 'Lethrinus nebulosus', 'Lethrinus obsoletus', 'Lethrinus olivaceus', 'Lethrinus ornatus', 'Lethrinus rubrioperculatus', 'Lethrinus semicinctus', 'Lethrinus variegatus', 'Lethrinus xanthochilus', 'Liopropoma mitratum', 'Liopropoma susumi', 'Liza subviridis', 'Liza vaigiensis', 'Lniistius aneitensis', 'Lniistius pavo', 'Lutjanus adetii', 'Lutjanus argentimaculatus', 'Lutjanus biguttatus', 'Lutjanus bohar', 'Lutjanus carponotatus', 'Lutjanus decussatus', 'Lutjanus ehrenbergii', 'Lutjanus erythropterus', 'Lutjanus fulviflamma', 'Lutjanus fulvus', 'Lutjanus gibbus', 'Lutjanus johnii', 'Lutjanus kasmira', 'Lutjanus lemniscatus', 'Lutjanus lutjanus', 'Lutjanus malabaricus', 'Lutjanus monostigma', 'Lutjanus quinquelineatus', 'Lutjanus rivulatus', 'Lutjanus russellii', 'Lutjanus sebae', 'Lutjanus semicinctus', 'Lutjanus semicinctus quoy', 'Lutjanus timoriensis', 'Lutjanus vitta', 'Macolor macularis', 'Macolor niger', 'Macropharyngodon choati', 'Macropharyngodon kuiteri', 'Macropharyngodon meleagris', 'Macropharyngodon negrosensis', 'Macropharyngodon ornatus', 'Megalaspis cordyla', 'Meuschenia australis', 'Meuschenia freycineti', 'Meuschenia galii', 'Meuschenia hippocrepis', 'Meuschenia scaber', 'Meuschenia trachylepis', 'Monacanthus chinensis', 'Monotaxis grandoculis', 'Mugim cephalus', 'Naucrates ductor', 'Negaprion acutidens', 'Nemadactylus douglasii', 'Nemipterus furcosus', 'Nemipterus hexodon', 'Nemipterus peronii', 'Netuma thalassina', 'Nibea soldado', 'Notolabrus fucicola', 'Notolabrus gymnogenis', 'Notolabrus tetricus', 'Notorynchus cepedianus', 'Novaculichthys taeniourus', 'Novaculoides macrolepidotus', 'Oedalechilus labiosus', 'Ophthalmolepis lineolatus', 'Otolithes ruber', 'Oxycheilinus bimaculatus', 'Oxycheilinus celebicus', 'Oxycheilinus digrammus', 'Oxycheilinus unifasciatus', 'Oxymonacanthus longirostris', 'Pagrus auratus', 'Paracaesio kusakarii', 'Paracheilinus filamentosus', 'Paraluteres prionurus', 'Paramonacanthus choirocephalus', 'Paraplagusia bilineata', 'Parastromateus niger', 'Pardachirus hedleyi', 'Pardachirus pavoninus', 'Pentapodus aureofasciatus', 'Pentapodus paradiseus', 'Pentapodus vitta quoy', 'Pervagor alternans', 'Pervagor aspricaudus', 'Pervagor janthinosoma', 'Pervagor melanocephalus', 'Pervagor nigrolineatus', 'Pinjalo lewisi', 'Platycephalus indicus', 'Plectranthias longimanus', 'Plectranthias nanus', 'Plectranthias winniensis', 'Plectropomus areolatus', 'Plectropomus laevis', 'Plectropomus leopardus', 'Plectropomus maculatus', 'Plectropomus oligacanthus', 'Plotosus lineatus', 'Pristipomoides argyrogrammicus', 'Pristipomoides auricilla', 'Pristipomoides filamentosus', 'Pristipomoides flavipinnis', 'Pristipomoides sieboldii', 'Pristipomoides zonatus', 'Promethichthys prometheus', 'Protonibea diacanthus', 'Psettodes erumei', 'Pseudalutarius nasicornis', 'Pseudanthias bicolor', 'Pseudanthias cooperi', 'Pseudanthias dispar', 'Pseudanthias fasciatus', 'Pseudanthias huchtii', 'Pseudanthias hypselosoma', 'Pseudanthias lori', 'Pseudanthias luzonensis', 'Pseudanthias pictilis', 'Pseudanthias pleurotaenia', 'Pseudanthias rubrizonatus', 'Pseudanthias sheni', 'Pseudanthias smithvanizi', 'Pseudanthias squamipinnis', 'Pseudanthias tuka', 'Pseudanthias ventralis', 'Pseudocaranx dentex', 'Pseudocarcharias kamoharai', 'Pseudocheilinus evanidus', 'Pseudocheilinus hexataenia', 'Pseudocheilinus ocellatus', 'Pseudocheilinus octotaenia', 'Pseudodax moluccanus', 'Pseudojuloides cerasinus', 'Pseudolabrus biserialis', 'Pseudolabrus guentheri', 'Pseudolabrus luculentus', 'Pseudorhombus argus', 'Pseudorhombus arsius', 'Pseudorhombus elevatus', 'Pteragogus cryptus', 'Pteragogus enneacanthus', 'Pteragogus flagellifer', 'Rastrelliger kanagurta', 'Retropinna semoni', 'Rhabdosargus sarba', 'Rhincodon typus', 'Rhizoprionodon acutus', 'Ruvettus pretiosus', 'Samaris cristatus', 'Samariscus triocellatus', 'Sarda orientalis', 'Sardinella albella', 'Sardinella gibbosa', 'Sardinops sagax', 'Scaevius milii', 'Scolopsis affinis', 'Scolopsis bilineata', 'Scolopsis lineata', 'Scolopsis margaritifer', 'Scolopsis monogramma', 'Scolopsis trilineata', 'Scolopsis vosmeri', 'Scolopsis xenochrous', 'Scomberoides commersonnianus', 'Scomberoides lysan', 'Scomberomorus commerson', 'Selar crumenophthalmus', 'Selaroides leptolepis', 'Seriola dumerili', 'Seriola hippos', 'Seriola rivoliana', 'Seriolina nigrofasciata', 'Serranocirrhitus latus', 'Sillago ciliata', 'Sillago sihama', 'Soleichthys heterorhinos', 'Sphyraena barracuda', 'Sphyraena forsteri', 'Sphyraena jello', 'Sphyraena obtusata', 'Stegostoma fasciatum', 'Stethojulis bandanensis', 'Stethojulis interrupta', 'Stethojulis strigiventer', 'Stethojulis trilineata', 'Stolephorus waitei', 'Suezichthys arquatus', 'Suezichthys cyanolaemus', 'Suezichthys gracilis', 'Symphorichthys spilurus', 'Symphorus nematophorus', 'Thalassoma amblycephalum', 'Thalassoma hardwicke', 'Thalassoma jansenii', 'Thalassoma lunare', 'Thalassoma lutescens', 'Thalassoma nigrofasciatum', 'Thalassoma purpureum', 'Thalassoma quinquevittatum', 'Thalassoma trilobatum', 'Thryssa baelama', 'Thryssa hamiltonii', 'Thunnus alalunga', 'Thunnus albacares', 'Thysanophrys celebica', 'Thysanophrys chiltonae', 'Trachichthys australis', 'Trachinotus baillonii', 'Trachinotus blochii', 'Trachinotus botla', 'Trachypoma macracanthus', 'Triaenodon obesus', 'Uraspis secunda', 'Valamugil cunnesius', 'Valamugil engeli', 'Valamugil seheli', 'Variola albimarginata', 'Variola louti', 'Wattsia mossambica', 'Wetmorella albofasciata', 'Wetmorella nigropinnata', 'Xiphocheilus typus', 'Zenarchopterus dispar', 'Zeus faber']

In [1223]:
df_class_names = pd.DataFrame(class_names, columns=['class_names'])
df_class_names_transformed = pd.DataFrame(class_names_transformed, columns=['class_names_transformed'])

In [1225]:
df_class_names_transformed

Unnamed: 0,class_names_transformed
0,A73egs-p
1,Cunwcb-y
2,Istiophorus platypterus
3,P1rozc-z
4,Pqv7dp-s
...,...
478,Wetmorella albofasciata
479,Wetmorella nigropinnata
480,Xiphocheilus typus
481,Zenarchopterus dispar


In [1226]:
df_class_names.to_csv('data/fish_data/class_names.csv')
df_class_names_transformed.to_csv('data/fish_data/class_names_transformed.csv')

In [1205]:
df_text_exploded.head(2)

Unnamed: 0,species_id,taxonomicnotes,rationale,geographicrange,population,populationtrend,habitat,threats,conservationmeasures,usetrade
0,0,,,,,,,,,
1,0,,,,,,,,,


In [1227]:
df_fishfacts = pd.concat([df_class_names,df_class_names_transformed,df_species_exploded,df_text_exploded],axis=1)

In [1228]:
print(df_fishfacts.columns)

Index(['class_names', 'class_names_transformed', 'taxonid', 'scientific_name',
       'kingdom', 'phylum', 'class', 'order', 'family', 'genus',
       'main_common_name', 'authority', 'published_year', 'assessment_date',
       'category', 'criteria', 'population_trend', 'marine_system',
       'freshwater_system', 'terrestrial_system', 'assessor', 'reviewer',
       'aoo_km2', 'eoo_km2', 'elevation_upper', 'elevation_lower',
       'depth_upper', 'depth_lower', 'errata_flag', 'errata_reason',
       'amended_flag', 'amended_reason'],
      dtype='object')
Index(['class_names', 'class_names_transformed', 'species_id',
       'taxonomicnotes', 'rationale', 'geographicrange', 'population',
       'populationtrend', 'habitat', 'threats', 'conservationmeasures',
       'usetrade'],
      dtype='object')


In [1229]:
df_fishfacts.to_csv('data/fish_data/fish_facts.csv')


In [72]:
class_names = ['A73EGS-P', 'CUNWCB-Y', 'Istiophorus_platypterus', 'P1ROZC-Z', 'PQV7DP-S', 'acanthaluteres_brownii', 'acanthaluteres_spilomelanurus', 'acanthaluteres_vittiger', 'acanthistius_cinctus', 'acanthopagrus_australis', 'acanthopagrus_berda', 'acanthopagrus_latus', 'achoerodus_gouldii', 'achoerodus_viridis', 'acreichthys_tomentosus', 'aesopia_cornuta', 'aethaloperca_rogaa', 'alectis_ciliaris', 'alectis_indica', 'alepes_kleinii', 'aluterus_monoceros', 'aluterus_scriptus', 'amanses_scopas', 'anampses_caeruleopunctatus', 'anampses_elegans', 'anampses_femininus', 'anampses_geographicus', 'anampses_lennardi', 'anampses_melanurus', 'anampses_meleagrides', 'anampses_neoguinaicus', 'anampses_twistii', 'anodontostoma_chacunda', 'anyperodon_leucogrammicus', 'aphareus_furca', 'aphareus_rutilans', 'aprion_virescens', 'argyrops_spinifer', 'aseraggodes_melanostictus', 'atractoscion_aequidens', 'atule_mate', 'auxis_rochei', 'auxis_thazard', 'bathylagichthys_greyae', 'beryx_decadactylus', 'bodianus_anthioides', 'bodianus_axillaris', 'bodianus_bilunulatus', 'bodianus_bimaculatus', 'bodianus_diana', 'bodianus_loxozonus', 'bodianus_mesothorax', 'bodianus_perditio', 'bodianus_unimaculatus', 'bodianus_vulpinus', 'bothus_mancus', 'bothus_myriaster', 'bothus_pantherinus', 'brachaluteres_jacksonianus', 'brachirus_orientalis', 'caesioperca_lepidopterus', 'cantherhines_dumerilii', 'cantherhines_fronticinctus', 'cantherhines_pardalis', 'cantheschenia_grandisquamis', 'caprodon_longimanus', 'caprodon_schlegelii', 'carangoides_caeruleopinnatus', 'carangoides_chrysophrys', 'carangoides_equula', 'carangoides_ferdau', 'carangoides_fulvoguttatus', 'carangoides_hedlandensis', 'carangoides_malabaricus', 'carangoides_orthogrammus', 'carangoides_plagiotaenia', 'caranx_ignobilis', 'caranx_lugubris', 'caranx_melampygus', 'caranx_sexfasciatus', 'carcharhinus_albimarginatus', 'carcharhinus_amblyrhynchos', 'carcharhinus_falciformis', 'carcharhinus_galapagensis', 'carcharhinus_limbatus', 'carcharhinus_melanopterus', 'carcharhinus_obscurus', 'carcharhinus_plumbeus', 'carcharhinus_sorrah', 'centroberyx_affinis', 'centrogenys_vaigiensis', 'centroscymnus_coelolepis', 'cephalopholis_argus', 'cephalopholis_boenak', 'cephalopholis_cyanostigma', 'cephalopholis_formosa', 'cephalopholis_igarashiensis', 'cephalopholis_leopardus', 'cephalopholis_microprion', 'cephalopholis_miniata', 'cephalopholis_sexmaculata', 'cephalopholis_sonnerati', 'cephalopholis_spiloparaea', 'chascanopsetta_lugubris', 'cheilinus_chlorourus', 'cheilinus_fasciatus', 'cheilinus_oxycephalus', 'cheilinus_trilobatus', 'cheilinus_undulatus', 'cheilio_inermis', 'cheilodactylus_ephippium', 'cheilodactylus_fuscus', 'cheilodactylus_spectabilis', 'cheilodactylus_vestitus', 'chelidonichthys_kumu', 'chirocentrus_dorab', 'chirocentrus_nudus', 'choerodon_anchorago', 'choerodon_cauteroma', 'choerodon_cyanodus', 'choerodon_fasciatus', 'choerodon_graphicus', 'choerodon_jordani', 'choerodon_rubescens', 'choerodon_schoenleinii', 'choerodon_venustus', 'choerodon_vitta', 'choerodon_zamboangae', 'chromileptes_altivelis', 'cirrhilabrus_bathyphilus', 'cirrhilabrus_condei', 'cirrhilabrus_cyanopleura', 'cirrhilabrus_exquisitus', 'cirrhilabrus_laboutei', 'cirrhilabrus_punctatus', 'cirrhilabrus_scottorum', 'cirrhilabrus_temminckii', 'coris_aygula', 'coris_batuensis', 'coris_bulbifrons', 'coris_caudimacula', 'coris_dorsomacula', 'coris_gaimard', 'coris_picta', 'coris_pictoides', 'coris_sandeyeri', 'crenimugil_crenilabis', 'cymbacephalus_nematophthalmus', 'cymolutes_praetextatus', 'cymolutes_torquatus', 'cynoglossus_puncticeps', 'cyttopsis_rosea', 'dactylophora_nigricans', 'decapterus_macrosoma', 'decapterus_russelli', 'diproctacanthus_xanthurus', 'dotalabrus_aurantiacus', 'elagatis_bipinnulata', 'epibulus_insidiator', 'epinephelus_areolatus', 'epinephelus_bleekeri', 'epinephelus_chlorostigma', 'epinephelus_coeruleopunctatus', 'epinephelus_coioides', 'epinephelus_corallicola', 'epinephelus_cyanopodus', 'epinephelus_epistictus', 'epinephelus_fasciatus', 'epinephelus_fuscoguttatus', 'epinephelus_hexagonatus', 'epinephelus_howlandi', 'epinephelus_lanceolatus', 'epinephelus_latifasciatus', 'epinephelus_macrospilos', 'epinephelus_maculatus', 'epinephelus_melanostigma', 'epinephelus_merra', 'epinephelus_morrhua', 'epinephelus_multinotatus', 'epinephelus_ongus', 'epinephelus_polyphekadion', 'epinephelus_quoyanus', 'epinephelus_radiatus', 'epinephelus_retouti', 'epinephelus_rivulatus', 'epinephelus_sexfasciatus', 'epinephelus_spilotoceps', 'epinephelus_tauvina', 'epinephelus_undulatostriatus', 'etelis_carbunculus', 'etelis_coruscans', 'eubalichthys_cyanoura', 'eubalichthys_mosaicus', 'eupetrichthys_angustipes', 'euthynnus_affinis', 'evistias_acutirostris', 'gempylus_serpens', 'gnathanodon_speciosus', 'gnathodentex_aureolineatus', 'gracila_albomarginata', 'gymnocranius_audleyi', 'gymnocranius_euanus', 'gymnocranius_grandoculis', 'gymnocranius_microdon', 'gymnosarda_unicolor', 'halichoeres_argus', 'halichoeres_biocellatus', 'halichoeres_chloropterus', 'halichoeres_chrysus', 'halichoeres_hartzfeldii', 'halichoeres_hortulanus', 'halichoeres_leucurus', 'halichoeres_margaritaceus', 'halichoeres_marginatus', 'halichoeres_melanochir', 'halichoeres_melanurus', 'halichoeres_melasmapomus', 'halichoeres_miniatus', 'halichoeres_nebulosus', 'halichoeres_nigrescens', 'halichoeres_scapularis', 'halichoeres_trimaculatus', 'harriotta_raleighana', 'hemigymnus_fasciatus', 'hemigymnus_melapterus', 'hemiramphus_far', 'herklotsichthys_quadrimaculatus', 'hologymnosus_annulatus', 'hologymnosus_doliatus', 'hyporhamphus_affinis', 'hyporhamphus_dussumieri', 'inegocia_japonica', 'johnius_borneensis', 'katsuwonus_pelamis', 'labrichthys_unilineatus', 'labroides_bicolor', 'labroides_dimidiatus', 'labroides_pectoralis', 'labropsis_australis', 'labropsis_manabei', 'labropsis_xanthonota', 'latridopsis_forsteri', 'lepidocybium_flavobrunneum', 'leptojulis_cyanopleura', 'lethrinus_amboinensis', 'lethrinus_atkinsoni', 'lethrinus_erythracanthus', 'lethrinus_genivittatus', 'lethrinus_harak', 'lethrinus_lentjan', 'lethrinus_microdon', 'lethrinus_miniatus', 'lethrinus_nebulosus', 'lethrinus_obsoletus', 'lethrinus_olivaceus', 'lethrinus_ornatus', 'lethrinus_rubrioperculatus', 'lethrinus_semicinctus', 'lethrinus_variegatus', 'lethrinus_xanthochilus', 'liopropoma_mitratum', 'liopropoma_susumi', 'liza_subviridis', 'liza_vaigiensis', 'lniistius_aneitensis', 'lniistius_pavo', 'lutjanus_adetii', 'lutjanus_argentimaculatus', 'lutjanus_biguttatus', 'lutjanus_bohar', 'lutjanus_carponotatus', 'lutjanus_decussatus', 'lutjanus_ehrenbergii', 'lutjanus_erythropterus', 'lutjanus_fulviflamma', 'lutjanus_fulvus', 'lutjanus_gibbus', 'lutjanus_johnii', 'lutjanus_kasmira', 'lutjanus_lemniscatus', 'lutjanus_lutjanus', 'lutjanus_malabaricus', 'lutjanus_monostigma', 'lutjanus_quinquelineatus', 'lutjanus_rivulatus', 'lutjanus_russellii', 'lutjanus_sebae', 'lutjanus_semicinctus', 'lutjanus_semicinctus_quoy', 'lutjanus_timoriensis', 'lutjanus_vitta', 'macolor_macularis', 'macolor_niger', 'macropharyngodon_choati', 'macropharyngodon_kuiteri', 'macropharyngodon_meleagris', 'macropharyngodon_negrosensis', 'macropharyngodon_ornatus', 'megalaspis_cordyla', 'meuschenia_australis', 'meuschenia_freycineti', 'meuschenia_galii', 'meuschenia_hippocrepis', 'meuschenia_scaber', 'meuschenia_trachylepis', 'monacanthus_chinensis', 'monotaxis_grandoculis', 'mugim_cephalus', 'naucrates_ductor', 'negaprion_acutidens', 'nemadactylus_douglasii', 'nemipterus_furcosus', 'nemipterus_hexodon', 'nemipterus_peronii', 'netuma_thalassina', 'nibea_soldado', 'notolabrus_fucicola', 'notolabrus_gymnogenis', 'notolabrus_tetricus', 'notorynchus_cepedianus', 'novaculichthys_taeniourus', 'novaculoides_macrolepidotus', 'oedalechilus_labiosus', 'ophthalmolepis_lineolatus', 'otolithes_ruber', 'oxycheilinus_bimaculatus', 'oxycheilinus_celebicus', 'oxycheilinus_digrammus', 'oxycheilinus_unifasciatus', 'oxymonacanthus_longirostris', 'pagrus_auratus', 'paracaesio_kusakarii', 'paracheilinus_filamentosus', 'paraluteres_prionurus', 'paramonacanthus_choirocephalus', 'paraplagusia_bilineata', 'parastromateus_niger', 'pardachirus_hedleyi', 'pardachirus_pavoninus', 'pentapodus_aureofasciatus', 'pentapodus_paradiseus', 'pentapodus_vitta_quoy', 'pervagor_alternans', 'pervagor_aspricaudus', 'pervagor_janthinosoma', 'pervagor_melanocephalus', 'pervagor_nigrolineatus', 'pinjalo_lewisi', 'platycephalus_indicus', 'plectranthias_longimanus', 'plectranthias_nanus', 'plectranthias_winniensis', 'plectropomus_areolatus', 'plectropomus_laevis', 'plectropomus_leopardus', 'plectropomus_maculatus', 'plectropomus_oligacanthus', 'plotosus_lineatus', 'pristipomoides_argyrogrammicus', 'pristipomoides_auricilla', 'pristipomoides_filamentosus', 'pristipomoides_flavipinnis', 'pristipomoides_sieboldii', 'pristipomoides_zonatus', 'promethichthys_prometheus', 'protonibea_diacanthus', 'psettodes_erumei', 'pseudalutarius_nasicornis', 'pseudanthias_bicolor', 'pseudanthias_cooperi', 'pseudanthias_dispar', 'pseudanthias_fasciatus', 'pseudanthias_huchtii', 'pseudanthias_hypselosoma', 'pseudanthias_lori', 'pseudanthias_luzonensis', 'pseudanthias_pictilis', 'pseudanthias_pleurotaenia', 'pseudanthias_rubrizonatus', 'pseudanthias_sheni', 'pseudanthias_smithvanizi', 'pseudanthias_squamipinnis', 'pseudanthias_tuka', 'pseudanthias_ventralis', 'pseudocaranx_dentex', 'pseudocarcharias_kamoharai', 'pseudocheilinus_evanidus', 'pseudocheilinus_hexataenia', 'pseudocheilinus_ocellatus', 'pseudocheilinus_octotaenia', 'pseudodax_moluccanus', 'pseudojuloides_cerasinus', 'pseudolabrus_biserialis', 'pseudolabrus_guentheri', 'pseudolabrus_luculentus', 'pseudorhombus_argus', 'pseudorhombus_arsius', 'pseudorhombus_elevatus', 'pteragogus_cryptus', 'pteragogus_enneacanthus', 'pteragogus_flagellifer', 'rastrelliger_kanagurta', 'retropinna_semoni', 'rhabdosargus_sarba', 'rhincodon_typus', 'rhizoprionodon_acutus', 'ruvettus_pretiosus', 'samaris_cristatus', 'samariscus_triocellatus', 'sarda_orientalis', 'sardinella_albella', 'sardinella_gibbosa', 'sardinops_sagax', 'scaevius_milii', 'scolopsis_affinis', 'scolopsis_bilineata', 'scolopsis_lineata', 'scolopsis_margaritifer', 'scolopsis_monogramma', 'scolopsis_trilineata', 'scolopsis_vosmeri', 'scolopsis_xenochrous', 'scomberoides_commersonnianus', 'scomberoides_lysan', 'scomberomorus_commerson', 'selar_crumenophthalmus', 'selaroides_leptolepis', 'seriola_dumerili', 'seriola_hippos', 'seriola_rivoliana', 'seriolina_nigrofasciata', 'serranocirrhitus_latus', 'sillago_ciliata', 'sillago_sihama', 'soleichthys_heterorhinos', 'sphyraena_barracuda', 'sphyraena_forsteri', 'sphyraena_jello', 'sphyraena_obtusata', 'stegostoma_fasciatum', 'stethojulis_bandanensis', 'stethojulis_interrupta', 'stethojulis_strigiventer', 'stethojulis_trilineata', 'stolephorus_waitei', 'suezichthys_arquatus', 'suezichthys_cyanolaemus', 'suezichthys_gracilis', 'symphorichthys_spilurus', 'symphorus_nematophorus', 'thalassoma_amblycephalum', 'thalassoma_hardwicke', 'thalassoma_jansenii', 'thalassoma_lunare', 'thalassoma_lutescens', 'thalassoma_nigrofasciatum', 'thalassoma_purpureum', 'thalassoma_quinquevittatum', 'thalassoma_trilobatum', 'thryssa_baelama', 'thryssa_hamiltonii', 'thunnus_alalunga', 'thunnus_albacares', 'thysanophrys_celebica', 'thysanophrys_chiltonae', 'trachichthys_australis', 'trachinotus_baillonii', 'trachinotus_blochii', 'trachinotus_botla', 'trachypoma_macracanthus', 'triaenodon_obesus', 'uraspis_secunda', 'valamugil_cunnesius', 'valamugil_engeli', 'valamugil_seheli', 'variola_albimarginata', 'variola_louti', 'wattsia_mossambica', 'wetmorella_albofasciata', 'wetmorella_nigropinnata', 'xiphocheilus_typus', 'zenarchopterus_dispar', 'zeus_faber']

### Access informationin the dataframe

In [2]:
# Formatting the column display in jupyter labs
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option("display.max_rows", 200)
pd.set_option('display.max_colwidth', None)

In [3]:
import pandas as pd

df_fishfacts = pd.read_csv('data/fish_data/fish_facts.csv')


In [4]:
df_fishfacts.columns

Index(['Unnamed: 0', 'class_names', 'class_names_transformed', 'taxonid',
       'scientific_name', 'kingdom', 'phylum', 'class', 'order', 'family',
       'genus', 'main_common_name', 'authority', 'published_year',
       'assessment_date', 'category', 'criteria', 'population_trend',
       'marine_system', 'freshwater_system', 'terrestrial_system', 'assessor',
       'reviewer', 'aoo_km2', 'eoo_km2', 'elevation_upper', 'elevation_lower',
       'depth_upper', 'depth_lower', 'errata_flag', 'errata_reason',
       'amended_flag', 'amended_reason'],
      dtype='object')

In [11]:
species = 'Istiophorus platypterus'
info_column = 'habitat'

df_fishfacts[df_fishfacts['class_names'] == species][info_column]

Series([], Name: habitat, dtype: object)

In [12]:
df_fishfacts.head(3)

Unnamed: 0.1,Unnamed: 0,class_names,class_names_transformed,taxonid,scientific_name,kingdom,phylum,class,order,family,genus,main_common_name,authority,published_year,assessment_date,category,criteria,population_trend,marine_system,freshwater_system,terrestrial_system,assessor,reviewer,aoo_km2,eoo_km2,elevation_upper,elevation_lower,depth_upper,depth_lower,errata_flag,errata_reason,amended_flag,amended_reason
0,0,A73EGS-P,A73egs-p,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,CUNWCB-Y,Cunwcb-y,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2,Istiophorus_platypterus,Istiophorus platypterus,170338,Istiophorus platypterus,ANIMALIA,CHORDATA,ACTINOPTERYGII,PERCIFORMES,ISTIOPHORIDAE,Istiophorus,Sailfish,"(Shaw, 1792)",2022.0,2021-05-01,VU,A2bd,Decreasing,True,False,False,"Collette, B.B., Di Natale, A., Fox, W., Graves, J., Juan Jorda, M., Pohlot, B., Restrepo, V. & Schratwieser, J.","Polidoro, B., Kemppinen, K., Sun, C.-L., Pollard, D.A. & Hinton, M.",,,,,0.0,500.0,,,,


In [23]:
# workaround to JUST get the string from a query

conservation_status = df_fishfacts[df_fishfacts['class_names_transformed'] == species]["category"]
conservation_status = list(conservation_status)
conservation_status[0]


'VU'

In [21]:
print(df_species.loc[df_species['class_names_transformed'] == species]['category'])
print(df_fishfacts[df_fishfacts['class_names_transformed'] == species]["category"])


2    VU
Name: category, dtype: object

### IUCN category labels

* Extinct (EX) – beyond reasonable doubt that the species is no longer extant.
* Extinct in the wild (EW) – survives only in captivity, cultivation and/or outside native range, as presumed after exhaustive surveys.
* Critically endangered (CR) – in a particularly and extremely critical state.
* Endangered (EN) – very high risk of extinction in the wild, meets any of criteria A to E for Endangered.
* Vulnerable (VU) – meets one of the 5 Red List criteria and thus considered to be at high risk of unnatural (human-caused) extinction without further human intervention.
* Near threatened (NT) – close to being endangered in the near future.
* Least concern (LC) – unlikely to become endangered or extinct in the near future.
* Data deficient (DD)
* Not evaluated (NE)