In [9]:
import subprocess 
import sys
def install_bs4():
    subprocess.check_call([sys.executable, "-m", "pip", "install", "bs4"])

try:
    from bs4 import BeautifulSoup
except:
    install_bs4()
    from bs4 import BeautifulSoup

Collecting bs4
  Downloading bs4-0.0.1.tar.gz (1.1 kB)
Collecting beautifulsoup4
  Downloading beautifulsoup4-4.11.1-py3-none-any.whl (128 kB)
Collecting soupsieve>1.2
  Downloading soupsieve-2.3.2.post1-py3-none-any.whl (37 kB)
Building wheels for collected packages: bs4
  Building wheel for bs4 (setup.py): started
  Building wheel for bs4 (setup.py): finished with status 'done'
  Created wheel for bs4: filename=bs4-0.0.1-py3-none-any.whl size=1272 sha256=5a333c662fb988a5d9c43aa737274c8c2dbd1c4661f395c78736a6a31712e525
  Stored in directory: /home/shivaram/.cache/pip/wheels/73/2b/cb/099980278a0c9a3e57ff1a89875ec07bfa0b6fcbebb9a8cad3
Successfully built bs4
Installing collected packages: soupsieve, beautifulsoup4, bs4
Successfully installed beautifulsoup4-4.11.1 bs4-0.0.1 soupsieve-2.3.2.post1


In [13]:
#from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import os
import shutil
import urllib.request
import cv2
import pathlib
import random
import re

In [2]:
random.seed(5)

# Create Hawaii Fish Table
Scrape the relevant fish data from fishbase and generate a dataframe from it.

In [3]:
try:
    fish_df = pd.read_csv("data/fish_df.csv")
except:
    print("Fish Dataframe not Found. Regenerating...")
    html = requests.get("https://www.fishbase.se/Country/CountryChecklist.php?showAll=yes&c_code=840B&vhabitat=all2&cpresence=present")
    soup = bs(html.content, "html.parser")

    fish_df = pd.read_html(str(table))[0]

    # Add fish page Links
    links = []
    for anchor in table.findAll('a'):
        links.append("https://www.fishbase.se/Country/" + anchor["href"])
    fish_df["Link"] = links

    # Add fishbase ids
    fishbase_ids = [link.split('=')[-1] for link in links]
    fish_df.insert(0, "fishbase_id", fishbase_ids)

    # Add image counts and image page url for each species
    pic_links = ["https://www.fishbase.se/photos/thumbnailssummary.php?ID=" + fid for fid in fishbase_ids]  
    fish_df["image_page_link"] = pic_links
    
    image_counts = []
    for link in pic_links:
        page = requests.get(link)
        p_soup = bs(page.content, "html.parser")
        images = p_soup.findAll('img')
        image_counts.append(len(images))
    fish_df["image_counts"] = image_counts    

    fish_df.to_csv("data/fish_df.csv", index = False)

In [4]:
fish_df.head()

Unnamed: 0,fishbase_id,Order,Family,Species,Occurrence,FishBase name,Name,Link,image_counts,image_page_link
0,972,Beloniformes,Belonidae,Ablennes hians,native,Flat needlefish,'Aha'aha,https://www.fishbase.se/Country/CountrySpecies...,30,https://www.fishbase.se/photos/thumbnailssumma...
1,6652,Ovalentaria/misc,Pomacentridae,Abudefduf abdominalis,native,Green damselfish,Mamamo,https://www.fishbase.se/Country/CountrySpecies...,10,https://www.fishbase.se/photos/thumbnailssumma...
2,5689,Ovalentaria/misc,Pomacentridae,Abudefduf sordidus,native,Blackspot sergeant,Ao'aonui,https://www.fishbase.se/Country/CountrySpecies...,69,https://www.fishbase.se/photos/thumbnailssumma...
3,6630,Ovalentaria/misc,Pomacentridae,Abudefduf vaigiensis,native,Indo-Pacific sergeant,Indo-Pacific sergeant,https://www.fishbase.se/Country/CountrySpecies...,145,https://www.fishbase.se/photos/thumbnailssumma...
4,89,Scombriformes,Scombridae,Acanthocybium solandri,native,Wahoo,Ono,https://www.fishbase.se/Country/CountrySpecies...,111,https://www.fishbase.se/photos/thumbnailssumma...


# Download Images from Fishbase
Download all of the images from fishbase that correspond to the species pulled from the database above.

In [5]:
if not os.path.exists("data/fishbase_images/"):
    os.makedirs("data/fishbase_images/")

In [6]:
for ind, row in fish_df.iterrows():
    id_path = "data/fishbase_images/" + str(row["fishbase_id"])
    if os.path.exists(id_path):
        #print(str(row["fishbase_id"]) + " already generated. Skipping...")
        continue
    print(str(row["fishbase_id"]) + " is being downloaded...")
        
    os.makedirs(id_path)
        
    html = requests.get(row["image_page_link"])
    p_soup = bs(html.content, "html.parser")
    
    img_tags = p_soup.find_all('img')
    urls = []
    for url in [img['src'] for img in img_tags]:
        if (len(url.split('/'))) < 2 or url.split('/')[2] == "thumbnails" or url.split('.')[-1] not in set(["jpg", "gif", "png", "jpeg"]) :
            continue
        urls.append(url)
        
    base_url = "https://www.fishbase.se/"
    
    # Download Images
    for url in urls:
        full_url = base_url + url[3:]
        save_path = id_path+ "/" + url.split('/')[-1]
        try: 
            urllib.request.urlretrieve(full_url, save_path)
        except:
            print(f"{full_url} did not download successfully. Skipping")

# Generate Is Fish Classifier Dataset
To generate the Is Fish dataset, I resized the fishbase images to 224x224 and saved them to train, test, and val directories in a 6/2/2 split. I then further augmented this data using mini imagenet images. For these, I found all of the fish and non-fish images through a combination of manual and automated parsing. The imagenet images had a train and a validation set, so I took about 20% of each of them and put them in the test set. The rest went to their respective directories. 

At the end of this processing, I ended up with train, test, and validation directories, each of which has both the fish and the not_fish subdirectories. At the end of this, the train/val/test split was 33,386/5,112/9,616 or 69.4%-10.6%-20%.


### Generate Directories

In [91]:
paths = ["data/is_fish_images/test/fish", "data/is_fish_images/test/not_fish", "data/is_fish_images/train/fish", 
         "data/is_fish_images/train/not_fish", "data/is_fish_images/val/fish", "data/is_fish_images/val/not_fish"]

for path in paths:
    if not os.path.exists(path):
        os.makedirs(path)


### Resize Fishbase Images for Fish Classifier

In [92]:
fishbase_path = "data/fishbase_images/"
fishbase_images = [os.path.join(path, name) for path, subdirs, files in os.walk(fishbase_path) for name in files if os.path.join(path, name).split('.')[-1]in set(["jpg", "png", "jpeg"])]

In [93]:
# Generate small images of fish
for path in fishbase_images:
    save_spot = (random.randint(0, 9))
    # Save to training 
    if save_spot  <=5:
        save_path = "data/is_fish_images/train/fish/" + path.split('/')[-1]
        alt_path = "data/is_fish_images/test/fish/" + path.split('/')[-1]
        alt2_path = "data/is_fish_images/val/fish/" + path.split('/')[-1]
    # Save to test
    elif save_spot <=7:
        save_path = "data/is_fish_images/test/fish/" + path.split('/')[-1]
        alt_path = "data/is_fish_images/train/fish/" + path.split('/')[-1]
        alt2_path = "data/is_fish_images/val/fish/" + path.split('/')[-1]
    # Save to validation
    else:
        save_path = "data/is_fish_images/val/fish/" + path.split('/')[-1]
        alt_path = "data/is_fish_images/test/fish/" + path.split('/')[-1]
        alt2_path = "data/is_fish_images/train/fish/" + path.split('/')[-1]
    if os.path.exists(save_path) or os.path.exists(alt_path) or os.path.exists(alt2_path): 
        continue
        
    img = cv2.imread(path, 1)
    try:
        img_stretch = cv2.resize(img, (224, 224))
    except:
        print(f"{path} resize failed. Skipping...")
        continue
      
    cv2.imwrite(save_path, img_stretch)

    


data/fishbase_images/1261/acxan_u3.jpg resize failed. Skipping...
data/fishbase_images/457/1378126376_50.150.156.71.jpg resize failed. Skipping...


### Incorporate Mini Imagenet

In [94]:
# Get all of the potential classes that could be a fish
imagenet_mapping = pd.read_csv("imagenet/words.txt", sep = "	", header = None, names = ["id", "class"])
potential_fish_rows = imagenet_mapping[imagenet_mapping["class"].str.contains("fish") | 
                                  imagenet_mapping["class"].str.contains("[ ]?shark[ ]?") | 
                                  imagenet_mapping["class"].str.contains("eel") | 
                                  imagenet_mapping["class"].str.contains("ray")]
potential_fish_ids = set(potential_fish_rows["id"])

At this point, I looked through all of the potential fish ids and all of the potential not-fish ids to find which ones did not belong. Based on this, I generated the lists of fish ids and list of ambiguous ids. If an id was not ambiguous or not a fish id, then I classified it as a not fish id. Unfortunately, none of the ambiguous classes were in the mini imagenet, so this step turned out to be uneccesary.

In [95]:
fish_id_string = """    - n01428580
    - n01429349
    - n01432517
    - n01438208
    - n01439121
    - n01443537
    - n01444339
    - n01445429
    - n01446760
    - n01447331
    - n01447946
    - n01448594
    - n01448951
    - n01449374
    n01450661                                       squirrelfish
7378   n01450950            reef squirrelfish, Holocentrus coruscus
7379   n01451115        deepwater squirrelfish, Holocentrus bullisi
7381   n01451426                          soldierfish, soldier-fish
7384   n01451863                         anomalops, flashlight fish
7387   n01452345        flashlight fish, Photoblepharon palpebratus
7395   n01453475                              boarfish, Capros aper
7397   n01453742                                           boarfish
7401   n01454545                                         cornetfish
7408   n01455778                               pipefish, needlefish
7410   n01456137             dwarf pipefish, Syngnathus hildebrandi
7412   n01456454          deepwater pipefish, Cosmocampus profundus
7416   n01457082                            snipefish, bellows fish
7418   n01457407                            shrimpfish, shrimp-fish
7421   n01457852                  trumpetfish, Aulostomus maculatus
7505   n01474283         jawless vertebrate, jawless fish, agnathan
7521   n01477525                   lamprey, lamprey eel, lamper eel
7526   n01478511                           hagfish, hag, slime eels
7538   n01480516                 cartilaginous fish, chondrichthian
7544   n01481498                     rabbitfish, Chimaera monstrosa
7547   n01482330                                              shark
7550   n01483021     cow shark, six-gilled shark, Hexanchus griseus
7553   n01483522                                     mackerel shark
7557   n01484097                                   mako, mako shark
7560   n01484562         bonito shark, blue pointed, Isurus glaucus
7562   n01484850  great white shark, white shark, man-eater, man...
7565   n01485479                  basking shark, Cetorhinus maximus
7568   n01486010  thresher, thrasher, thresher shark, fox shark,...
7571   n01486540                 carpet shark, Orectolobus barbatus
7573   n01486838                nurse shark, Ginglymostoma cirratum
7576   n01487506  sand tiger, sand shark, Carcharias taurus, Odo...
7579   n01488038                       whale shark, Rhincodon typus
7581   n01488385                                          cat shark
7583   n01488918                                      requiem shark
7585   n01489501         bull shark, cub shark, Carcharhinus leucas
7586   n01489709               sandbar shark, Carcharhinus plumbeus
7587   n01489920  blacktip shark, sandbar shark, Carcharhinus li...
7588   n01490112  whitetip shark, oceanic whitetip shark, white-...
7589   n01490360                 dusky shark, Carcharhinus obscurus
7591   n01490670                lemon shark, Negaprion brevirostris
7593   n01491006      blue shark, great blue shark, Prionace glauca
7595   n01491361                    tiger shark, Galeocerdo cuvieri
7597   n01491661  soupfin shark, soupfin, soup-fin, Galeorhinus ...
7598   n01491874                                            dogfish
7601   n01492357                                     smooth dogfish
7602   n01492569  smoothhound, smoothhound shark, Mustelus mustelus
7603   n01492708            American smooth dogfish, Mustelus canis
7606   n01493146  whitetip shark, reef whitetip shark, Triaenodo...
7608   n01493541                                      spiny dogfish
7610   n01493829          Atlantic spiny dogfish, Squalus acanthias
7611   n01494041            Pacific spiny dogfish, Squalus suckleyi
7614   n01494475                       hammerhead, hammerhead shark
7617   n01495006  shovelhead, bonnethead, bonnet shark, Sphyrna ...
7620   n01495493  angel shark, angelfish, Squatina squatina, mon...
7621   n01495701                                                ray
7624   n01496331         electric ray, crampfish, numbfish, torpedo
7627   n01497118                                            sawfish
7629   n01497413             smalltooth sawfish, Pristis pectinatus
7631   n01497738                                         guitarfish
7633   n01498041                                           stingray
7635   n01498406             roughtail stingray, Dasyatis centroura
7637   n01498699                                      butterfly ray
7639   n01498989                                          eagle ray
7641   n01499396  spotted eagle ray, spotted ray, Aetobatus nari...
7643   n01499732     cownose ray, cow-nosed ray, Rhinoptera bonasus
7645   n01500091                        manta, manta ray, devilfish
7649   n01500854
n02512053                                               fish
13337  n02512830                              game fish, sport fish
13338  n02512938                                          food fish
13339  n02513248                                         rough fish
13340  n02513355                            groundfish, bottom fish
13341  n02513560                                         young fish
13349  n02514825                                          bony fish
13351  n02515214         crossopterygian, lobefin, lobe-finned fish
13356  n02516188                                           lungfish
13361  n02516994  Australian lungfish, Queensland lungfish, Neoc...
13363  n02517442                           catfish, siluriform fish
13365  n02517938                              silurid, silurid fish
13367  n02518324        European catfish, sheatfish, Silurus glanis
13369  n02518622          electric catfish, Malopterurus electricus
13372  n02519148                         bullhead, bullhead catfish
13376  n02519686  channel catfish, channel cat, Ictalurus punctatus
13377  n02519862  blue catfish, blue cat, blue channel catfish, ...
13379  n02520147  flathead catfish, mudcat, goujon, shovelnose c...
13381  n02520525                                    armored catfish
13383  n02520810                                        sea catfish
13385  n02521129                                      crucifix fish
13388  n02521646                                gadoid, gadoid fish
13391  n02522399
n02523427             burbot, eelpout, ling, cusk, Lota lota
13414  n02525703                   grenadier, rattail, rattail fish
13416  n02526121                                                eel
13420  n02526818                         common eel, freshwater eel
13423  n02527271                                   moray, moray eel
13425  n02527622                                 conger, conger eel
13427  n02528163                   teleost fish, teleost, teleostan
13431  n02529293  beaked salmon, sandfish, Gonorhynchus gonorhyn...
13433  n02529772                              clupeid fish, clupeid
13464  n02535163                                          blackfish
13465  n02535258                                            redfish
13484  n02538985                                          whitefish
13486  n02539424             lake whitefish, Coregonus clupeaformis
13489  n02539894  round whitefish, Menominee whitefish, Prosopiu...
13490  n02540091   Rocky Mountain whitefish, Prosopium williamsonii
13502  n02542017                 ladyfish, tenpounder, Elops saurus
13505  n02542432                            bonefish, Albula vulpes
13510  n02543255                                        lanternfish
13512  n02543565                  lizardfish, snakefish, snake-fish
13516  n02544274                  lancetfish, lancet fish, wolffish
13517  n02544475                                       handsaw fish
13525  n02545841                     opah, moonfish, Lampris regius
13528  n02546331                                         ribbonfish
13530  n02546627                    dealfish, Trachipterus arcticus
13533  n02547014  oarfish, king of the herring, ribbonfish, Rega...
13536  n02547733                                            batfish
13539  n02548247  goosefish, angler, anglerfish, angler fish, mo...
13541  n02548689                              toadfish, Opsanus tau
13542  n02548884               oyster fish, oyster-fish, oysterfish
13544  n02549248                                           frogfish
13545  n02549376                                     sargassum fish
13548  n02549989                          needlefish, gar, billfish
13551  n02550460                                        flying fish
13552  n02550655        monoplane flying fish, two-wing flying fish
13553  n02550780         biplane flying fish, four-wing flying fish
13558  n02551668                saury, billfish, Scomberesox saurus
13560  n02552171                spiny-finned fish, acanthopterygian
13566  n02554730                  percoid fish, percoid, percoidean
13583  n02558560                                           sandfish
13585  n02558860                                           cusk-eel
13589  n02559383                              pearlfish, pearl-fish
13606  n02562315                               sunfish, centrarchid
13615  n02563949   spotted sunfish, stumpknocker, Lepomis punctatus
13618  n02564403     rock bass, rock sunfish, Ambloplites rupestris
13626  n02566109                            serranid fish, serranid
13637  n02568087  striped bass, striper, Roccus saxatilis, rockfish
13639  n02568447        stone bass, wreckfish, Polyprion americanus
13641  n02568807              belted sandfish, Serranus subligarius
13648  n02569905                   creole-fish, Paranthias furcifer
13650  n02570164                       jewfish, Mycteroperca bonaci
13652  n02570484                                           soapfish
13654  n02570838                     surfperch, surffish, surf fish
13662  n02572196                                       cardinalfish
13664  n02572484            flame fish, flamefish, Apogon maculatus
13666  n02572763                    conchfish, Astropogon stellatus
13669  n02573249            tilefish, Lopholatilus chamaeleonticeps
13672  n02573704                      bluefish, Pomatomus saltatrix
13675  n02574271         cobia, Rachycentron canadum, sergeant fish
13678  n02574910                   remora, suckerfish, sucking fish
13680  n02575325                    sharksucker, Echeneis naucrates
13684  n02576223                            carangid fish, carangid
13695  n02577952          threadfish, thread-fish, Alectis ciliaris
13697  n02578233  moonfish, Atlantic moonfish, horsefish, horseh...
13698  n02578454              lookdown, lookdown fish, Selene vomer
13700  n02578771                               amberjack, amberfish
13702  n02579091      rudderfish, banded rudderfish, Seriola zonata
13703  n02579303                          kingfish, Seriola grandis
13709  n02580188                        pilotfish, Naucrates ductor
13718  n02581642  round scad, cigarfish, quiaquia, Decapterus pu...
13720  n02581957                     dolphinfish, dolphin, mahimahi
13727  n02583096                               blanquillo, tilefish
13730  n02583567                  characin, characin fish, characid
13738  n02584915                                    antenna, feeler
13740  n02585285                                     barbel, feeler
13743  n02585872                              cichlid, cichlid fish
13750  n02587300  grey snapper, gray snapper, mangrove snapper, ...
13751  n02587479        mutton snapper, muttonfish, Lutjanus analis
13764  n02589623        porkfish, pork-fish, Anisotremus virginicus
13767  n02590094        pigfish, hogfish, Orthopristis chrysopterus
13769  n02590495                                sparid, sparid fish
13780  n02592371  pinfish, sailor's-choice, squirrelfish, Lagodo...
13790  n02594250                            sciaenid fish, sciaenid
13791  n02594552                                     drum, drumfish
13794  n02595056                jackknife-fish, Equetus lanceolatus
13798  n02595702  red drum, channel bass, redfish, Sciaenops oce...
13800  n02596067              mulloway, jewfish, Sciaena antarctica
13806  n02597004  yellowfin croaker, surffish, surf fish, Umbrin...
13809  n02597608                                           kingfish
13815  n02598573  white croaker, chenfish, kingfish, Genyonemus ...
13817  n02598878         white croaker, queenfish, Seriphus politus
13820  n02599347                        weakfish, Cynoscion regalis
13821  n02599557  spotted weakfish, spotted sea trout, spotted s...
13825  n02600298  goatfish, red mullet, surmullet, Mullus surmul...
13826  n02600503                       red goatfish, Mullus auratus
13828  n02600798         yellow goatfish, Mulloidichthys martinicus
13831  n02601344                   mullet, grey mullet, gray mullet
13849  n02604480       Bermuda chub, rudderfish, Kyphosus sectatrix
13852  n02604954         spadefish, angelfish, Chaetodipterus faber
13854  n02605316                                     butterfly fish
13858  n02605936                                          angelfish
13861  n02606384                             damselfish, demoiselle
13865  n02607072                                       anemone fish
13866  n02607201             clown anemone fish, Amphiprion percula
13872  n02608284         pigfish, giant pigfish, Achoerodus gouldii
13874  n02608547         hogfish, hog snapper, Lachnolaimus maximus
13881  n02609617                             razor fish, razor-fish
13882  n02609823          pearly razorfish, Hemipteronatus novacula
13884  n02610066                  tautog, blackfish, Tautoga onitis
13888  n02610664                  parrotfish, polly fish, pollyfish
13894  n02611561                                            jawfish
13900  n02612657                          blennioid fish, blennioid
13908  n02614140                                clinid, clinid fish
13915  n02615298          rock gunnel, butterfish, Pholis gunnellus
13920  n02616128                                          eelblenny
13922  n02616397     wrymouth, ghostfish, Cryptacanthodes maculatus
13925  n02616851                       wolffish, wolf fish, catfish
13927  n02617207                                      eelpout, pout
13929  n02617537              viviparous eelpout, Zoarces viviparus
13931  n02617819                      fish doctor, Gymnelis viridis
13936  n02618513          sand lance, sand launce, sand eel, launce
13949  n02620578                     archerfish, Toxotes jaculatrix
13951  n02620956                                          worm fish
13953  n02621258                                        surgeonfish
13955  n02621577      doctorfish, doctor-fish, Acanthurus chirurgus
13962  n02622712                        oilfish, Ruvettus pretiosus
13964  n02622955                  cutlassfish, frost fish, hairtail
13966  n02623445                          scombroid, scombroid fish
13979  n02626265     cero, pintado, kingfish, Scomberomorus regalis
13997  n02629716                         swordfish, Xiphias gladius
13999  n02630281                                           sailfish
14001  n02630615            Atlantic sailfish, Istiophorus albicans
14002  n02630739                                           billfish
14010  n02632039                                          spearfish
14015  n02632989            butterfish, stromateid fish, stromateid
14017  n02633422                  dollarfish, Poronotus triacanthus
14021  n02633977                   harvestfish, Paprilus alepidotus
14023  n02634285                                          driftfish
14025  n02634545                                          driftfish
14029  n02635154  barrelfish, black rudderfish, Hyperglyphe perc...
14032  n02635580                                          clingfish
14033  n02635781      skillet fish, skilletfish, Gobiesox strumosus
14050  n02638596                                ganoid, ganoid fish
14053  n02639087               bowfin, grindle, dogfish, Amia calva
14056  n02639605            paddlefish, duckbill, Polyodon spathula
14058  n02639922               Chinese paddlefish, Psephurus gladis
14066  n02641379  gar, garfish, garpike, billfish, Lepisosteus o...
14069  n02642107                      scorpaenoid, scorpaenoid fish
14071  n02642644                        scorpaenid, scorpaenid fish
14073  n02643112          scorpionfish, scorpion fish, sea scorpion
14074  n02643316        plumed scorpionfish, Scorpaena grandicornis
14076  n02643566                                           lionfish
14078  n02643836                     stonefish, Synanceja verrucosa
14080  n02644113                                           rockfish
14081  n02644360               copper rockfish, Sebastodes caurinus
14082  n02644501   vermillion rockfish, rasher, Sebastodes miniatus
14083  n02644665                red rockfish, Sebastodes ruberrimus
14084  n02644817          rosefish, ocean perch, Sebastodes marinus
14096  n02646667                       lumpfish, Cyclopterus lumpus
14100  n02647294    snailfish, seasnail, sea snail, Liparis liparis
14106  n02648313       alligatorfish, Aspidophoroides monopterygius
14112  n02649218  painted greenling, convict fish, convictfish, ...
14128  n02652132        flying gurnard, flying robin, butterflyfish
14130  n02652668                      plectognath, plectognath fish
14132  n02653145                                        triggerfish
14134  n02653497  queen triggerfish, Bessy cerca, oldwench, oldw...
14136  n02653786                                           filefish
14138  n02654112                         leatherjacket, leatherfish
14140  n02654425                                 boxfish, trunkfish
14142  n02654745                   cowfish, Lactophrys quadricornis
14144  n02655020            puffer, pufferfish, blowfish, globefish
14148  n02655848      porcupinefish, porcupine fish, Diodon hystrix
14149  n02656032                    balloonfish, Diodon holocanthus
14151  n02656301                                           burrfish
14154  n02656670             ocean sunfish, sunfish, mola, headfish
14157  n02657368                                           flatfish
14164  n02658811              European flatfish, Platichthys flesus
14183  n02661892      grey flounder, gray flounder, Etropus rimosus
14194  n02663485
n07775375                                               fish
42398  n07775905                                     saltwater fish
42399  n07776545                                    freshwater fish
42413  n07778938                              dolphinfish, mahimahi
42415  n07779263                                        buffalofish
42419  n07779747                                           monkfish
42421  n07780038                                    catfish, mudcat
42423  n07780307                                            sunfish
42425  n07780627                             tuna, tuna fish, tunny
42433  n07781972            blowfish, sea squab, puffer, pufferfish
42439  n07782939                                            panfish
42440  n07783102                                          stockfish
42441  n07783210                                          shellfish
42445  n07783967                                                eel
42446  n07784197                                         smoked eel
42448  n07784367                   mullet, grey mullet, gray mullet
42450  n07784810                                           kingfish
42457  n07785783                                           bluefish
42458  n07785887                                          swordfish
42459  n07786005                                         butterfish
42477  n07788885             crayfish, crawfish, crawdad, ecrevisse
42478  n07789063                                       cod, codfish
42486  n07790400                                           flatfish
42493  n07791434                               grey sole, gray sole
42499  n07792117                     redfish, rosefish, ocean perch
42500  n07792219                                           rockfish
42501  n07792383                                           sailfish
42502  n07792470                                           weakfish
42507  n07793260   spiny lobster, langouste, rock lobster, crayfish
42520  n07795133                                          whitefish
42521  n07795317                                          whitefish
42535  n07797518                                       red rockfish
n10586557                                              shark
57371  n10586674


ADDED FROM NOT FISH

    - n01440764 
    - n02514041
    - n02536864
    - n02606052
    - n02649242


"""

ambiguous_string = """
ambiguous in fish:
    - n00434657
    - n00453935
    - n00454121
    - n00454395
    - n00455173
    - n01046571
    - n02732072
    - n03350204
    n03350352                                   fisherman's bend
18225  n03350456  fisherman's knot, true lover's knot, truelove ...
18226  n03350602                        fisherman's lure, fish lure
18227  n03350880                                   fishery, piscary
18228  n03351036                                          fish farm
18229  n03351151                                           fishhook
18230  n03351262        fishing boat, fishing smack, fishing vessel
18231  n03351434  fishing gear, tackle, fishing tackle, fishing ...
18232  n03351768                                       fishing line
18233  n03351979                          fishing rod, fishing pole
18234  n03352232                                         fish joint
18235  n03352366                                         fish knife
18236  n03352484                                        fish ladder
18237  n03352628                               fishnet, fishing net
18238  n03352853                                          fishplate
18239  n03352961                                         fish slice
18240  n03353147                            fishtail bit, blade bit
18348  n03369512                                     fluid flywheel
18380  n03374649
n07577144                                           fish fry
41108  n07588111                                       fish chowder
n07808352                        tuna fish salad, tuna salad
42933  n07856992                                          fishpaste
42992  n07865575                         codfish ball, codfish cake
43005  n07867324                                     fish and chips
43016  n07868955                            gefilte fish, fish ball
43066  n07875693                                          fish loaf
43223  n07899769                                            Vouvray
43449  n07931733                                   fish house punch
43470  n07934800
41133  n07591473
n08111027 
n09280913
"""

In [96]:
fish_ids = re.findall(r'\bn0\w+', fish_id_string)
ambiguous_ids = re.findall(r'\bn0\w+', ambiguous_string)

In [97]:
def move_files(source_path, target_path, fish_ids=fish_ids, ambiguous_ids = ambiguous_ids, size = (224, 224)):
    # For each class type in mini imagenet
    for dir_path in os.listdir(source_path):
        
        # If the class is unclear, skip.
        if dir_path in ambiguous_ids:
            print(f"Skipped {dir_path}")
            continue
        # Elif the class is a fish type, set save_path to fish
        elif dir_path in fish_ids:
            save_class =  "fish" 
        # Otherwise, set save_path tp not fish
        else:
            save_class =   "not_fish" 

        files = os.listdir(source_path + dir_path)
        for file in files:
            test_save = (random.randint(0, 4) == 0)
            if test_save:
                file_save_path = "data/is_fish_images/test/" + save_class + "/" + file
                alt_fsp = target_path + save_class + "/" + file
            else:
                file_save_path = target_path + save_class + "/" + file
                alt_fsp = "data/is_fish_images/test/" + save_class + "/" + file
                
            file_source_path = source_path + dir_path + "/" + file
            if not os.path.exists(file_save_path) and not os.path.exists(alt_fsp):
                img = cv2.imread(file_source_path, 1)
                img_stretch = cv2.resize(img, size)
                try:
                    img_stretch = cv2.resize(img, size)
                except:
                    print(f"{path} resize failed. Skipping...")
                    continue

                cv2.imwrite(file_save_path, img_stretch)
    

In [98]:
# Move Training and validation Files (put 20% of each in test)
move_files("imagenet/train/", "data/is_fish_images/train/")
move_files("imagenet/val/", "data/is_fish_images/val/")

### Augment IsFish with Relevant Examples

In [14]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import requests
from urllib.request import urlopen

from PIL import Image

from WebScraping import search_and_download

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))




Current google-chrome version is 101.0.4951
Get LATEST chromedriver version for 101.0.4951 google-chrome
Trying to download new driver from https://chromedriver.storage.googleapis.com/101.0.4951.41/chromedriver_linux64.zip
Driver has been saved in cache [/home/shivaram/.wdm/drivers/chromedriver/linux64/101.0.4951.41]


In [19]:
terms = ["species range map", "scientific academic journal sample", "aquatic range map"]
save_folder = ""

for term in terms:
    search_and_download(term, save_folder = term, driver = driver, target_path = "/media/shivaram/SharedVolum/Projects/FishID/extra_scraped/", number_images = 500)

In [30]:
#Resize and Move to IsFish Directory
size =  (224, 224)
save_directories = ["/media/shivaram/SharedVolum/Projects/FishID/extra_scraped/" + term + "/" for term in terms]
for s_dir in save_directories:
    for file in os.listdir(s_dir):
        saving = random.randint(0, 9)
        
        # Test
        if saving == 0 or saving == 1:
            save_path = "data/is_fish_images/test/not_fish/" + file
            alt_save_path = "data/is_fish_images/train/not_fish/" + file
            alt_save_path_2 = "data/is_fish_images/val/not_fish/" + file
        # Train
        elif saving <=8:
            save_path = "data/is_fish_images/train/not_fish/" + file
            alt_save_path = "data/is_fish_images/test/not_fish/" + file
            alt_save_path_2 = "data/is_fish_images/val/not_fish/" + file
            
        # Val
        else:
            save_path = "data/is_fish_images/val/not_fish/" + file
            alt_save_path = "data/is_fish_images/train/not_fish/" + file
            alt_save_path_2 = "data/is_fish_images/test/not_fish/" + file
        
        file_path = s_dir + file
        # If image hasn't already been transferred, do so
        if not os.path.exists(save_path) and not os.path.exists(alt_save_path) and not os.path.exists(alt_save_path_2):
                img = cv2.imread(file_path, 1)
                img_stretch = cv2.resize(img, size)
                try:
                    img_stretch = cv2.resize(img, size)
                except:
                    print(f"{file} resize failed. Skipping...")
                    continue
                cv2.imwrite(save_path, img_stretch)

NameError: name 'size' is not defined

In [None]:
resize and move to not isfish folders