In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [4]:
# One dict per logbook table row, accumulated across every scraped season.
fish_data = []
# Labels are assigned to each row's <td> cells by position (first label -> first cell, ...).
# zip() stops at the shorter sequence, so a row with fewer cells simply lacks the trailing keys.
titles = ['none', 'date', 'fish_type', 'origin', 'gender', 'weight', 'size', 'catch_release', 'position', 'position_number', 'position_name', 'fly_or_spinner', 'bait', 'bait_size', 'extra_1', 'extra_2']


for year in range(2010, 2022):

    # requests never times out by default; cap the wait (in seconds) so a stalled
    # server cannot hang the cell forever.
    response = requests.get(f"https://www.fnjoska.is/logbooks/fnjoska/{year}/", timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero rows.
    response.raise_for_status()
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one is installed;
    # consider pinning one (e.g. "html.parser") so results match across environments.
    soup = BeautifulSoup(response.content)

    for row in soup.select("tr"):
        cells = [cell.text for cell in row.select('td')]
        if not cells:
            # Rows without any <td> (presumably <th>-only header rows) carry no data.
            continue
        fish_data.append(dict(zip(titles, cells)))

    # The count is cumulative over all years fetched so far.
    print(f"{year} finished. count: ", len(fish_data))

    time.sleep(5) # Always be kind to the system admins.

2010 finished. count:  1047
2011 finished. count:  1728
2012 finished. count:  1993
2013 finished. count:  2799
2014 finished. count:  3543
2015 finished. count:  4498
2016 finished. count:  5010
2017 finished. count:  5517
2018 finished. count:  5864
2019 finished. count:  6261
2020 finished. count:  6883
2021 finished. count:  7309


In [5]:
# Turn the scraped row dicts into a table and discard the column labelled 'none'.
df = pd.DataFrame(fish_data).drop(columns=["none"])

In [6]:
# Preview the first five raw (still string-typed) rows.
df.head(n=5)

Unnamed: 0,date,fish_type,origin,gender,weight,size,catch_release,position,position_number,position_name,fly_or_spinner,bait,bait_size,extra_1,extra_2
0,19.9.2010,Lax,,Hrygna,63,850,X,Svæði 4,65,Mógilsbreiða,Fluga,Þýsk snælda,"1 1/2""",,
1,19.9.2010,Lax,,Hængur,25,640,,Svæði 2,33,Ferjupollur,Fluga,Sunray,14,Hreistursýni,
2,19.9.2010,Lax,,Hrygna,18,550,,Svæði 2,28,Árbugsárós,Fluga,Skröggur,"1""",Sleppt,
3,19.9.2010,Lax,,Hængur,53,800,,Svæði 2,34,Sandur,Fluga,Rauð frances keila,"1""",,
4,19.9.2010,Lax,,Hængur,25,610,,Svæði 2,33,Ferjupollur,Fluga,Sunray keila,,,


In [7]:
def _parse_decimal(values):
    """Convert a string Series that uses decimal commas into numbers.

    Every '*' character is removed and every ',' is replaced by '.' before the
    result is handed to ``pd.to_numeric``. The vectorised ``.str`` methods pass
    missing values through as NaN instead of raising, unlike calling
    ``str.replace`` on each element.

    Parameters
    ----------
    values : pd.Series
        Series of strings (may contain missing values).

    Returns
    -------
    pd.Series
        Numeric Series aligned with ``values``.

    Raises
    ------
    ValueError
        If a cleaned value still cannot be parsed as a number.
    """
    cleaned = (
        values
        .str.replace('*', '', regex=False)   # regex=False: '*' is a literal, not a pattern
        .str.replace(',', '.', regex=False)
    )
    return pd.to_numeric(cleaned)


# Dates are written day.month.year, e.g. 19.9.2010.
df['date'] = pd.to_datetime(df['date'], format='%d.%m.%Y')
df['weight'] = _parse_decimal(df['weight'])
df['size'] = _parse_decimal(df['size'])
# Strip surrounding whitespace so the value can serve as an exact-match join key.
df['position_number'] = df['position_number'].str.strip()

In [8]:
# requests never times out by default; cap the wait (in seconds) so a stalled
# server cannot hang the cell forever.
response = requests.get("https://www.fnjoska.is/areapools/fnjoska", timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page into zero rows.
response.raise_for_status()
soup = BeautifulSoup(response.content)

# One {'position_number', 'position_name'} dict per table row that has <td> cells.
position_data = []
for row in soup.select("tr"):
    cells = [cell.text for cell in row.select('td')]
    if not cells:
        continue
    # The first cell's text has the form "<number> - <name>"; split it once and reuse the parts.
    parts = cells[0].split(' - ')
    position_data.append({
        'position_number': parts[0].strip(),
        'position_name': parts[1].strip()
    })


In [9]:
# Tabulate the scraped position records and preview the first rows.
df_positions = pd.DataFrame(data=position_data)
df_positions.head(n=5)

Unnamed: 0,position_number,position_name
0,1,Hríslubreiða
1,2,Klapparhylur
2,3,Skúlaskeið
3,4,Bjarghorn
4,5,Laufáshola


In [10]:
def position(n):
    """Return the "Svæði N" label for position number ``n``.

    ``n`` is converted with ``int()``, so numeric strings are accepted.
    Numbers up to 22 map to "Svæði 1", up to 38 to "Svæði 2", up to 52 to
    "Svæði 3", and anything larger to "Svæði 4".
    """
    number = int(n)
    # (inclusive upper bound, label) pairs, checked in ascending order.
    upper_bounds = ((22, "Svæði 1"), (38, "Svæði 2"), (52, "Svæði 3"))
    for upper_bound, label in upper_bounds:
        if number <= upper_bound:
            return label
    return "Svæði 4"

In [11]:
# Label every position with its "Svæði N" value, derived from the position number.
df_positions['position'] = df_positions['position_number'].map(position)

In [12]:
# Swap the logbook's own position_name/position columns for the ones in df_positions.
# The join is an inner join (pandas' default), so catches whose position_number has no
# match in df_positions do not appear in the result.
df_merge = (
    df
    .drop(columns=['position_name', 'position'])
    .merge(df_positions, how='inner', on='position_number')
)

In [14]:
# Preview the first five rows of the merged table.
df_merge.head(n=5)

Unnamed: 0,date,fish_type,origin,gender,weight,size,catch_release,position_number,fly_or_spinner,bait,bait_size,extra_1,extra_2,position_name,position
0,2010-09-19,Lax,,Hrygna,6.3,85.0,X,65,Fluga,Þýsk snælda,"1 1/2""",,,Mógilsbreiða,Svæði 4
1,2010-09-16,Lax,,Hrygna,5.0,78.0,X,65,Fluga,Frances,"1""",,,Mógilsbreiða,Svæði 4
2,2010-09-15,Lax,,Hængur,2.1,58.0,,65,Fluga,Frances,"1""",,,Mógilsbreiða,Svæði 4
3,2010-09-12,Lax,,Hrygna,6.1,84.0,X,65,Fluga,Rauð frances,"1/2""",,,Mógilsbreiða,Svæði 4
4,2010-09-11,Lax,,Hængur,1.8,54.0,,65,Fluga,Frances blá,"1/2""",,,Mógilsbreiða,Svæði 4


In [13]:
# Write the merged table to disk; index=False leaves the row index out of the file.
df_merge.to_csv(path_or_buf="fnjoska.csv", index=False)