# In [1]:
import requests
from bs4 import BeautifulSoup as bs
import math
import re
import pandas as pd
from datetime import date as date_datetime
import datetime

def get_number_of_pages():
    """Return how many result pages the UKC logbook spans.

    Fetches the first logbook page, pulls the first whole number out of the
    text of the second ``<span class="d-none d-sm-inline">`` element (treated
    as the total number of logged climbs) and divides it by 100, the page size
    requested through ``nresults=100``, rounding up.

    Returns:
        int: Number of pages to request.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page has fewer than two matching spans, or the
            span text contains no whole number.
    """
    r = requests.get(
        "https://www.ukclimbing.com/logbook/showlog.php?id=242328&nresults=100&pg=1#my_logbook",
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    # Surface 4xx/5xx responses here instead of failing later with an
    # unrelated IndexError while parsing an error page.
    r.raise_for_status()

    page = bs(r.text, "html.parser")
    spans = page.find_all("span", {"class": "d-none d-sm-inline"})
    # The second matching span is the one whose text is parsed for the total.
    summary_text = spans[1].get_text()
    total_climbs = [int(s) for s in summary_text.split() if s.isdigit()][0]

    return math.ceil(total_climbs / 100)

def standardise_dates(list_of_dates):
    """Parse logbook date strings into ``datetime.datetime`` objects.

    Each entry is first parsed as ``"%d %b, %Y"`` (for example
    ``"12 Jun, 2020"``). If that fails, the entry is assumed to lack a year:
    the current year is appended and the parse is retried.

    Args:
        list_of_dates: Iterable of date strings.

    Returns:
        list: One ``datetime.datetime`` per input string, in input order.

    Raises:
        ValueError: If an entry cannot be parsed even with the current year
            appended.
    """
    date_format = "%d %b, %Y"
    parsed = []
    for raw in list_of_dates:
        try:
            stamp = datetime.datetime.strptime(raw, date_format)
        except ValueError:
            # No year in the string: fall back to the current year.
            this_year = date_datetime.today().year
            stamp = datetime.datetime.strptime(f"{raw}, {this_year}", date_format)
        parsed.append(stamp)
    return parsed

def standardise_grades(list_of_grades):
    """Normalise grade strings to the ``f``-prefixed form used in the output.

    Spaces and ``*`` characters are removed from every entry. The cleaned
    value is then replaced if it is one of the known V-grades or bare numeric
    grades listed below; any other value is passed through unchanged.

    Args:
        list_of_grades: Iterable of grade strings.

    Returns:
        list: Normalised grade strings, in input order.
    """
    # Cleaned grade -> normalised grade. Every target starts with "f" and no
    # key does, so one lookup per entry is sufficient.
    grade_lookup = {
        "VB": "f3", "3": "f3",
        "V0-": "f3+", "3+": "f3+",
        "V0": "f4", "4": "f4",
        "V0+": "f4+", "4+": "f4+",
        "V1": "f5", "5": "f5",
        "V2": "f5+", "5+": "f5+",
        "V3": "f6A", "6A": "f6A",
        "V4": "f6B", "6B": "f6B",
        "V5": "f6C", "6C": "f6C",
        "V6": "f7A", "7A": "f7A",
        "V7": "f7A+", "7A+": "f7A+",
        "V8": "f7B", "7B": "f7B",
        "V9": "f7C", "7C": "f7C",
        "V10": "f7C+", "7C+": "f7C+",
    }
    # Deletes every space and asterisk in a single pass.
    strip_table = str.maketrans("", "", " *")

    cleaned = [raw.translate(strip_table) for raw in list_of_grades]
    return [grade_lookup.get(grade, grade) for grade in cleaned]

def scrape_logbook():
    """Scrape every page of the UKC logbook and write it to a CSV file.

    Asks ``get_number_of_pages`` how many pages exist, downloads each one,
    and reads the climb name, grade, date and crag from every row of the
    ``myLogbookTable`` table body. Dates and grades are normalised with
    ``standardise_dates`` and ``standardise_grades``, and the result is
    written to ``../data/bouldering_data.csv`` (relative to the current
    working directory).

    Returns:
        None

    Raises:
        requests.RequestException: If a page request fails, times out, or
            returns an HTTP error status.
        IndexError: If a page has no ``myLogbookTable`` element or a row has
            fewer than two crag links.
        AttributeError: If a row lacks one of the expected cells.
    """
    names = []
    grades = []
    dates = []
    crags = []

    # Compiled once instead of once per table row.
    crag_link = re.compile(r"^/logbook/crags/")

    n = get_number_of_pages()
    for page_number in range(1, n + 1):
        UKC_logbook = "https://www.ukclimbing.com/logbook/showlog.php?id=242328&nresults=100&pg="+str(page_number)+"#my_logbook"
        # Without a timeout a stalled connection would block forever.
        r = requests.get(UKC_logbook, timeout=30)
        # Fail on 4xx/5xx instead of trying to parse an error page.
        r.raise_for_status()
        soup = bs(r.text, 'html.parser')
        Logbook = soup.find_all(id="myLogbookTable")[0]
        rows = Logbook.find("tbody").find_all("tr")
        # `row` rather than `i`, so the page counter is not shadowed.
        for row in rows:
            name = row.find("a", {"class": "climbName"}).get_text()
            grade = row.find("td", {"class": "grade"}).get_text()
            date = row.find("td", {"class": "logdate text-center text-nowrap"}).get_text()
            ven = row.find_all("a", href=crag_link)
            # The second link under /logbook/crags/ is used as the crag name.
            crag = ven[1].get_text()
            names.append(name)
            grades.append(grade)
            crags.append(crag)
            dates.append(date)

    dates_1 = standardise_dates(dates)
    grades_1 = standardise_grades(grades)

    outdoor_bouldering_dataframe = pd.DataFrame(
        {'Name': names,
         'Grade': grades_1,
         'Crag': crags,
         'Date': dates_1,
         })

    try:
        outdoor_bouldering_dataframe['Date'] = pd.to_datetime(
            outdoor_bouldering_dataframe['Date'], format='%d %b, %Y')
    except ValueError:
        # Best effort: keep the column exactly as standardise_dates produced it.
        pass

    outdoor_bouldering_dataframe.to_csv("../data/bouldering_data.csv")

# Run the scrape only when this file is executed directly (script or notebook
# cell), so that importing it does not trigger network requests and a file
# write as a side effect.
if __name__ == "__main__":
    scrape_logbook()
