In [None]:
!pip3 install icalendar
!pip3 install beautifulsoup4
!pip3 install requests
!pip3 install python-slugify
!pip3 install pandas
!pip3 install icalendar
!pip3 install uuid

In [None]:
import os
import re
from urllib.parse import urljoin

import pandas as pd
import requests
import slugify
from bs4 import BeautifulSoup

clubs_url = "http://www.cpliege.be/caleclub.asp"

# Fetch the club index page. The timeout keeps a stalled server from hanging
# the notebook, and raise_for_status() stops on an HTTP error instead of
# scraping an error page.
clubs_response = requests.get(clubs_url, timeout=30)
clubs_response.raise_for_status()
clubs_url_html = clubs_response.text

soup = BeautifulSoup(clubs_url_html, 'html.parser')

# Collect the links of the page. Only anchors that carry an href are kept:
# indexing club['href'] on an anchor without one would raise KeyError.
clubs = soup.find_all('a', href=True)

# Build a dict mapping each club name to the absolute URL of its page.
clubs_dict = {}
for club in clubs:
    # Collapse every run of whitespace (space, tab, newline, return, formfeed)
    # into a single space.
    club_name = " ".join(club.text.split())
    if not club_name:
        # An anchor without text has no usable name to key the dict on.
        continue
    # urljoin resolves relative hrefs against the index page URL and leaves
    # absolute hrefs untouched.
    clubs_dict[club_name] = urljoin(clubs_url, club['href'])

print(clubs_dict)


In [None]:

def _normalize_time(value):
    """Normalize one raw "Heure" cell to an ``HH:MM``-style string.

    Missing values are returned unchanged. ``.`` and ``;`` are treated as
    hour/minute separators. When the part after the first ``:`` is not exactly
    two characters long, a ``0`` is appended (e.g. ``20.3`` -> ``20:30``).
    Values without any separator are returned as text, unchanged.
    """
    if pd.isnull(value):
        return value
    text = str(value)
    for separator in (".", ";"):
        text = text.replace(separator, ":")
    parts = text.split(":")
    if len(parts) > 1 and len(parts[1]) != 2:
        text += "0"
    return text


def get_club_agenda(club_url):
    """Read a club page and return its agenda as a cleaned DataFrame.

    Parameters
    ----------
    club_url : str
        Location handed to ``pd.read_html``; the first table found is used,
        with row 5 taken as the header row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Code``, ``Date`` (datetime), ``Heure`` (text), ``Équipe 1``,
        ``Équipe 2``, ``Catégorie`` and ``Autre``, sorted by category and then
        by date. Rows without a category, rows whose category is wrapped in
        parentheses, and rows without a date are removed. A missing ``Heure``
        stays missing instead of raising.

    Raises
    ------
    ValueError
        If the table does not have exactly nine columns, or a date does not
        match ``%d/%m/%y``.
    """
    # Work on a copy so every later step operates on an independent frame
    # (no SettingWithCopy warnings, no mutation of the parsed table).
    agenda = pd.read_html(club_url, header=5)[0].copy()

    # Name the columns first so the filters below do not depend on the
    # auto-generated "Unnamed: 7" label of the category column.
    agenda.columns = ["Code", "Unknown", "Weekday", "Date", "Heure", "Équipe 1", "Équipe 2", "Catégorie", "Autre"]

    # Drop rows whose category is empty OR starts with "(" and ends with ")".
    # astype(str) keeps the string tests usable even when the whole column is
    # missing; the notnull() mask still removes the missing rows themselves.
    category_text = agenda["Catégorie"].astype(str)
    in_parentheses = category_text.str.startswith("(") & category_text.str.endswith(")")
    agenda = agenda.loc[agenda["Catégorie"].notnull() & ~in_parentheses]

    print(len(agenda))

    # Keep dated rows only and remove the two columns that are not exported.
    agenda = agenda.loc[agenda["Date"].notnull()].drop(columns=["Unknown", "Weekday"])

    agenda = agenda.assign(
        Heure=agenda["Heure"].map(_normalize_time),
        Date=pd.to_datetime(agenda["Date"], format="%d/%m/%y"),
    )

    # Order by category and then by date.
    return agenda.sort_values(by=["Catégorie", "Date"])

In [None]:
from icalendar import Calendar, Event
import uuid
from datetime import datetime

def _text_or_empty(value):
    """Return ``value`` as text, or an empty string when it is missing."""
    return "" if pd.isnull(value) else str(value)


def generate_ics(agenda_category, filepath, club_name, club_url):
    """Write the matches of one category to an iCalendar (.ics) file.

    Parameters
    ----------
    agenda_category : pandas.DataFrame
        Rows to export, with columns ``Catégorie``, ``Date``, ``Heure``,
        ``Équipe 1`` and ``Équipe 2``. The category of the first row names
        the file and the calendar, so the frame must not be empty.
    filepath : str
        Existing directory in which ``<slugified category>.ics`` is written.
    club_name : str
        Club name, used as the prefix of the calendar name.
    club_url : str
        Stored as the URL property of every event.

    Notes
    -----
    Every event lasts 120 minutes and is localized to Europe/Brussels.
    Rows without a date or a time are skipped because they cannot be
    scheduled. A missing team name is rendered as an empty string.
    The event UID is derived from the event name only.
    """
    # Local import: the notebook only imports `datetime` from this module.
    from datetime import timezone

    category = agenda_category["Catégorie"].iloc[0]
    filename = filepath + "/" + slugify.slugify(category) + ".ics"
    cal_name = club_name + " - " + category

    cal = Calendar()
    cal.add('prodid', 'Made with ❤️ by Martin Erpicum')
    cal.add('version', '2.0')
    cal.add('method', "PUBLISH")
    # Added a single time: adding a property twice writes it twice.
    cal.add('X-WR-TIMEZONE', "Europe/Brussels")
    cal.add('X-WR-CALNAME', cal_name)
    cal.add('X-WR-CALDESC', cal_name)

    # DTSTAMP is expected in UTC; an aware datetime avoids a naive local time
    # being interpreted as UTC.
    generated_at = datetime.now(timezone.utc)

    for _, match in agenda_category.iterrows():
        if pd.isnull(match["Date"]) or pd.isnull(match["Heure"]):
            # Without both a date and a time there is no start to publish.
            continue

        home_team = _text_or_empty(match["Équipe 1"])
        away_team = _text_or_empty(match["Équipe 2"])
        name = '🏀 ' + _text_or_empty(match["Catégorie"]) + ': ' + home_team + " et " + away_team

        start_text = pd.to_datetime(match["Date"]).strftime("%Y-%m-%d") + " " + str(match["Heure"])
        start_naive = pd.to_datetime(start_text)
        startime = start_naive.tz_localize('Europe/Brussels')
        # The end is computed on the wall-clock time and then localized.
        endtime = (start_naive + pd.Timedelta(minutes=120)).tz_localize('Europe/Brussels')

        # The UID is generated from the event name.
        event_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, name)

        e = Event()
        # Start and end are set through the DTSTART/DTEND properties only.
        e.add('summary', name)
        e.add('dtstart', startime)
        e.add('dtend', endtime)
        e.add('dtstamp', generated_at)
        # The home team name doubles as the location.
        e.add('location', home_team)
        e.add('priority', 5)
        e.add('sequence', 1)
        e.add('description', name)
        e.add('url', club_url)
        e.add('uid', str(event_uuid))

        cal.add_component(e)

    # Save to file; the context manager closes it.
    with open(filename, 'wb') as f:
        f.write(cal.to_ical())

In [None]:
base_raw_path = "https://raw.githubusercontent.com/tintamarre/sport-events-to-calendar/main/"

# Markdown listing of every club with links to its exported files.
md = "# 🏀 Les clubs du CPLiège\n\n"

md += "Dernière mise à jour: " + datetime.now().strftime("%d/%m/%Y %H:%M:%S") + "\n\n"

for club_name, club_url in clubs_dict.items():
    md += "## [" + club_name + "](" + club_url + ")\n\n"

    agenda = get_club_agenda(club_url)

    club_slug = slugify.slugify(club_name)
    storage_path = "data/" + club_slug

    # makedirs also creates the parent "data" directory on a fresh checkout
    # and is a no-op when the club directory already exists.
    os.makedirs(storage_path, exist_ok=True)

    # Full agenda of the club as CSV.
    csv_path = storage_path + "/" + club_slug + ".csv"
    agenda.to_csv(csv_path, index=False)

    md += "* [Agenda](" + base_raw_path + csv_path + ")\n"

    categories = agenda["Catégorie"].unique()

    # One calendar file per category.
    for category in categories:
        agenda_category = agenda[agenda["Catégorie"] == category]

        generate_ics(agenda_category, storage_path, club_name, club_url)

        md += "* [" + category + "](" + base_raw_path + storage_path + "/" + slugify.slugify(category) + ".ics)\n"

    md += "\n"

# The listing contains emoji and accented characters, so the encoding is set
# explicitly instead of relying on the platform default.
with open("listing.md", "w", encoding="utf-8") as f:
    f.write(md)