# TV Grid Scrapper
Python Version 3.11.4  
`pip install requests bs4`

In [126]:
from requests import get as get_url
from bs4 import BeautifulSoup
from dataclasses import dataclass
from json import dumps as json_dumps

CAPTURE_URL = 'https://www.programme-tv.net/programme/programme-tnt.html'

In [127]:
TVGRID_PAGE = get_url(CAPTURE_URL)
soup = BeautifulSoup(TVGRID_PAGE.content, 'html.parser')
TVGRID_RAW_BLOCKS = soup.select('.mainBroadcastCard-infos')

In [128]:
@dataclass
class Channel:
    channel_number: str
    channel_name: str

channels = []

for channel in soup.select('.gridRow-cardsChannelName'):
    channel_number = channel.select_one('.sr-only').text.strip().replace('N°', '') # Numéro de la chaine
    channel_name = channel.text.strip().replace(f'N°{channel_number}', '') # Description de la chaîne
    channels.append(Channel(channel_number=channel_number, channel_name=channel_name))

In [129]:
@dataclass
class Program:
    startBroadcasting: str
    title: str
    subtitle: str
    duration_hours: str
    duration_minutes: str
    is_broadcast_new: bool
    channel: Channel

programs = []

channel_counter = 0
for programme in TVGRID_RAW_BLOCKS:
    startBroadcasting = programme.select_one('.mainBroadcastCard-startingHour')
    if startBroadcasting:
        startBroadcasting = startBroadcasting.text.strip()

    title = programme.select_one('.mainBroadcastCard-title')
    if title:
        title = title.text.strip()

    subtitle = programme.select_one('.mainBroadcastCard-subtitle')
    if subtitle:
        subtitle = subtitle.text.strip()

    duration = programme.select_one('.mainBroadcastCard-durationContent')
    if duration:
        duration = duration.text.strip()
        duration = duration.replace('min', '')
        if (duration.count('h')):
            duration_hours = duration.split('h')[0]
            duration_minutes = duration.split('h')[1]
        else:
            duration_hours = '0'
            duration_minutes = duration
    
    broadcast_new = programme.select_one('.mainBroadcastCard-new')
    if broadcast_new:
        broadcast_new = broadcast_new.text.strip()
        if broadcast_new == 'Inédit':
            is_broadcast_new = True
        else:
            is_broadcast_new = False
    else:
        is_broadcast_new = False

    programs.append(Program(startBroadcasting=startBroadcasting,
                            title=title,
                            subtitle=subtitle,
                            duration_hours=duration_hours,
                            duration_minutes=duration_minutes,
                            is_broadcast_new=is_broadcast_new,
                            channel=channels[int(channel_counter)]))
    channel_counter += 0.5

dump = json_dumps(programs, default=lambda o: o.__dict__, ensure_ascii=False)
print(dump)

[{"startBroadcasting": "21h10", "title": "Ninja Warrior, le parcours des héros", "subtitle": "Face aux légendes 2", "duration_hours": "2", "duration_minutes": "25", "is_broadcast_new": true, "channel": {"channel_number": "1", "channel_name": "TF1"}}, {"startBroadcasting": "23h35", "title": "Vendredi, tout est permis avec Arthur", "subtitle": null, "duration_hours": "2", "duration_minutes": "", "is_broadcast_new": true, "channel": {"channel_number": "1", "channel_name": "TF1"}}, {"startBroadcasting": "21h05", "title": "Concert de Paris 2023", "subtitle": null, "duration_hours": "2", "duration_minutes": "30", "is_broadcast_new": false, "channel": {"channel_number": "2", "channel_name": "France 2"}}, {"startBroadcasting": "23h35", "title": "Drag Race France", "subtitle": "Moteur, dragtion !", "duration_hours": "1", "duration_minutes": "15", "is_broadcast_new": true, "channel": {"channel_number": "2", "channel_name": "France 2"}}, {"startBroadcasting": "21h10", "title": "Alexandra Ehle", "