# TV Grid Scraper
Python Version 3.11.4  
`pip install requests bs4`

In [86]:
from requests import get as get_url
from bs4 import BeautifulSoup
from dataclasses import dataclass
from json import dumps as json_dumps
from IPython.core.display import display_json, display_html

# TV guide page (programme-tv.net, TNT channels) that this notebook downloads and scrapes.
CAPTURE_URL = 'https://www.programme-tv.net/programme/programme-tnt.html'

In [87]:
# Download the TV guide page. The timeout keeps the cell from hanging forever
# on a stalled connection, and raise_for_status() stops right here on an HTTP
# error instead of silently parsing an error page into an empty grid.
TVGRID_PAGE = get_url(CAPTURE_URL, timeout=30)
TVGRID_PAGE.raise_for_status()

# Parse the document once; `soup` is also queried later for the channel rows.
soup = BeautifulSoup(TVGRID_PAGE.content, 'html.parser')

# One element per programme card (start time, title, subtitle, duration, ...).
TVGRID_RAW_BLOCKS = soup.select('.mainBroadcastCard-infos')

In [88]:
@dataclass
class Channel:
    """A TV channel scraped from one '.gridRow-cardsChannelName' element."""
    # Channel number, kept as text: the '.sr-only' label with 'N°' removed.
    channel_number: str
    # Channel label: the element's text with the 'N°<number>' part removed.
    channel_name: str

# Channels in page order. The programme cards are later attributed to channels
# by position in this list, so every row must produce exactly one entry.
channels = []

for channel in soup.select('.gridRow-cardsChannelName'):
    # Channel number: read from the screen-reader-only label (presumably of
    # the form 'N°<number>'). A row without that label no longer crashes; it
    # is kept with an empty number so list positions stay aligned.
    number_node = channel.select_one('.sr-only')
    number_label = number_node.text.strip() if number_node is not None else ''
    channel_number = number_label.replace('N°', '').strip()

    # Channel name: the row's full text minus the number label. Strip again
    # afterwards because removing the label can leave stray whitespace behind.
    channel_name = channel.text.strip()
    if number_label:
        channel_name = channel_name.replace(number_label, '').strip()

    channels.append(Channel(channel_number=channel_number, channel_name=channel_name))

## Render data as JSON

In [89]:
@dataclass
class Program:
    """One programme card scraped from the grid, together with its channel."""
    # Text of '.mainBroadcastCard-startingHour'; None when the card has no such element.
    startBroadcasting: str | None
    # Text of '.mainBroadcastCard-title'; None when the card has no such element.
    title: str | None
    # Text of '.mainBroadcastCard-subtitle'; None when the card has no such element.
    subtitle: str | None
    # Hours part of the duration label, as text ('0' when the label has no 'h').
    duration_hours: str
    # Minutes part of the duration label, as text ('0' when nothing follows the 'h').
    duration_minutes: str
    # True only when the card's '.mainBroadcastCard-new' text is exactly 'Inédit'.
    is_broadcast_new: bool
    # Channel this card is attributed to.
    channel: Channel

def _node_text(node):
    """Return the stripped text of a parsed element, or None when it is missing."""
    return None if node is None else node.text.strip()


def _split_duration(label):
    """Split a duration label into ``(hours, minutes)`` strings.

    The 'min' suffix is dropped, then the text is cut on 'h'. A label without
    'h' is all minutes; any part that ends up empty is reported as '0'.
    """
    cleaned = label.strip().replace('min', '')
    if 'h' not in cleaned:
        return '0', cleaned.strip() or '0'
    pieces = cleaned.split('h')
    return pieces[0].strip() or '0', pieces[1].strip() or '0'


def _parse_program(card, channel):
    """Build a Program from one '.mainBroadcastCard-infos' element.

    Args:
        card: parsed element of a single programme card.
        channel: Channel the card belongs to.

    Returns:
        A Program; start time, title and subtitle are None when the card
        lacks the corresponding element.
    """
    duration_label = _node_text(card.select_one('.mainBroadcastCard-durationContent'))
    # The duration is computed afresh for every card. Previously a card with
    # no duration element silently reused the previous card's values (or hit
    # a NameError when it was the very first card).
    if duration_label is None:
        duration_hours, duration_minutes = '0', '0'
    else:
        duration_hours, duration_minutes = _split_duration(duration_label)

    new_label = _node_text(card.select_one('.mainBroadcastCard-new'))

    return Program(
        startBroadcasting=_node_text(card.select_one('.mainBroadcastCard-startingHour')),
        title=_node_text(card.select_one('.mainBroadcastCard-title')),
        subtitle=_node_text(card.select_one('.mainBroadcastCard-subtitle')),
        duration_hours=duration_hours,
        duration_minutes=duration_minutes,
        # Only the exact 'Inédit' badge marks a first broadcast.
        is_broadcast_new=new_label == 'Inédit',
        channel=channel,
    )


# Cards are attributed to channels by position: two consecutive cards per
# channel, in page order (cards 0-1 -> channels[0], cards 2-3 -> channels[1]).
# NOTE(review): this assumes the page always shows exactly two cards per
# channel row; more cards than 2 * len(channels) raises IndexError.
programs = [
    _parse_program(card, channels[index // 2])
    for index, card in enumerate(TVGRID_RAW_BLOCKS)
]

# Dataclass instances (Program and the nested Channel) are serialised through
# their attribute dicts; ensure_ascii=False keeps accented characters readable.
dump = json_dumps(programs, default=lambda o: o.__dict__, ensure_ascii=False)
display_json(dump, raw=True)

## Render data as HTML

In [91]:
from html import escape as escape_html


def _html_text(value):
    """Return ``value`` escaped for safe inclusion in HTML ('' when it is None).

    The values come from a scraped third-party page, so they must never be
    interpolated into markup verbatim.
    """
    return '' if value is None else escape_html(str(value))


# Static page header and stylesheet; fragments are collected in a list and
# joined once at the end instead of growing a string inside the loop.
html_parts = ['''<h1>Les programmes de ce soir</h1>
<style>
.card_broadcast {
background-color: lightgrey;
}
.card_program_odd {
background-color: lightgreen;
}
.card_program_even {
background-color: lightblue;
}
.broadcast_start {
font-weight: bold;
}
.duration {
font-weight: bold;
}
.first_broadcast {
display: flex;
font-weight: bold;
font-size: large;
text-transform: uppercase;
justify-content: flex-end;
}
</style>
''']

# Programmes are rendered in pairs: the first card of a pair opens the channel
# wrapper (and prints the channel heading), the second one closes it.
for index, program in enumerate(programs):
    first_of_pair = index % 2 == 0

    if first_of_pair:
        html_parts.append(
            f'<div class="card_broadcast"><h2>{_html_text(program.channel.channel_name)}'
            f' (TNT {_html_text(program.channel.channel_number)})</h2>'
        )

    card_class = 'card_program_odd' if first_of_pair else 'card_program_even'
    html_parts.append(f'<div class="{card_class}">')
    html_parts.append(f'<h3>{_html_text(program.title)}</h3>')

    if program.subtitle:
        html_parts.append(f'<h4>{_html_text(program.subtitle)}</h4>')

    html_parts.append(
        f'<p>Départ <span class="broadcast_start">{_html_text(program.startBroadcasting)}</span>, '
        f'durée <span class="duration">{_html_text(program.duration_hours)}'
        f'h{_html_text(program.duration_minutes)}</span></p>'
    )

    if program.is_broadcast_new:
        html_parts.append('<p class="first_broadcast">inédit</p>')
    html_parts.append('</div>')

    if not first_of_pair:
        html_parts.append('</div>')

# With an odd number of programmes the last channel wrapper is still open.
if len(programs) % 2:
    html_parts.append('</div>')

raw_html = ''.join(html_parts)
display_html(raw_html, raw=True)