In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
def _pad_column(values, size, fill='N/A'):
    """Return `values` extended with `fill` so that it holds `size` items."""
    return values + [fill] * (size - len(values))


def extract_meteo_surf(url, user_agent=None, csv_path='output.csv', timeout=10):
    """
    Scrape surf forecast data from `url` and return it as a DataFrame.

    The page is expected to contain one `div.forecast-tab` block per
    forecast date, each holding parallel lists of hour, wave, wind-speed
    and wind-direction cells (presumably one block per day; this follows
    the page layout the selectors below were written for).

    Parameters
    ----------
    url : str
        Address of the forecast page to download.
    user_agent : str, optional
        Value sent as the ``User-Agent`` request header. When omitted the
        default header of `requests` is used.
    csv_path : str or None, optional
        Where the extracted table is written as CSV (without the index).
        Defaults to ``'output.csv'``; pass ``None`` to skip writing.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per forecast time slot with the columns ``Date``, ``Hour``,
        ``Vagues``, ``Wind Speed (km/h)`` and ``Wind Direction``. Cells that
        could not be found on the page hold the string ``'N/A'``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx / 5xx).
    requests.RequestException
        On connection problems or when `timeout` is exceeded.
    """
    headers = {'User-Agent': user_agent} if user_agent else None
    page = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page into an empty CSV.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Column accumulators, filled one forecast tab at a time.
    dates = []
    hours = []
    vagues = []
    wind_speeds = []
    wind_directions = []

    for tab in soup.find_all('div', class_='forecast-tab'):
        # Date label of this tab; either the title div or its <b> may be absent.
        date_div = tab.find('div', class_='title')
        date_tag = date_div.find('b') if date_div else None
        forecast_date = date_tag.text.strip() if date_tag else 'N/A'

        tab_hours = [
            time_div.text.strip()
            for time_div in tab.find_all('div', class_='cell date with-border')
        ]

        tab_vagues = [
            wave_div.text.strip()
            for wave_div in tab.find_all('div', class_='cell large waves with-border')
        ]

        tab_wind_speeds = []
        for wind_div in tab.select('div[class^="wind wind-color-"]'):
            span = wind_div.find('span')
            tab_wind_speeds.append(span.text.strip() if span else 'N/A')

        tab_wind_directions = []
        for wind_dir_div in tab.find_all('div', class_='wind img'):
            # Guard the <img> lookup itself: the div is always truthy here.
            img = wind_dir_div.find('img')
            tab_wind_directions.append(img.get('alt', 'N/A') if img else 'N/A')

        # Pad every column of this tab to the same length so that a missing
        # cell cannot shift the values of later tabs or make the DataFrame
        # constructor fail on unequal column lengths.
        n_rows = max(
            len(tab_hours),
            len(tab_vagues),
            len(tab_wind_speeds),
            len(tab_wind_directions),
        )
        dates.extend([forecast_date] * n_rows)  # Repeat the date for each row
        hours.extend(_pad_column(tab_hours, n_rows))
        vagues.extend(_pad_column(tab_vagues, n_rows))
        wind_speeds.extend(_pad_column(tab_wind_speeds, n_rows))
        wind_directions.extend(_pad_column(tab_wind_directions, n_rows))

    wave_data = pd.DataFrame({
        'Date': dates,
        'Hour': hours,
        'Vagues': vagues,
        'Wind Speed (km/h)': wind_speeds,
        'Wind Direction': wind_directions
    })

    if csv_path is not None:
        wave_data.to_csv(csv_path, index=False)
    return wave_data

In [3]:
# Scrape the forecast page given below; the call returns the table as a
# DataFrame and, as a side effect, also writes it to 'output.csv'.
wave_data = extract_meteo_surf('https://www.surf-report.com/meteo-surf/lacanau-s1043.html')

In [4]:
# Display the scraped forecast table (bare last expression -> rich DataFrame rendering).
wave_data

Unnamed: 0,Date,Hour,Vagues,Wind Speed (km/h),Wind Direction
0,Dimanche 17 Novembre,01:00,0.3 - 0.5,22,Orientation vent Nord Nord Ouest
1,Dimanche 17 Novembre,04:00,0.3 - 0.5,22,Orientation vent Nord
2,Dimanche 17 Novembre,07:00,0.3 - 0.4,15,Orientation vent Nord Est
3,Dimanche 17 Novembre,10:00,0.4 - 0.5,15,Orientation vent Nord Est
4,Dimanche 17 Novembre,13:00,0.4 - 0.6,1,Orientation vent Nord Nord Ouest
5,Dimanche 17 Novembre,16:00,0.5 - 0.7,10,Orientation vent Ouest
6,Dimanche 17 Novembre,19:00,0.5 - 0.8,11,Orientation vent Sud Ouest
7,Dimanche 17 Novembre,22:00,0.6 - 0.9,14,Orientation vent Sud Sud Est
8,Lundi 18 Novembre,01:00,0.6 - 1.0,21,Orientation vent Ouest
9,Lundi 18 Novembre,04:00,0.7 - 1.0,12,Orientation vent Est Nord Est


In [None]:
# Save the forecast table to ./wave_data.csv without the index column.
# NOTE(review): extract_meteo_surf already writes the same frame to
# 'output.csv', so this produces a second copy under a different name.
wave_data.to_csv('./wave_data.csv', index=False)