In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import date, timedelta

def get_article_content(article_url, timeout=30):
    """Download an article page and return the text of its body paragraphs.

    Args:
        article_url: URL of the article page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The text of every ``<p>`` found inside the first
        ``div.read__content`` element, concatenated without a separator.
        An empty string is returned when the request fails, when the
        response status is not 200, or when the page contains no
        ``div.read__content`` element.
    """
    try:
        response = requests.get(article_url, timeout=timeout)
    except requests.RequestException:
        # Treat network failures like any other unsuccessful fetch.
        return ""
    if response.status_code != 200:
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    body = soup.find('div', class_='read__content')
    if body is None:
        # find() returns None when nothing matches; calling find_all on it
        # would raise AttributeError.
        return ""
    return ''.join(p.get_text() for p in body.find_all('p'))

def scrape_news_data(start_date, end_date, timeout=30):
    """Scrape the daily search listing for each day in a date range.

    For every day from ``start_date`` to ``end_date`` (inclusive) the
    listing page for that day is fetched, and each article linked from an
    ``h3.article__title.article__title--medium`` heading is downloaded and
    turned into one result row.

    Args:
        start_date: First day to scrape (needs ``year``/``month``/``day``
            attributes and ``strftime``, e.g. ``datetime.date``).
        end_date: Last day to scrape, inclusive.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of dicts with the keys ``'Date'``, ``'Title'``, ``'URL'``,
        ``'Content'`` and ``'Class'``, one per article that could be
        fetched. The list is empty when ``start_date`` is after
        ``end_date``.
    """
    base_url = "https://health.kompas.com/search/{}-{}-{}"
    data = []

    current_date = start_date
    while current_date <= end_date:
        url = base_url.format(current_date.year, current_date.month, current_date.day)
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            # Handled below exactly like a non-200 response.
            response = None

        if response is not None and response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            news_elements = soup.find_all('h3', class_='article__title article__title--medium')

            for element in news_elements:
                link = element.a
                if link is None or not link.get('href'):
                    # Heading without a usable link: nothing to follow.
                    continue
                record = _scrape_article(link.text, link['href'], timeout)
                if record is not None:
                    data.append(record)
        else:
            print(f"Failed to fetch data for {current_date.strftime('%d-%m-%Y')}")

        # Advance on success AND failure; advancing only after a successful
        # fetch would retry a failing day forever.
        current_date += timedelta(days=1)

    return data


def _scrape_article(title, article_url, timeout):
    """Fetch one article page and build its result row, or None if the request fails."""
    try:
        article_response = requests.get(article_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to fetch article {article_url}: {exc}")
        return None
    article_soup = BeautifulSoup(article_response.text, 'html.parser')

    date_time_element = article_soup.find('div', class_='read__time')
    date_time = date_time_element.text if date_time_element else ""
    # Strip AFTER removing the site prefix so no leading space is left behind.
    date_time = date_time.replace('Kompas.com -', '').strip()

    article_link_element = article_soup.find('a', class_='article__link')
    article_link = article_link_element['href'] if article_link_element else ""

    class_element = article_soup.find('div', class_='article__subtitle article__subtitle--inline')
    class_info = class_element.text.strip() if class_element else ""

    # NOTE(review): get_article_content downloads article_url a second time;
    # kept because its interface takes a URL rather than parsed HTML.
    content = get_article_content(article_url)

    return {
        'Date': date_time,
        'Title': title,
        'URL': article_link,
        'Content': content,
        'Class': class_info,
    }

# Inclusive scraping window: both the first and the last day are fetched.
start_date, end_date = date(2024, 2, 23), date(2024, 2, 24)

# Collect one dict per article, then tabulate them (one row per article).
news_data_health = scrape_news_data(start_date=start_date, end_date=end_date)
df_health = pd.DataFrame(data=news_data_health)
df_health  # last expression of the notebook cell: rendered as its output

Unnamed: 0,Date,Title,URL,Content,Class
0,"23/02/2024, 22:00 WIB",Manfaat Jahe Sebagai Pengobatan Diabetes ala R...,https://health.kompas.com/read/24C01210000268/...,KOMPAS.com - Jahe dikenal sebagai salah satu r...,Health
1,"23/02/2024, 21:00 WIB",Apa yang Menyebabkan Kecanduan Seks? Berikut 4...,https://health.kompas.com/read/24B21220000568/...,KOMPAS.com - Kecanduan seks terjadi ketika ses...,Health
2,"23/02/2024, 20:00 WIB",Apa Akibat Sering Begadang bagi Wanita? Beriku...,https://health.kompas.com/read/24B19210000068/...,KOMPAS.com - Kebiasaan begadang dapat memberik...,Health
3,"23/02/2024, 19:00 WIB",Berapa Jam Jalan Kaki untuk Menurunkan Berat B...,https://health.kompas.com/read/24B12103000168/...,KOMPAS.com - Berjalan kaki secara teratur tern...,Health
4,"23/02/2024, 18:00 WIB",Apakah Pepaya Mengandung Kadar Gula Tinggi? Be...,https://health.kompas.com/read/24B17180000868/...,KOMPAS.com - Pepaya memiliki banyak kandungan ...,Health
5,"23/02/2024, 16:30 WIB",Apakah Fibroid Rahim Berbahaya? Ini Faktanya...,https://health.kompas.com/read/24B21120000168/...,KOMPAS.com - Mungkin muncul pertanyaan pada An...,Health
6,"23/02/2024, 15:00 WIB",Cara Mengobati Pradiabetes agar Tidak Jadi Dia...,https://health.kompas.com/read/24B26150000168/...,KOMPAS.com - Pradiabetes menjadi tanda peringa...,Health
7,"23/02/2024, 13:30 WIB",Mengukur Gula Darah Pakai Smartwatch Tidak Akurat,https://health.kompas.com/read/24B19060000468/...,KOMPAS.com - Berhati-hatilah dalam mengukur ka...,Health
8,"23/02/2024, 12:00 WIB",5 Ciri-ciri Penyakit Gondongan Akan Sembuh,https://health.kompas.com/read/24B07220000668/...,KOMPAS.com - Gondongan adalah peradangan kelen...,Health
9,"23/02/2024, 10:30 WIB",40 Puskesmas di Jakarta Beri Layanan Pengantar...,https://health.kompas.com/read/24B27133000068/...,KOMPAS.com - Layanan pengantaran obat ARV untu...,Health
