# Scraping headlines

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
response = requests.get("https://www.lrt.lt/en/news-in-english")
doc = BeautifulSoup(response.text)

In [9]:
items = doc.select(".news")

articles = []
for item in items:
    url = item.select_one("a")['href']
    headline = item.select_one("h3").text
    img = item.select_one("img").get('data-src', None)
    article = {
        'url': url,
        'headline': headline,
        'image': img
    }
    articles.append(article)

len(articles)

42

In [10]:
import pandas as pd

df = pd.DataFrame(articles)
df.head()

Unnamed: 0,url,headline,image
0,/en/news-in-english/19/2050571/lithuanian-pm-v...,Lithuanian PM voices confidence in defence min...,/img/2023/02/18/1451044-637891-150x84.jpg
1,/en/news-in-english/19/2050524/lithuania-deems...,"Lithuania deems 1,164 Belarusian and Russian n...",/img/2022/03/01/1207094-733403-150x84.jpg
2,/en/news-in-english/19/2050519/vilnius-ex-mayo...,Vilnius ex-mayor Å imaÅ¡ius returns to private...,/img/2023/04/17/1491828-404400-150x84.jpg
3,/en/news-in-english/19/2050432/lithuania-s-sup...,Lithuania's support to Ukraine includes helico...,/img/2023/07/20/1555673-67448-150x84.jpg
4,/en/news-in-english/19/2050380/latvia-to-ask-t...,Latvia to ask thousands of Russian citizens to...,/img/2019/08/05/485338-154587-150x84.jpg


# What to do with the results?

## Save one file of current data

In [19]:
df.to_csv("current-headlines.csv", index=False)

## New file for every scrape

In [14]:
import os

os.makedirs('data', exist_ok=True)

In [20]:
from datetime import datetime

date_string = datetime.now().strftime("%Y-%m-%d-%H.%M.%S")
filepath = f"data/headlines-{date_string}.csv"

df.to_csv(filepath, index=False)

## Append to existing file

In [28]:
import os

existing_filename = "running-log.csv"

try:
    existing = pd.read_csv(existing_filename)
except:
    existing = pd.DataFrame([])
existing

In [29]:
complete = pd.concat([existing, df], ignore_index=True)
complete.head()

Unnamed: 0,url,headline,image
0,/en/news-in-english/19/2050571/lithuanian-pm-v...,Lithuanian PM voices confidence in defence min...,/img/2023/02/18/1451044-637891-150x84.jpg
1,/en/news-in-english/19/2050524/lithuania-deems...,"Lithuania deems 1,164 Belarusian and Russian n...",/img/2022/03/01/1207094-733403-150x84.jpg
2,/en/news-in-english/19/2050519/vilnius-ex-mayo...,Vilnius ex-mayor Å imaÅ¡ius returns to private...,/img/2023/04/17/1491828-404400-150x84.jpg
3,/en/news-in-english/19/2050432/lithuania-s-sup...,Lithuania's support to Ukraine includes helico...,/img/2023/07/20/1555673-67448-150x84.jpg
4,/en/news-in-english/19/2050380/latvia-to-ask-t...,Latvia to ask thousands of Russian citizens to...,/img/2019/08/05/485338-154587-150x84.jpg


In [30]:
complete.to_csv(existing_filename)