# Eurovision Song Lyrics Collection 

The fan website https://eurovisionworld.com serves as the data source.

#### 1. Access the list of participating countries/songs per year.

In [111]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Browser-like User-Agent header sent with the request.
my_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                  'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'
}

# Songs/videos overview page for the current year.
url = "https://eurovisionworld.com/eurovision/songs-videos"

# Other years carry the year in the path, for example:
#url = "https://eurovisionworld.com/eurovision/2023/songs-videos"

session = requests.Session()

# requests applies no timeout by default; set one so the cell cannot hang
# forever if the server stops responding.
response = session.get(url, headers=my_headers, timeout=30)

# Fail loudly on an HTTP error instead of parsing an error page into `soup`.
response.raise_for_status()

# Parsed overview page; the following cell reads the song table from it.
soup = BeautifulSoup(response.text, 'html.parser')

In [112]:
#print(soup)

In [113]:
# One [country, song, artist] row per entry found on the overview page.
data = []

for entry in soup.find_all('div', class_='inc_tr'):
    try:
        # Locate the three cells first, then read their text in one go.
        country_link = entry.find('div', class_='inc_td1').find('a')
        song_link = entry.find('div', class_='inc_td2').find('a')
        artist_cell = entry.find('div', class_='v_artist')
        row = [
            country_link.text.strip(),
            song_link.text.strip(),
            artist_cell.text.strip(),
        ]
    except AttributeError:
        # A lookup came back as None (element missing): skip this entry.
        continue
    data.append(row)

# Tabulate the collected rows and display the result.
df = pd.DataFrame(data, columns=['Country', 'Song', 'Artist'])
df

Unnamed: 0,Country,Song,Artist
0,Albania,Zjerm,Shkodra Elektronike
1,Armenia,Survivor,Parg
2,Australia,Milkshake Man,Go-Jo
3,Austria,Wasted Love,JJ
4,Azerbaijan,Run With U,Mamagama
5,Belgium,Strobe Lights,Red Sebastian
6,Croatia,Poison Cake,Marko Bošnjak
7,Cyprus,Shh,Theo Evan
8,Czechia,Kiss Kiss Goodbye,Adonxs
9,Denmark,Hallucination,Sissal


In [114]:
# save to csv file
#df.to_csv("2025songs.csv")

#### 2. Collect song lyrics: original and English translations 

In [115]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Browser-like User-Agent header sent with every request.
my_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                  'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'
}

# Lyrics pages to scrape
urls = [
    "https://eurovisionworld.com/eurovision/2025/finland",
    "https://eurovisionworld.com/eurovision/2025/switzerland",
    "https://eurovisionworld.com/eurovision/2025/slovenia",
]

# Seconds to wait for the server before giving up on a page
# (requests applies no timeout by default).
request_timeout = 30

# One dict per successfully scraped page
data = []

# Start a session to keep cookies, headers, etc.
session = requests.Session()

for url in urls:
    # A network failure on one page must not discard the pages already
    # collected, so report it and move on to the next URL.
    try:
        response = session.get(url, headers=my_headers, timeout=request_timeout)
    except requests.RequestException as err:
        print(f"Failed to retrieve the webpage {url}. Error:", err)
        continue

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage {url}. Status Code:", response.status_code)
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Page title, with a placeholder when the header is missing
    title_tag = soup.find("h1", class_="mm lyrics_header")
    title = title_tag.text.strip() if title_tag else "No Title Found"

    # === Original Lyrics ===
    # Each matching div contributes its <p> paragraphs, one per line.
    original_blocks = soup.find_all("div", class_="lyrics_div lyr_div lyr_mobile lyr_desktop")
    all_lyrics = [
        "\n".join(p.get_text(separator=' ') for p in block.find_all("p"))
        for block in original_blocks
    ]
    final_lyrics = "\n\n---\n\n".join(all_lyrics)

    # === English Lyrics (from data-lyrics-version="English") ===
    eng_lyrics = []
    for block in soup.find_all("div", class_="lyrics_div lyr_div lyr_desktop"):
        # Divs with this class string can hold other versions; keep English only.
        if block.get("data-lyrics-version") != "English":
            continue

        section_title_tag = block.find("h3")
        section_title = section_title_tag.get_text(strip=True) if section_title_tag else ""

        lyrics_text = "\n".join(p.get_text(separator=' ') for p in block.find_all("p"))

        # Prefix the section with its <h3> title when there is one.
        if section_title:
            eng_lyrics.append(f"### {section_title}\n{lyrics_text}")
        else:
            eng_lyrics.append(lyrics_text)

    # Empty string when the page has no English block.
    final_eng_lyrics = "\n\n---\n\n".join(eng_lyrics)

    data.append({
        "Title": title,
        "Lyrics": final_lyrics,
        "Lyrics Eng": final_eng_lyrics,
        "URL": url,
    })

# Explicit columns keep the frame's layout stable even if every request failed.
df = pd.DataFrame(data, columns=["Title", "Lyrics", "Lyrics Eng", "URL"])


In [116]:
# Display the lyrics DataFrame built in the previous cell.
df

Unnamed: 0,Title,Lyrics,Lyrics Eng,URL
0,"Eurovision 2025 Finland:Erika Vikman - ""Ich ko...","(Ich komme) On yö, sydän lyö Hän loveen lankea...","### I'm Coming\n(I'm coming) Night falls, hear...",https://eurovisionworld.com/eurovision/2025/fi...
1,"Eurovision 2025 Switzerland:Zoë Më - ""Voyage""",Mes yeux candides découvrent le monde D'une fa...,### Journey\nMy candid eyes discover the world...,https://eurovisionworld.com/eurovision/2025/sw...
2,"Eurovision 2025 Slovenia:Klemen - ""How Much Ti...",Our baby boy Was crawling in the living room Y...,,https://eurovisionworld.com/eurovision/2025/sl...


In [110]:
# save to csv file
#df.to_csv("25_lyrics3.csv")

In [None]:
# Lyrics URLs <--- change `year` and the `countries` slugs to target another contest.
year = 2024

# Country slugs as they appear in the page URLs; list order is preserved in `urls`.
countries = [
    "france",
    "germany",
    "italy",
    "spain",
    "sweden",
    "united-kingdom",
    "cyprus",
    "serbia",
    "lithuania",
    "ireland",
    "ukraine",
    "poland",
    "croatia",
    "iceland",
    "slovenia",
    "finland",
    "moldova",
    "azerbaijan",
    "australia",
    "portugal",
    "luxembourg",
    "malta",
    "albania",
    "greece",
    "switzerland",
    "czechia",
    "austria",
    "denmark",
    "armenia",
    "latvia",
    "san-marino",
    "georgia",
    "belgium",
    "estonia",
    "israel",
    "norway",
    "netherlands",
]

# One lyrics page URL per country for the chosen year.
urls = [
    f"https://eurovisionworld.com/eurovision/{year}/{country}"
    for country in countries
]

#### Access original lyrics only.

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Browser-like User-Agent header sent with every request.
my_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
                  'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15'
}

# Lyrics pages to scrape
urls = [
    "https://eurovisionworld.com/eurovision/2025/albania",
    "https://eurovisionworld.com/eurovision/2025/cyprus",
]

# Seconds to wait for the server before giving up on a page
# (requests applies no timeout by default).
request_timeout = 30

# One dict per successfully scraped page
data = []

# Start a session to keep cookies, headers, etc.
session = requests.Session()

for url in urls:
    # A network failure on one page must not discard the pages already
    # collected, so report it and move on to the next URL.
    try:
        response = session.get(url, headers=my_headers, timeout=request_timeout)
    except requests.RequestException as err:
        print(f"Failed to retrieve the webpage {url}. Error:", err)
        continue

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage {url}. Status Code:", response.status_code)
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Look the header up once; fall back to a placeholder when it is missing.
    title_tag = soup.find("h1", class_="mm lyrics_header")
    title = title_tag.text.strip() if title_tag else "No Title Found"

    # Each matching div contributes its <p> paragraphs, one per line.
    headings = soup.find_all("div", class_="lyrics_div lyr_div lyr_mobile lyr_desktop")
    all_lyrics = [
        "\n".join(p.get_text(separator=' ') for p in heading.find_all("p"))
        for heading in headings
    ]

    # Combine all lyrics sections into one string
    final_lyrics = "\n\n---\n\n".join(all_lyrics)

    data.append({"Title": title, "Lyrics": final_lyrics, "URL": url})

# Explicit columns keep the frame's layout stable even if every request failed.
df = pd.DataFrame(data, columns=["Title", "Lyrics", "URL"])
