In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
def extract_headers(url, timeout=10):
    """Download a web page and collect the text of its <h3> and <h4> elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get can block indefinitely on a stalled
            connection; a timeout is reported through the same
            RequestException handler as any other fetch failure.

    Returns:
        A dict with the keys 'h3_headers' and 'h4_headers', each mapping to a
        list of whitespace-stripped header strings, or None when the request
        or the parsing fails (the error is printed rather than raised).
    """
    try:
        # Send a desktop-browser User-Agent string with the request.
        request_headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=request_headers, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Gather the stripped text of every h3 and h4 element
        return {
            'h3_headers': [tag.get_text(strip=True) for tag in soup.find_all('h3')],
            'h4_headers': [tag.get_text(strip=True) for tag in soup.find_all('h4')],
        }

    except requests.exceptions.RequestException as e:
        # Network problems, timeouts and bad status codes all land here.
        print(f"Error fetching the URL: {e}")
        return None
    except Exception as e:
        # Best-effort: report anything unexpected and signal failure with None.
        print(f"An error occurred: {e}")
        return None

In [4]:
# Visit one "<decade>s_in_music" page per decade, 1940s through 2020s, and
# collect [decade label, header text] rows for every h3/h4 header found.
data = []
year = 1940
while year < 2030:
    decade_label = f"{year}s"
    url = f"https://en.wikipedia.org/wiki/{year}s_in_music"
    headers = extract_headers(url)
    if not headers:
        print("Failed to extract headers.")
    else:
        # For each decade, H3 rows are printed and recorded before H4 rows.
        for banner, key in (("\nH3 Headers:", 'h3_headers'),
                            ("\nH4 Headers:", 'h4_headers')):
            print(banner)
            for position, title in enumerate(headers[key], 1):
                print(f"{position}. {title}")
                data.append([decade_label, title])

    year += 10

print(data)
df = pd.DataFrame(data, columns=['decade', 'headers'])


H3 Headers:
1. Pop
2. Best Selling Records of Decade 1940s
3. Jazz
4. Country music
5. Other Trends

H4 Headers:

H3 Headers:
1. Rock and roll
2. Classic pop
3. R&B
4. Blues
5. Country music
6. Jazz
7. Other trends
8. Folk music
9. France

H4 Headers:

H3 Headers:
1. Beat music and the British Invasion
2. British blues boom
3. British psychedelia
4. Folk music
5. Rock
6. Psychedelic rock
7. Surf rock
8. Garage rock
9. Blues-rock
10. Roots rock
11. Progressive rock
12. Pop
13. R&B, Motown and soul music
14. Country music
15. Other trends and musical events
16. Bossa Nova
17. Romantics
18. Nueva ola
19. Nueva canción
20. Salsa
21. Tango
22. Música cebolla
23. Sources

H4 Headers:

H3 Headers:
1. Rock
2. Pop
3. Disco, R&B and urban
4. Soft rock and pop
5. Punk rock
6. Hard rock, arena rock and heavy metal
7. Progressive rock
8. New wave
9. Blues rock
10. Country
11. Other developments
12. Japan
13. Hong Kong
14. Southeast Asia
15. Nueva canción
16. Rock
17. Tropical
18. Reggae and Afrobe

In [5]:
# Show the collected table (columns: 'decade', 'headers') as the cell's output.
df

Unnamed: 0,decade,headers
0,1940s,Pop
1,1940s,Best Selling Records of Decade 1940s
2,1940s,Jazz
3,1940s,Country music
4,1940s,Other Trends
...,...,...
261,2020s,Shoegaze
262,2020s,Hyperpop
263,2020s,Notable electronic & dance acts
264,2020s,Jersey club


In [6]:
# Write the table as CSV to a file named 'decades-in-music-headers'
# (no extension) relative to the current working directory.
df.to_csv(path_or_buf='decades-in-music-headers')