In [44]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [45]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_senators(soup, num):
    senate_section = soup.find("h3", id="Senate_3")
    senators = []
    if senate_section:
        senate_table = senate_section.find_next("table", class_="col-begin")
        for state_div in senate_table.find_all("div", class_="mw-heading4"):
            state_name = state_div.h4.get_text(strip=True)
            for senator_entry in state_div.find_next("dl").find_all("dd"):
                senator_name = senator_entry.find("a").get_text(strip=True)
                party_affiliation = senator_entry.get_text().split()[-1]
                senators.append({
                    "State": state_name,
                    "Name": senator_name,
                    "Party": party_affiliation,
                    "Congress_Number": num
                })
    else:
        print(f"No Senate section found for Congress {num}")
    return senators

def get_reps(soup, num):
    house_section = soup.find("h3", id="House_of_Representatives_3")
    representatives = []
    if house_section:
        house_table = house_section.find_next("table", class_="col-begin")
        for state_div in house_table.find_all("div", class_="mw-heading4"):
            state_name = state_div.h4.get_text(strip=True)
            for rep_entry in state_div.find_next("dl").find_all("dd"):
                district_link = rep_entry.find("a")
                district_number = district_link.get_text(strip=True) if district_link else "At-large"
                links = rep_entry.find_all("a")
                if len(links) >= 2:
                    rep_name = links[1].get_text(strip=True)
                    party_affiliation = rep_entry.get_text().split()[-1]
                    representatives.append({
                        "State": state_name,
                        "District": district_number,
                        "Name": rep_name,
                        "Party": party_affiliation,
                        "Congress_Number": num
                    })
    else:
        print(f"No House section found for Congress {num}")
    return representatives

def get_senate_committee(soup, num):
    senate_committees = []
    senate_section = soup.find("h3", id="Senate_5")
    if senate_section:
        committee_list = senate_section.find_next("ul")
        if committee_list:
            for committee_item in committee_list.find_all("li", recursive=False):
                committee_link = committee_item.find("a")
                if committee_link:
                    committee_name = committee_link.get_text(strip=True)
                    committee_url = f"https://en.wikipedia.org{committee_link['href']}"
                    senate_committees.append({
                        "Committee": committee_name,
                        "Link": committee_url,
                        "Congress_Number": num
                    })
    else:
        print(f"No Senate committee section found for Congress {num}")
    return senate_committees

def get_house_committee(soup, num):
    house_committees = []
    house_section = soup.find("h3", id="House_of_Representatives_5")
    if house_section:
        committee_list = house_section.find_next("ul")
        if committee_list:
            for committee_item in committee_list.find_all("li", recursive=False):
                committee_link = committee_item.find("a")
                if committee_link:
                    committee_name = committee_link.get_text(strip=True)
                    committee_url = f"https://en.wikipedia.org{committee_link['href']}"
                    house_committees.append({
                        "Committee": committee_name,
                        "Link": committee_url,
                        "Congress_Number": num
                    })
    else:
        print(f"No House committee section found for Congress {num}")
    return house_committees

In [46]:
senators = []
representatives = []
senate_committees = []
house_committees = []

congress_list = ['113', '114', '115', '116', '117', '118']

for n in congress_list:
    try:
        url = f"https://en.m.wikipedia.org/wiki/{n}th_United_States_Congress"
        print(f"Processing Congress {n}...")
        response = requests.get(url)
        soup = BeautifulSoup(response.content, "html.parser")
        
        senators_data = get_senators(soup, n)
        representatives_data = get_reps(soup, n)
        senate_committees_data = get_senate_committee(soup, n)
        house_committees_data = get_house_committee(soup, n)

        if senators_data:
            senators.extend(senators_data)
        else:
            print(f"No senators data for Congress {n}")
        
        if representatives_data:
            representatives.extend(representatives_data)
        else:
            print(f"No representatives data for Congress {n}")
        
        if senate_committees_data:
            senate_committees.extend(senate_committees_data)
        else:
            print(f"No senate committees data for Congress {n}")
        
        if house_committees_data:
            house_committees.extend(house_committees_data)
        else:
            print(f"No house committees data for Congress {n}")
    
    except Exception as e:
        print(f"Error on Congress {n}: {e}")

senators_df = pd.DataFrame(senators)
representatives_df = pd.DataFrame(representatives)
senate_committees_df = pd.DataFrame(senate_committees)
house_committees_df = pd.DataFrame(house_committees)

senators_df.to_csv('senators_by_congress_number.csv', index=False)  
representatives_df.to_csv('representatives_by_congress_number.csv', index=False)  
senate_committees_df.to_csv('senate_commitee_links.csv', index=False)  
house_committees_df.to_csv('house_commitee_links.csv', index=False)

Processing Congress 113...
Processing Congress 114...
Processing Congress 115...
Error on Congress 115: 'NoneType' object has no attribute 'get_text'
Processing Congress 116...
Processing Congress 117...
No Senate section found for Congress 117
No House section found for Congress 117
No Senate committee section found for Congress 117
No House committee section found for Congress 117
No senators data for Congress 117
No representatives data for Congress 117
No senate committees data for Congress 117
No house committees data for Congress 117
Processing Congress 118...
No Senate committee section found for Congress 118
No House committee section found for Congress 118
No senate committees data for Congress 118
No house committees data for Congress 118


In [47]:
senate_committees_df

Unnamed: 0,Committee,Link,Congress_Number
0,"Agriculture, Nutrition and Forestry",https://en.wikipedia.org/wiki/United_States_Se...,113
1,Aging (Special),https://en.wikipedia.org/wiki/United_States_Se...,113
2,Appropriations,https://en.wikipedia.org/wiki/United_States_Se...,113
3,Armed Services,https://en.wikipedia.org/wiki/United_States_Se...,113
4,"Banking, Housing, and Urban Affairs",https://en.wikipedia.org/wiki/United_States_Se...,113
5,Budget,https://en.wikipedia.org/wiki/United_States_Se...,113
6,"Commerce, Science and Transportation",https://en.wikipedia.org/wiki/United_States_Se...,113
7,Energy and Natural Resources,https://en.wikipedia.org/wiki/United_States_Se...,113
8,Chaplain,https://en.wikipedia.org/wiki/Chaplain_of_the_...,114
9,Curator,https://en.wikipedia.org/wiki/Curator_of_the_U...,114
