In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup, NavigableString, Tag
import json
import time
from typing import Dict, Optional

In [2]:
def fetch_legislators():
    """
    Fetches legislators data from NASS API and converts to pandas DataFrame.
    
    Returns:
        pd.DataFrame: DataFrame containing legislators information
    """
    url = "https://nass.gov.ng/mps/get_legislators/?chamber=1"
    
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    try:
        # Fetch data from API
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise exception for bad status codes
        
        # Get JSON data
        data = response.json()
        
        # Extract the 'data' list from the JSON and create DataFrame
        df = pd.DataFrame(data['data'], columns=['name', 'state', 'district', 'party', 'id'])
        
        # Convert column names to lowercase
        df.columns = df.columns.str.lower()
        
        return df
        
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {str(e)}")
        return None
    except ValueError as e:
        print(f"Error parsing JSON: {str(e)}")
        return None
    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return None

In [3]:
senators_df = fetch_legislators()
senators_df.head()

Unnamed: 0,name,state,district,party,id
0,Binos Dauda Yaroe,Adamawa,Adamawa South,PDP,87
1,Ezenwa Francis Onyewuchi,Imo,Imo East,LP,94
2,Orji Uzor Kalu,Abia,Abia-North,APC,97
3,Jibrin Isah,Kogi,Kogi East,APC,101
4,Michael Opeyemi Bamidele,Ekiti,Ekiti Central,APC,158


In [10]:
import requests
import time
import pandas as pd
from bs4 import BeautifulSoup
from typing import Dict, Optional

def extract_senator_info(html_content: str) -> Dict:
    """
    Extracts senator contact information from the given HTML content.

    Args:
        html_content: HTML response text from the senator's profile page.

    Returns:
        A dictionary containing the senator's details.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    details = {
        "name": None,
        "phone": None,
        "email": None,
        "parliament_address": None,
        "parliament_number": None,
        "address": None,
        "social_media": {}
    }

    # Extract senator name
    name_tag = soup.find('h3')
    if name_tag:
        details["name"] = name_tag.text.strip()
        print(f"Extracted Name: {details['name']}")

    # Find all <a> tags that contain the contact details
    contact_section = soup.find_all('a')
    if not contact_section:
        print("⚠️ No contact info section found.")
    else:
        for tag in contact_section:
            text = tag.get_text(strip=True)
            print(f"🔍 Found: {text}")  # Debugging print
            
            if "Phone Number:" in text:
                details["phone"] = text.replace("Phone Number:", "").strip()
                print(f"📞 Extracted Phone: {details['phone']}")
            elif "Email:" in text:
                details["email"] = text.replace("Email:", "").strip()
                print(f"📧 Extracted Email: {details['email']}")
            elif "Parliament Address:" in text:
                details["parliament_address"] = text.replace("Parliament Address:", "").strip()
                print(f"🏛️ Extracted Parliament Address: {details['parliament_address']}")
            elif "Parliament Number:" in text:
                details["parliament_number"] = text.replace("Parliament Number:", "").strip()
                print(f"🆔 Extracted Parliament Number: {details['parliament_number']}")
            elif "Address:" in text:
                details["address"] = text.replace("Address:", "").strip()
                print(f"🏠 Extracted Address: {details['address']}")

    # Extract social media links
    for link in soup.find_all('a', class_="social-icon"):
        platform = link.get('title', 'Unknown')
        url = link.get('href', '#').strip()
        if url:
            details['social_media'][platform] = url
            print(f"🌐 Extracted Social Media ({platform}): {url}")

    return details


def get_senator_details(senator_id: str) -> Optional[Dict]:
    """
    Scrapes details for a single senator from their profile page.

    Args:
        senator_id: The ID of the senator to scrape.

    Returns:
        Dictionary containing senator's contact details or None if failed.
    """
    url = f"https://nass.gov.ng/mps/single/{senator_id}"
    
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for HTTP failures
        
        print(f"\n🔎 Scraping: {url}")  # Debug print
        return extract_senator_info(response.text)
    
    except Exception as e:
        print(f"❌ Error scraping senator {senator_id}: {str(e)}")
        return None


def scrape_all_senators(df: pd.DataFrame, delay: float = 1.5) -> pd.DataFrame:
    """
    Scrape details for all senators in the DataFrame.

    Args:
        df: DataFrame containing senator information (must have a column 'id').
        delay: Time to wait between requests in seconds (default: 1.5 sec).

    Returns:
        DataFrame with additional columns for contact details.
    """
    # Ensure new columns exist
    for col in ["name", "phone", "email", "parliament_address", "parliament_number", "address", "social_media"]:
        df[col] = None

    for idx, row in df.iterrows():
        senator_id = row['id']
        print(f"\n⚡ Scraping details for {row['name']} (ID: {senator_id})...")

        details = get_senator_details(senator_id)
        if details:
            for field in ["name", "phone", "email", "parliament_address", "parliament_number", "address", "social_media"]:
                df.at[idx, field] = details[field]
        
        time.sleep(delay)  # Wait before making the next request

    return df

In [11]:
senators_df = scrape_all_senators(senators_df)

senators_df.head()


⚡ Scraping details for None (ID: 87)...

🔎 Scraping: https://nass.gov.ng/mps/single/87
Extracted Name: Sen. BINOS YAROE
🔍 Found: 
🔍 Found: 
🔍 Found: Home
🔍 Found: About NASS
🔍 Found: About Senate
🔍 Found: About House
🔍 Found: About The Parliament
🔍 Found: Mission & Vision
🔍 Found: Management Team
🔍 Found: Legislators
🔍 Found: Distinguished Senators
🔍 Found: Honourable Representatives
🔍 Found: COMMITTEES
🔍 Found: Senate Committees
🔍 Found: House Committees
🔍 Found: News
🔍 Found: Resources
🔍 Found: Bills
🔍 Found: Hansard
🔍 Found: Order Papers
🔍 Found: Votes and Proceedings
🔍 Found: Magazine
🔍 Found: Contact Us
🔍 Found: 
🔍 Found: Home
🔍 Found: Legislators
🔍 Found: About
🔍 Found: Previous Offices
🔍 Found: Educational History
🔍 Found: Secretary, Zing Local Government, Gongola State, 1988-1989
🔍 Found: Date of Birth:1955-01-01
🔍 Found: Chamber:Senate
🔍 Found: Party:PDP
🔍 Found: Senator
🔍 Found: Phone Number:08034050460
📞 Extracted Phone: 08034050460
🔍 Found: Email:bdyaroe@gmail.com
📧 Extrac

Unnamed: 0,name,state,district,party,id,phone,email,parliament_address,parliament_number,address,social_media
0,Sen. BINOS YAROE,Adamawa,Adamawa South,PDP,87,8034050460,bdyaroe@gmail.com,,,"makwada street, opposite makwada square, numan...",{'Unknown': '#'}
1,Sen. EZENWA ONYEWUCHI,Imo,Imo East,LP,94,8032012132,ezeonyewuchi@gmail.com,,,plot c13 opposite chief emmanuel iwuanyanwu gl...,{'Unknown': '#'}
2,Sen. ORJI KALU,Abia,Abia-North,APC,97,8034000001,OKALU@ORJIKALU.COM,,,"onu ibina square, igbere, bende lga, abia state",{'Unknown': '#'}
3,Sen. JIBRIN ISAH,Kogi,Kogi East,APC,101,8185651909,isahj@ymail.com,,,old egume road ayangba kogi state,{'Unknown': '#'}
4,Sen. MICHAEL BAMIDELE,Ekiti,Ekiti Central,APC,158,23480911112,amicusng@gmail.com,,,"ikere road, ado ekiti, ekiti state.",{'Unknown': '#'}


In [12]:
senators_df.shape

(73, 11)

In [13]:
senators_df.to_csv('nigeria_senators.csv', index=False)