In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the Wikipedia page
url = "https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States"

# Send an HTTP request to the webpage
response = requests.get(url)

# Parse the page content with BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table containing the list of presidents
table = soup.find("table", {"class": "wikitable"})

# Extract data from the table
presidents = []
if table:
    rows = table.find_all("tr")
    for row in rows[1:]:  # Skip the header row
        cols = row.find_all("td")
        if len(cols) > 0:
            number = cols[0].text.strip()
            name = cols[1].text.strip()
            link_tag = cols[1].find("a")
            wiki_link = "https://en.wikipedia.org" + link_tag['href'] if link_tag else "No_link"
            term = cols[2].text.strip()
            party = cols[4].text.strip() if len(cols) > 4 else "N/A"
            presidents.append([number, name, term, party,wiki_link])

# Convert the data into a Pandas DataFrame
df = pd.DataFrame(presidents, columns=["No.", "Name", "Term", "Party","link"])

# Display the first few rows
print(df.head())

# Optionally, save the data to a CSV file
df.to_csv("us_presidents.csv", index=False)
print("Data saved to us_presidents.csv")


  No.                              Name                          Term  \
0      George Washington(1732–1799)[19]  April 30, 1789–March 4, 1797   
1             John Adams(1735–1826)[21]   March 4, 1797–March 4, 1801   
2       Thomas Jefferson(1743–1826)[23]   March 4, 1801–March 4, 1809   
3          James Madison(1751–1836)[24]   March 4, 1809–March 4, 1817   
4           James Monroe(1758–1831)[26]   March 4, 1817–March 4, 1825   

                   Party                                             link  
0           Unaffiliated  https://en.wikipedia.org/wiki/George_Washington  
1             Federalist         https://en.wikipedia.org/wiki/John_Adams  
2  Democratic-Republican   https://en.wikipedia.org/wiki/Thomas_Jefferson  
3  Democratic-Republican      https://en.wikipedia.org/wiki/James_Madison  
4  Democratic-Republican       https://en.wikipedia.org/wiki/James_Monroe  
Data saved to us_presidents.csv


In [2]:
y =pd.DataFrame( df[:][["Name","link"]])
y

Unnamed: 0,Name,link
0,George Washington(1732–1799)[19],https://en.wikipedia.org/wiki/George_Washington
1,John Adams(1735–1826)[21],https://en.wikipedia.org/wiki/John_Adams
2,Thomas Jefferson(1743–1826)[23],https://en.wikipedia.org/wiki/Thomas_Jefferson
3,James Madison(1751–1836)[24],https://en.wikipedia.org/wiki/James_Madison
4,James Monroe(1758–1831)[26],https://en.wikipedia.org/wiki/James_Monroe
5,John Quincy Adams(1767–1848)[27],https://en.wikipedia.org/wiki/John_Quincy_Adams
6,Andrew Jackson(1767–1845)[30],https://en.wikipedia.org/wiki/Andrew_Jackson
7,Martin Van Buren(1782–1862)[31],https://en.wikipedia.org/wiki/Martin_Van_Buren
8,William Henry Harrison(1773–1841)[32],https://en.wikipedia.org/wiki/William_Henry_Ha...
9,John Tyler(1790–1862)[33],https://en.wikipedia.org/wiki/John_Tyler


In [3]:
import requests
from bs4 import BeautifulSoup
import os
import re

# URL of the Wikipedia page
url = "https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States"

# Send an HTTP request to the webpage
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table containing the list of presidents
table = soup.find("table", {"class": "wikitable"})

# Create a folder to save images
if not os.path.exists("presidents_images"):
    os.makedirs("presidents_images")

# Extract images and save them with the president's name and link
if table:
    rows = table.find_all("tr")[1:]  # Skip the header row
    for row in rows:
        cols = row.find_all("td")
        if len(cols) > 1:
            # Get the president's name and reference link
            name = cols[1].text.strip()
            link_tag = cols[1].find("a")
            wiki_link = "https://en.wikipedia.org" + link_tag['href'] if link_tag else "No_link"

            # Clean the name for filename purposes
            clean_name = re.sub(r'[\\/*?:"<>|]', "", name)

            img_tag = cols[0].find("img")
            if img_tag:
                img_url = "https:" + img_tag['src']  # Get the image URL
                img_name = f"{clean_name}.jpg"  # Save with the president's name

                # Download the image
                img_response = requests.get(img_url)
                if img_response.status_code == 200:
                    with open(os.path.join("presidents_images", img_name), "wb") as img_file:
                        img_file.write(img_response.content)
                    print(f"Downloaded {img_name} - {wiki_link}")

print("All images have been downloaded successfully!")


Downloaded George Washington(1732–1799)[19].jpg - https://en.wikipedia.org/wiki/George_Washington
Downloaded John Adams(1735–1826)[21].jpg - https://en.wikipedia.org/wiki/John_Adams
Downloaded Thomas Jefferson(1743–1826)[23].jpg - https://en.wikipedia.org/wiki/Thomas_Jefferson
Downloaded James Madison(1751–1836)[24].jpg - https://en.wikipedia.org/wiki/James_Madison
Downloaded James Monroe(1758–1831)[26].jpg - https://en.wikipedia.org/wiki/James_Monroe
Downloaded John Quincy Adams(1767–1848)[27].jpg - https://en.wikipedia.org/wiki/John_Quincy_Adams
Downloaded Andrew Jackson(1767–1845)[30].jpg - https://en.wikipedia.org/wiki/Andrew_Jackson
Downloaded Martin Van Buren(1782–1862)[31].jpg - https://en.wikipedia.org/wiki/Martin_Van_Buren
Downloaded William Henry Harrison(1773–1841)[32].jpg - https://en.wikipedia.org/wiki/William_Henry_Harrison
Downloaded John Tyler(1790–1862)[33].jpg - https://en.wikipedia.org/wiki/John_Tyler
Downloaded James K. Polk(1795–1849)[36].jpg - https://en.wikipedia

In [4]:
import face_recognition
import cv2
import os
import numpy as np
import re

# Directory containing president images
image_folder = "presidents_images"

# Dictionary to store president encodings and their respective Wikipedia links
presidents_data = {}

# Function to clean names and extract the Wikipedia link
def extract_name_and_link(filename):
    name = os.path.splitext(filename)[0]  # Remove file extension
    name_parts = name.split(" - ")
    clean_name = name_parts[0]
    wiki_link = name_parts[1] if len(name_parts) > 1 else "No link available"
    return clean_name, wiki_link

# Load images and encode faces
print("Encoding faces...")
for image_file in os.listdir(image_folder):
    if image_file.endswith(".jpg") or image_file.endswith(".jpeg") or image_file.endswith(".png"):
        img_path = os.path.join(image_folder, image_file)
        
        # Load image and encode
        img = face_recognition.load_image_file(img_path)
        face_encodings = face_recognition.face_encodings(img)

        if face_encodings:
            encoding = face_encodings[0]
            name, link = extract_name_and_link(image_file)
            presidents_data[name] = (encoding, link)
            print(f"Encoded {name} with link {link}")

print("Encoding complete.")

# Start webcam for real-time recognition
video_capture = cv2.VideoCapture(0)

print("Starting face recognition. Press 'q' to exit.")

while True:
    ret, frame = video_capture.read()
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    # Detect faces in the frame
    face_locations = face_recognition.face_locations(rgb_frame)
    face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

    for face_encoding, face_location in zip(face_encodings, face_locations):
        matches = face_recognition.compare_faces([data[0] for data in presidents_data.values()], face_encoding)
        face_distances = face_recognition.face_distance([data[0] for data in presidents_data.values()], face_encoding)
        best_match_index = np.argmin(face_distances)

        if matches[best_match_index]:
            matched_name = list(presidents_data.keys())[best_match_index]
            matched_link = presidents_data[matched_name][1]

            # Draw a box around the face and display the name with the link
            top, right, bottom, left = face_location
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 0), 2)
            cv2.putText(frame, f"{matched_name}", (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)
            cv2.putText(frame, f"Link: {matched_link}", (left, bottom + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            print(f"Recognized: {matched_name} - {matched_link}")
            

    cv2.imshow("Face Recognition", frame)

    # Press 'q' to exit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Cleanup
video_capture.release()
cv2.destroyAllWindows()


ModuleNotFoundError: No module named 'face_recognition'

In [None]:
matched_name

In [None]:
y.values

In [53]:
res = y[y["Name"]== "Donald Trump(b. 1946)[76]" ].iloc[0]["link"]

In [54]:
res

'https://en.wikipedia.org/wiki/Donald_Trump'