In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By

# Restart the 'automated' browser
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

In [2]:
# 1. Open the page (ensure there is no underscore at the very end of 'order')
driver.get("https://en.wikipedia.org/wiki/List_of_countries_by_alphabetical_order")

# 2. Target the links inside the Wikipedia tables
# This is much more accurate than looking for bold text
country_elements = driver.find_elements(By.CSS_SELECTOR, "table.wikitable td a")

# 3. Clean the list: ignore empty items and only keep actual country names
countries_list = [el.text for el in country_elements if len(el.text) > 2]

# 4. Remove any duplicates and sort them A-Z
countries_list = sorted(list(set(countries_list)))

# VERIFY: This should now print ['Afghanistan', 'Albania'...] 
print(f"Total countries found: {len(countries_list)}")
print(countries_list[:10])

Total countries found: 0
[]


In [3]:
import time
from selenium.webdriver.common.by import By

# 1. Open the page
driver.get("https://en.wikipedia.org/wiki/List_of_countries_by_alphabetical_order")

# 2. WAIT for 3 seconds to let the table load completely
time.sleep(3) 

# 3. Find the country links inside the table cells
country_elements = driver.find_elements(By.CSS_SELECTOR, "table.wikitable td a")

# 4. Extract text (filtering out empty names and specific unwanted links)
countries_list = []
for el in country_elements:
    name = el.text.strip()
    # Ensure it's not empty, not a number, and longer than 2 characters
    if len(name) > 2 and not name.isnumeric():
        countries_list.append(name)

# 5. Remove duplicates and sort A-Z
countries_list = sorted(list(set(countries_list)))

# VERIFY: This should no longer be 0!
print(f"Total countries found: {len(countries_list)}")
print(countries_list[:10])

Total countries found: 0
[]


In [4]:
# Open the file and write each country on a new line
with open('countries_lookup.txt', 'w', encoding='utf-8') as f:
    for country in countries_list:
        f.write(country + '\n')

print("File 'countries_lookup.txt' has been updated and saved!")

File 'countries_lookup.txt' has been updated and saved!


In [5]:
import time
from selenium.webdriver.common.by import By

# 1. Open the page
driver.get("https://en.wikipedia.org/wiki/List_of_countries_by_alphabetical_order")
time.sleep(5) # Giving it a full 5 seconds to be safe

# 2. Get EVERY link on the page
all_links = driver.find_elements(By.TAG_NAME, "a")

# 3. Filter them
countries_list = []
for link in all_links:
    text = link.text.strip()
    # Countries usually start with a capital letter and are 3-20 characters long
    if len(text) > 2 and text[0].isupper() and "\n" not in text:
        # Avoid common Wikipedia menu links
        if text not in ["Main page", "Contents", "Current events", "Random article", "About Wikipedia"]:
            countries_list.append(text)

# 4. Clean up
countries_list = sorted(list(set(countries_list)))

print(f"Success! Found {len(countries_list)} potential names.")
print(countries_list[20:30]) # Printing a middle slice to see the results

Success! Found 29 potential names.
['Wikibooks', 'Wikidata', 'Wikinews', 'Wikiquote', 'Wikisource', 'Wikispecies', 'Wikiversity', 'Wikivoyage', 'Wiktionary']


In [6]:
# Run this cell if the scraper keeps returning 0
import pandas as pd

# This uses Pandas to read the table directly without needing Selenium's 'find_elements'
try:
    url = "https://en.wikipedia.org/wiki/List_of_countries_by_alphabetical_order"
    tables = pd.read_html(url)
    # Usually, the countries are in tables 0 through 25 on this specific page
    manual_list = []
    for i in range(len(tables)):
        if 'Country' in tables[i].columns:
            manual_list.extend(tables[i]['Country'].tolist())
    
    countries_list = sorted(list(set(manual_list)))
    print(f"Manual recovery successful: Found {len(countries_list)} countries.")
except:
    print("Could not reach Wikipedia. Check your internet connection.")

Could not reach Wikipedia. Check your internet connection.


In [7]:
# List of countries for your lookup list
countries_data = [
    "Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda", 
    "Argentina", "Armenia", "Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", 
    "Bangladesh", "Barbados", "Belarus", "Belgium", "Belize", "Benin", "Bhutan", 
    "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria", 
    "Burkina Faso", "Burundi", "Cambodia", "Cameroon", "Canada", "Central African Republic", 
    "Chad", "Chile", "China", "Colombia", "Congo", "Costa Rica", "Croatia", "Cuba", 
    "Cyprus", "Czech Republic", "Denmark", "Dominican Republic", "Ecuador", "Egypt", 
    "El Salvador", "Estonia", "Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", 
    "Georgia", "Germany", "Ghana", "Greece", "Grenada", "Guatemala", "Guinea", "Guyana", 
    "Haiti", "Honduras", "Hungary", "Iceland", "India", "Indonesia", "Iran", "Iraq", 
    "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", 
    "Korea", "Kuwait", "Laos", "Latvia", "Lebanon", "Liberia", "Libya", "Lithuania", 
    "Luxembourg", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali", "Malta", 
    "Mexico", "Moldova", "Monaco", "Mongolia", "Montenegro", "Morocco", "Mozambique", 
    "Myanmar", "Namibia", "Nepal", "Netherlands", "New Zealand", "Nicaragua", "Niger", 
    "Nigeria", "Norway", "Oman", "Pakistan", "Panama", "Paraguay", "Peru", "Philippines", 
    "Poland", "Portugal", "Qatar", "Romania", "Russia", "Rwanda", "Saudi Arabia", 
    "Senegal", "Serbia", "Singapore", "Slovakia", "Slovenia", "Somalia", "South Africa", 
    "Spain", "Sri Lanka", "Sudan", "Sweden", "Switzerland", "Syria", "Taiwan", "Tajikistan", 
    "Tanzania", "Thailand", "Togo", "Trinidad and Tobago", "Tunisia", "Turkey", "Uganda", 
    "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay", 
    "Uzbekistan", "Venezuela", "Vietnam", "Yemen", "Zambia", "Zimbabwe"
]

# Write this list to your file
with open('countries_lookup.txt', 'w', encoding='utf-8') as f:
    for country in countries_data:
        f.write(country + '\n')

print("Success! 'countries_lookup.txt' has been created manually.")

Success! 'countries_lookup.txt' has been created manually.
