In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_national_flowers(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    flowers_list = []

    # Find the correct table (assuming there is only one such table on the page)
    table = soup.find('table', {'class': 'wikitable'})
    rows = table.find_all('tr')

    for row in rows[1:]:  # skip header row
        cols = row.find_all('td')
        if len(cols) >= 3:  # check if row contains sufficient columns
            country = cols[0].text.strip()
            flower = cols[1].text.strip()
            description = cols[2].text.strip() if len(cols) > 2 else ''
            
            flowers_list.append({
                'Country': country,
                'Flower': flower,
                'Description': description
            })

    return pd.DataFrame(flowers_list)

# Usage
url = 'https://en.wikipedia.org/wiki/List_of_national_flowers'
national_flowers_data = scrape_national_flowers(url)
national_flowers_data.to_csv('national_flowers.csv', index=False)
print('National flowers data scraped and saved to national_flowers.csv')

National flowers data scraped and saved to national_flowers.csv


In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_state_flowers(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    flowers_list = []

    # Find the correct table (assuming the state flowers are well structured in the page)
    table = soup.find('table', {'class': 'wikitable'})
    rows = table.find_all('tr')

    for row in rows[1:]:  # skip header row
        cols = row.find_all('td')
        if len(cols) >= 2:  # check if row contains sufficient columns
            state = cols[0].text.strip()
            flower = cols[1].text.strip()
            
            flowers_list.append({
                'State': state,
                'Flower': flower
            })

    return pd.DataFrame(flowers_list)

# Usage
url = 'https://en.wikipedia.org/wiki/List_of_U.S._state_and_territory_flowers'
state_flowers_data = scrape_state_flowers(url)
state_flowers_data.to_csv('us_state_flowers.csv', index=False)
print('State flowers data scraped and saved to us_state_flowers.csv')

State flowers data scraped and saved to us_state_flowers.csv


In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def get_scientific_name(flower_name):
    """Fetch the scientific name of a flower from Wikipedia."""
    search_url = f"https://en.wikipedia.org/wiki/{flower_name.replace(' ', '_')}"
    try:
        response = requests.get(search_url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            infobox = soup.find('table', {'class': 'infobox'})
            if infobox:
                scientific_name = infobox.find('i')
                if scientific_name:
                    return scientific_name.text
        return "Not found"
    except Exception as e:
        return "Error"

def process_flowers(file_path, location_type):
    """Process flower data to add scientific names."""
    df = pd.read_csv(file_path)
    df['Flowers'] = df['Flower'].str.split(' and |,| or ')  # Split entries with multiple flowers
    df = df.explode('Flowers').reset_index(drop=True)  # Create a new row for each flower
    df['Flowers'] = df['Flowers'].str.strip().str.capitalize()  # Standardize capitalization
    df['Scientific Name'] = df['Flowers'].apply(get_scientific_name)  # Get scientific names

    # Drop the old Flower column and rename Flowers column back to Flower
    df.drop('Flower', axis=1, inplace=True)
    df.rename(columns={'Flowers': 'Flower'}, inplace=True)
    output_path = f"{location_type}_flowers_with_scientific_names.csv"
    df.to_csv(output_path, index=False)
    return output_path

# Process each CSV
output_national = process_flowers('national_flowers.csv', 'national')
output_state = process_flowers('us_state_flowers.csv', 'state')

print(f"Processed files saved as {output_national} and {output_state}.")

Processed files saved as national_flowers_with_scientific_names.csv and state_flowers_with_scientific_names.csv.


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def fetch_wikipedia_data(flower_name):
    """Fetch scientific name, an image URL, and symbolism from a flower's Wikipedia page."""
    search_url = f"https://en.wikipedia.org/wiki/{flower_name.replace(' ', '_')}"
    data = {
        "Scientific Name": "Not found",
        "Image URL": "Not found",
        "Symbolism": ""
    }
    try:
        response = requests.get(search_url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            # Scientific Name
            infobox = soup.find('table', {'class': 'infobox'})
            if infobox:
                scientific_name = infobox.find('i')
                if scientific_name:
                    data["Scientific Name"] = scientific_name.text
            # Image URL
            image = infobox.find('img')
            if image:
                data["Image URL"] = "https:" + image['src']
            # Symbolism
            headers = soup.find_all('span', {'class': 'mw-headline'})
            for header in headers:
                if 'symbolism' in header.text.lower():
                    next_node = header.parent.find_next_sibling()
                    if next_node and next_node.name in ['p', 'ul']:
                        data["Symbolism"] = next_node.text.strip()
                    break
        return data
    except Exception as e:
        print(f"Error processing {flower_name}: {str(e)}")
        return data

def process_flowers(file_path, location_type):
    """Process flower data to add scientific names, images, and symbolism."""
    df = pd.read_csv(file_path)
    df['Flowers'] = df['Flower'].str.split(' and |,| or ')  # Split entries with multiple flowers
    df = df.explode('Flowers').reset_index(drop=True)  # Create a new row for each flower
    df['Flowers'] = df['Flowers'].str.strip().str.capitalize()  # Standardize capitalization
    
    # Fetch data from Wikipedia
    wiki_data = df['Flowers'].apply(fetch_wikipedia_data).apply(pd.Series)
    df = pd.concat([df, wiki_data], axis=1)
    
    # Clean up DataFrame
    df.drop('Flower', axis=1, inplace=True)
    df.rename(columns={'Flowers': 'Flower'}, inplace=True)
    output_path = f"{location_type}_flowers_enhanced.csv"
    df.to_csv(output_path, index=False)
    return output_path

# Process each CSV
output_national = process_flowers('national_flowers.csv', 'national')
output_state = process_flowers('us_state_flowers.csv', 'state')

print(f"Enhanced files saved as {output_national} and {output_state}.")

Error processing Water lily: 'NoneType' object has no attribute 'find'
Error processing Water lily: 'NoneType' object has no attribute 'find'
Error processing Sakura blossom: 'NoneType' object has no attribute 'find'
Error processing Poppy: 'NoneType' object has no attribute 'find'
Error processing Daisy: 'NoneType' object has no attribute 'find'
Error processing Chamomile: 'NoneType' object has no attribute 'find'
Error processing Tudor rose: 'NoneType' object has no attribute 'find'
Error processing : 'NoneType' object has no attribute 'find'
Error processing Others: 'NoneType' object has no attribute 'find'
Error processing Maple leaf: 'NoneType' object has no attribute 'find'
Error processing Black orchid: 'NoneType' object has no attribute 'find'
Error processing Lignum vitae: 'NoneType' object has no attribute 'find'
Error processing Others: 'NoneType' object has no attribute 'find'
Error processing Ceibo: 'NoneType' object has no attribute 'find'
Error processing Kantuta: 'NoneT