# In [None]:
# This script was made after the exam concluded.
import requests
import json
from bs4 import BeautifulSoup
import re

def get_category_members(category):
    """Return every member of a Wikipedia category via the MediaWiki API.

    Pages through ``list=categorymembers`` results (500 per request) until
    the API stops returning a ``continue`` object.

    Args:
        category: Category title including its prefix, as passed to the
            ``cmtitle`` parameter (e.g. ``'Category:Impact_craters_on_the_Moon'``).

    Returns:
        A list of the ``categorymembers`` entries from all result pages, in
        the order the API returned them.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        KeyError: If a response has no ``query.categorymembers`` payload.
    """
    url = 'https://en.wikipedia.org/w/api.php'
    members = []

    params = {
        'action': 'query',
        'format': 'json',
        'list': 'categorymembers',
        'cmtitle': category,
        'cmlimit': '500'
    }

    # The context manager releases the pooled connections once paging ends.
    with requests.Session() as session:
        while True:
            # A timeout keeps a stalled connection from blocking forever.
            response = session.get(url=url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            members.extend(data['query']['categorymembers'])

            if 'continue' not in data:
                break

            # Echo back every key of the continuation object, not only
            # ``cmcontinue``, as the API's continuation protocol asks for.
            params.update(data['continue'])

    return members

def clean_coordinates(coord_str):
    """Extract decimal-degree coordinates from text as ``[lon, lat]``.

    Only values written with a decimal point and a hemisphere letter are
    recognised (latitude with N/S first, then longitude with E/W). Southern
    latitudes and western longitudes come back negative.

    Args:
        coord_str: Raw coordinate text; may contain non-ASCII symbols.

    Returns:
        ``[longitude, latitude]`` as floats, or ``None`` when no matching
        pair is found.
    """
    # Degree signs and other non-ASCII symbols become spaces so only plain
    # numbers and hemisphere letters are left for the pattern below.
    ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', coord_str)
    found = re.search(
        r"(\d+\.\d+)\s*([NS])[\s;,/]*\s*(\d+\.\d+)\s*([EW])",
        ascii_only,
    )
    if found is None:
        return None

    latitude = float(found.group(1))
    longitude = float(found.group(3))
    if found.group(2) == 'S':
        latitude = -latitude
    if found.group(4) == 'W':
        longitude = -longitude

    # Longitude first, matching the order the caller stores in the output.
    return [longitude, latitude]

def convert_diameter_to_fraction(diameter_str, lunar_radius_km=1737.4):
    """Convert a textual diameter into a scaled fraction of the lunar radius.

    A kilometre value is preferred; a metre value is used only when no
    kilometre value is present. Numbers may carry thousands separators
    ("2,500 km"). Other units such as miles are ignored rather than being
    mistaken for metres.

    Args:
        diameter_str: Text such as ``"93 km"``, ``"2,500 km (1,600 mi)"``
            or ``"500 m"``.
        lunar_radius_km: Radius, in kilometres, that the diameter is
            divided by.

    Returns:
        ``(diameter_km / lunar_radius_km) / 0.17843`` as a float, or ``0``
        when no diameter can be extracted or the input is not usable text.
    """
    # Either a comma-grouped number (1,234 or 1,234.5) or a plain one.
    # The lookbehind stops the grouped form from starting mid-number.
    number = r"((?<!\d)\d{1,3}(?:,\d{3})+(?:\.\d+)?|\d+(?:\.\d+)?)"
    try:
        km_match = re.search(number + r"\s*km", diameter_str)
        if km_match:
            diameter_km = float(km_match.group(1).replace(",", ""))
        else:
            # Metres fallback. The word boundary keeps "mi" (miles) and
            # similar units from being read as metres.
            m_match = re.search(
                number + r"\s*m(?:et(?:er|re)s?)?\b", diameter_str
            )
            if not m_match:
                return 0  # No recognisable diameter in the text
            diameter_km = float(m_match.group(1).replace(",", "")) / 1000

        # NOTE(review): 0.17843 is an unexplained scale factor carried over
        # unchanged; presumably tied to how the consumer renders the
        # radius -- confirm before altering it.
        return (diameter_km / lunar_radius_km) / 0.17843
    except (TypeError, AttributeError, ValueError):
        return 0  # Non-string input or a value that cannot be converted


def clean_name(name):
    """Remove "(crater)" / "(lunar crater)" tags from a page title.

    Matching is case-insensitive, and any whitespace directly before the
    parenthesised tag is removed along with it.

    Args:
        name: Page title, possibly carrying such a tag.

    Returns:
        The title with every occurrence of the tag stripped.
    """
    disambiguator = re.compile(r'\s*\((lunar\s+)?crater\)', re.IGNORECASE)
    return disambiguator.sub('', name)

def fetch_first_large_image_url_from_wiki(page_title):
    """Return the URL of the first suitable image on a Wikipedia article.

    Downloads the article HTML and scans its ``<img>`` tags in document
    order. An image qualifies when it is hosted under the Wikimedia Commons
    upload path, its URL contains ``thumb`` or ``b/b4``, and it has a JPEG
    or PNG extension (matched case-insensitively, so ``.JPG`` counts too).

    Args:
        page_title: Article title; spaces are converted to underscores to
            build the URL.

    Returns:
        The image URL with an ``https:`` scheme, or the string
        ``"No suitable image found"`` when nothing qualifies.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
    # A timeout keeps one stalled connection from hanging the whole scrape.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Known file name of the red question mark placeholder image
    red_question_mark = "Text_document_with_red_question_mark.svg"
    commons_path = 'upload.wikimedia.org/wikipedia/commons'
    image_extensions = ('.jpg', '.jpeg', '.png')

    for img in soup.find_all('img'):
        src = img.get('src', '')
        if src.startswith("//"):  # Protocol-relative URL: add the scheme
            src = "https:" + src

        # Skip the red question mark image
        if red_question_mark in src:
            continue

        if commons_path not in src:
            continue

        # NOTE(review): 'b/b4' presumably whitelists one specific
        # non-thumbnail Commons file -- confirm whether it is still needed.
        if 'thumb' not in src and 'b/b4' not in src:
            continue

        # Compare extensions case-insensitively so files uploaded with
        # upper-case extensions are not skipped.
        lowered = src.lower()
        if any(ext in lowered for ext in image_extensions):
            return src

    return "No suitable image found"


def fetch_crater_data(page_title):
    """Scrape one crater article into a flat dictionary.

    The result always has ``name``, ``Wiki`` and ``image_url``. When the
    article has an infobox table, its rows may add ``coordinates``,
    ``radius``, ``depth`` and ``eponym``; row labels are matched
    case-insensitively.

    Args:
        page_title: Article title as returned by the category listing.

    Returns:
        A dict describing the crater. ``coordinates`` is ``[lon, lat]`` or
        the string ``"Not found"`` when the row exists but cannot be parsed;
        ``radius`` is the value produced by ``convert_diameter_to_fraction``;
        ``depth`` and ``eponym`` are the raw cell text.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    wiki_url = f"https://en.wikipedia.org/wiki/{page_title.replace(' ', '_')}"
    data = {
        'name': clean_name(page_title.replace('_', ' ')),
        'Wiki': wiki_url,
        'image_url': fetch_first_large_image_url_from_wiki(page_title),
    }

    # A timeout keeps one stalled connection from hanging the whole scrape.
    response = requests.get(wiki_url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    infobox = soup.find('table', class_='infobox')
    if not infobox:
        return data

    for row in infobox.find_all('tr'):
        header = row.find('th')
        value = row.find('td')
        if not (header and value):
            continue  # Only label/value rows carry data

        # Normalise the label once so every field is matched the same way.
        label = header.text.strip().lower()
        value_text = value.text.strip()

        if label == 'coordinates':
            data['coordinates'] = clean_coordinates(value_text) or "Not found"
        elif label == 'diameter':
            data['radius'] = convert_diameter_to_fraction(value_text)
        elif label == 'depth':
            data['depth'] = value_text
        elif label == 'eponym':
            data['eponym'] = value_text

    return data

def save_data_to_geojson(data, filename='craters.geojson'):
    """Write crater records to a GeoJSON FeatureCollection file.

    Each record becomes one Feature. A record whose ``coordinates`` value
    is a sequence of two or three numbers gets a Point geometry; any other
    value (missing, empty, or a placeholder string such as ``"Not found"``)
    yields a ``null`` geometry, which is how GeoJSON represents an
    unlocated feature.

    Args:
        data: Iterable of dicts with optional keys ``name``, ``Wiki``,
            ``radius``, ``depth``, ``eponym``, ``image_url`` and
            ``coordinates``.
        filename: Path of the output file; it is overwritten if present.

    Returns:
        None. The result is written to ``filename`` as UTF-8 JSON with a
        4-space indent.
    """
    features = []
    for entry in data:
        coords = entry.get('coordinates')
        has_position = (
            isinstance(coords, (list, tuple))
            and len(coords) in (2, 3)
            and all(isinstance(c, (int, float)) for c in coords)
        )
        # Never emit a Point with an empty or non-numeric position: that
        # would make the whole file invalid GeoJSON.
        geometry = (
            {"type": "Point", "coordinates": list(coords)}
            if has_position
            else None
        )

        features.append({
            "type": "Feature",
            "properties": {
                "name": entry.get('name'),
                "Wiki": entry.get('Wiki', ''),
                "numPoints": 16,
                "radius": entry.get('radius', 0),
                "depth": entry.get('depth', 'Unknown'),
                "eponym": entry.get('eponym', 'Unknown'),
                "image_url": entry.get('image_url', 'No image available')
            },
            "geometry": geometry
        })

    geojson = {
        "type": "FeatureCollection",
        "features": features
    }

    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(geojson, f, indent=4)

# Driver: list every page in the lunar impact-crater category, scrape each
# one, then export the collected records with the default output file name.
category = 'Category:Impact_craters_on_the_Moon'
members = get_category_members(category)

# One scraped record per category member, in listing order.
crater_data = [
    fetch_crater_data(page['title'])
    for page in members
]

save_data_to_geojson(crater_data)
print("GeoJSON data for all craters has been fetched and saved.")
