# EXTERNAL FACTOR FOR DISASTER DATA

In [None]:
import os
import re
from datetime import timedelta

import google.generativeai as genai
import pandas as pd

# Read the Gemini API key from the environment so the secret never lives in
# the notebook or in version control. Any key that was previously committed
# in this cell must be treated as leaked and revoked.
_api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "No Gemini API key found: set the GOOGLE_API_KEY (or GEMINI_API_KEY) "
        "environment variable before running this cell."
    )
genai.configure(api_key=_api_key)

# Initialize the Gemini model
model = genai.GenerativeModel("gemini-1.5-pro")

# Matches one "Event: <name>, Start: YYYY-MM-DD, End: YYYY-MM-DD" record.
# Compiled once at module level; anything after the end date on the same line
# (e.g. a parenthesised remark added by the model) is ignored.
_DISASTER_PATTERN = re.compile(
    r'Event: (.*?), Start: (\d{4}-\d{2}-\d{2}), End: (\d{4}-\d{2}-\d{2})'
)

# Column layout shared by every DataFrame this cell returns, including empty ones.
_DISASTER_COLUMNS = ['ResidentDate', 'event_type']


def _expand_disaster_matches(matches):
    """Turn regex matches into one record per calendar day of each disaster.

    Args:
        matches: Iterable of ``re.Match`` objects produced by
            ``_DISASTER_PATTERN``.

    Returns:
        A ``(records, processed)`` tuple: ``records`` is a list of dicts with
        the keys in ``_DISASTER_COLUMNS`` and ``processed`` is the number of
        matches that were expanded successfully.
    """
    records = []
    processed = 0
    for match in matches:
        event_type, start_date, end_date = match.groups()
        event_type = event_type.strip()

        # The regex only checks the digit layout, so impossible dates such as
        # 2017-02-30 still reach this point and must be rejected here.
        try:
            start = pd.to_datetime(start_date)
            end = pd.to_datetime(end_date)
        except (ValueError, OverflowError) as e:
            print(f"❌ Error parsing disaster: {match.group()}, Error: {e}")
            continue

        # A reversed range would expand to zero days; report it instead of
        # claiming the event was processed.
        if end < start:
            print(
                f"❌ Error parsing disaster: {match.group()}, "
                f"Error: end date {end_date} is before start date {start_date}"
            )
            continue

        # Create an entry for each day of the disaster (both ends included).
        records.extend(
            {'ResidentDate': day.date(), 'event_type': event_type}
            for day in pd.date_range(start, end, inclusive='both')
        )
        processed += 1
        print(f"✓ Processed: {event_type} from {start_date} to {end_date}")

    return records, processed


def generate_disaster_data_with_gemini(start_year, end_year, location="Chennai"):
    """Ask Gemini for disasters in a location and expand them to daily rows.

    Sends a prompt to the module-level ``model`` requesting one line per
    disaster in the form ``Event: <name>, Start: YYYY-MM-DD, End: YYYY-MM-DD``,
    prints the raw reply, and parses every line that matches that layout.
    Progress and problems are reported with ``print``.

    Args:
        start_year: First year of the period, interpolated into the prompt.
        end_year: Last year of the period, interpolated into the prompt.
        location: Place name interpolated into the prompt.

    Returns:
        A DataFrame with columns ``ResidentDate`` (``datetime.date``) and
        ``event_type`` (str), one row per day of each parsed disaster, sorted
        by ``ResidentDate``. The frame is empty, with the same columns, when
        the model returns no usable text or nothing could be parsed.
    """
    # More specific prompt to get better structured data
    prompt = f"""List all the natural disasters (including floods, cyclones, heavy rains) that occurred in {location} between {start_year} and {end_year}.
    For each disaster, provide:
    1. The type of disaster
    2. Exact start and end dates
    
    Format STRICTLY as follows (one disaster per line):
    Event: Cyclone Vardah, Start: 2016-12-12, End: 2016-12-13
    Event: Flood, Start: 2017-11-01, End: 2017-11-03
    
    Include only verified historical disasters with specific dates."""

    # Get response from model
    response = model.generate_content(prompt)
    try:
        # `.text` raises ValueError when the reply carries no text part
        # (for example when the response was blocked).
        disaster_text = response.text
    except ValueError as e:
        print(f"⚠️ Gemini returned no usable text: {e}")
        return pd.DataFrame(columns=_DISASTER_COLUMNS)
    print(f"Raw Response from Gemini:\n{disaster_text}\n")

    matches = list(_DISASTER_PATTERN.finditer(disaster_text))
    if not matches:
        print("⚠️ No matching disaster data found in the response. Check the format.")
        return pd.DataFrame(columns=_DISASTER_COLUMNS)

    records, processed = _expand_disaster_matches(matches)

    # Passing the columns explicitly keeps the schema stable even when every
    # record was rejected and `records` is empty.
    df = pd.DataFrame(records, columns=_DISASTER_COLUMNS)
    if not df.empty:
        df = df.sort_values('ResidentDate')
        print(f"\nFound {len(df)} total disaster days across {processed} distinct events")
    else:
        print("\n⚠️ No disaster data was successfully parsed")

    return df

# Query Gemini for Chennai disasters in the 2016-2022 window and expand them
# into one row per affected day.
print("Generating disaster data for Chennai (2016-2022)...")
disaster_df = generate_disaster_data_with_gemini(2016, 2022, location="Chennai")

# Write the CSV only when something was parsed; otherwise just report it.
if disaster_df.empty:
    print("\n❌ No data to save - DataFrame is empty")
else:
    disaster_df.to_csv(
        '/finalproject/collection_process/disaster_data_chennai_2016_2022_gemini.csv',
        index=False,
    )
    print("\n✅ Disaster data saved to 'disaster_data_chennai_2016_2022_gemini.csv'")
    print("\nSample of generated data:")
    # Show the first ten rows as a quick sanity check of the saved data.
    print(disaster_df.head(10))

  from .autonotebook import tqdm as notebook_tqdm


Generating disaster data for Chennai (2016-2022)...
Raw Response from Gemini:
Event: Cyclone Vardah, Start: 2016-12-12, End: 2016-12-13
Event: Heavy Rains/Flooding, Start: 2017-11-01, End: 2017-11-04
Event: Cyclone Ockhi, Start: 2017-11-30, End: 2017-12-02 (Peripheral impact with heavy rainfall)
Event: Drought, Start: 2018-06-01, End: 2019-06-30 (Approximate dates for a prolonged period)
Event: Heavy Rains/Flooding, Start: 2019-10-30, End: 2019-11-02
Event: Cyclone Nivar, Start: 2020-11-25, End: 2020-11-26
Event: Cyclone Burevi, Start: 2020-12-02, End: 2020-12-05
Event: Heavy Rains/Flooding, Start: 2021-11-06, End: 2021-11-11
Event: Heavy Rains/Flooding, Start: 2022-10-31, End: 2022-11-03


It's important to note:

* **Precision of dates:**  The start and end dates represent the most significant period of impact.  Some events, like droughts, have less precise boundaries.
* **Focus on major events:** This list focuses on impactful events.  Chennai experiences annual monsoon rains, some 