In [1]:
import getpass
import io
import json
import os
import time
from datetime import datetime
from timeit import default_timer as timer

import polars as pl
import requests
import tqdm.notebook as tqdm
from dateutil.relativedelta import relativedelta
from dotenv import find_dotenv, load_dotenv
from groq import Groq

load_dotenv(find_dotenv())


def _get_credential(env_name, prompt, secret=True):
    """Return the credential stored in ``env_name``, prompting for it when unset.

    Args:
        env_name: name of the environment variable to read.
        prompt: text shown to the user when the variable is missing or empty.
        secret: when True the value is read with ``getpass`` so that it is not
            echoed; when False plain ``input`` is used.

    Returns:
        The credential string. A value entered manually is also written back to
        ``os.environ[env_name]``.
    """
    value = os.getenv(env_name)
    if not value:
        # getpass does not echo what is typed, unlike input(), so API keys do
        # not end up in the notebook's saved output.
        value = getpass.getpass(prompt) if secret else input(prompt)
        os.environ[env_name] = value
    return value


# Load API keys from environment, or enter them manually when missing
acled_api_key = _get_credential("ACLED_API_KEY", "Enter your ACLED API key: ")
acled_email = _get_credential("ACLED_EMAIL", "Enter your ACLED email: ", secret=False)
groq_api_key = _get_credential("GROQ_API_KEY", "Enter your Groq API key: ")

In [2]:
# Choose a country to analyze. The value is passed as the `country` query
# parameter of the ACLED API requests made in later cells.
country = "Russia" # global variable read by the cells below

# Forward Outlook (CAST)

In [3]:
# Query parameters for the ACLED CAST endpoint
parameters = dict(email=acled_email, key=acled_api_key, country=country)

# Download the CAST data as CSV and stop on any HTTP error
response = requests.get("https://api.acleddata.com/cast/read.csv", params=parameters)
response.raise_for_status()

# Parse the CSV payload and drop rows whose country is "South Sudan"
cast = pl.read_csv(io.BytesIO(response.content)).filter(
    pl.col("country") != "South Sudan"
)

# English month name -> month number (1-12)
month_names = [
    "January",
    "February",
    "March",
    "April",
    "May",
    "June",
    "July",
    "August",
    "September",
    "October",
    "November",
    "December",
]
month_map = {name: number for number, name in enumerate(month_names, start=1)}

# Build a "YYYY-MM" key from the year and the zero-padded month number
month_number = pl.col("month").replace(month_map).cast(str).str.zfill(2)
cast = cast.with_columns(
    (pl.col("year").cast(str) + "-" + month_number).alias("year_month")
)

print(f"Retrieved {cast.height} events from ACLED API")
print(cast.head())

Retrieved 2656 events from ACLED API
shape: (5, 14)
┌─────────┬─────────────┬───────────┬──────┬───┬────────────┬────────────┬────────────┬────────────┐
│ country ┆ admin1      ┆ month     ┆ year ┆ … ┆ erv_observ ┆ vac_observ ┆ timestamp  ┆ year_month │
│ ---     ┆ ---         ┆ ---       ┆ ---  ┆   ┆ ed         ┆ ed         ┆ ---        ┆ ---        │
│ str     ┆ str         ┆ str       ┆ i64  ┆   ┆ ---        ┆ ---        ┆ i64        ┆ str        │
│         ┆             ┆           ┆      ┆   ┆ i64        ┆ i64        ┆            ┆            │
╞═════════╪═════════════╪═══════════╪══════╪═══╪════════════╪════════════╪════════════╪════════════╡
│ Russia  ┆ Republic of ┆ August    ┆ 2025 ┆ … ┆ null       ┆ null       ┆ 1746702918 ┆ 2025-08    │
│         ┆ Kabardino-B ┆           ┆      ┆   ┆            ┆            ┆            ┆            │
│         ┆ alkaria     ┆           ┆      ┆   ┆            ┆            ┆            ┆            │
│ Russia  ┆ Republic of ┆ September ┆ 2

### Calculate the average number of observed events over the 1, 3, 6 and 12 months preceding each month

In [4]:
def create_rolling_averages(cast, window_sizes=(1, 3, 6, 12)):
    """Add trailing-average columns ``avg{w}`` of ``total_observed`` per ``admin1``.

    For every row, ``avg{w}`` is the mean of the non-null ``total_observed``
    values recorded for the same ``admin1`` in the ``w`` calendar months
    *before* the row's own month (the row's month itself is excluded). Months
    with no row or a null value are skipped; when none of the ``w`` previous
    months has a value the average is null.

    Args:
        cast: polars DataFrame with ``admin1``, ``year_month`` ("YYYY-MM"
            strings) and ``total_observed`` columns.
        window_sizes: look-back lengths in months; one ``avg{w}`` column is
            added per entry, in the given order.

    Returns:
        The frame sorted by ``admin1`` and month, with the ``avg{w}`` columns
        appended and ``year_month`` still formatted as "YYYY-MM".
    """
    cast = cast.with_columns(
        pl.col("year_month").str.strptime(pl.Date, "%Y-%m").alias("year_month_dt")
    ).sort(["admin1", "year_month_dt"])

    # The rows and the (admin1, month) -> observed lookup do not depend on the
    # window size, so build them once instead of once per window.
    rows = cast.to_dicts()
    observed = {
        (row["admin1"], row["year_month_dt"]): row["total_observed"] for row in rows
    }

    avg_series = []
    for window in window_sizes:
        averages = []
        for row in rows:
            previous_values = []
            for months_back in range(1, window + 1):
                key = (
                    row["admin1"],
                    row["year_month_dt"] - relativedelta(months=months_back),
                )
                value = observed.get(key)
                if value is not None:
                    previous_values.append(value)

            averages.append(
                sum(previous_values) / len(previous_values) if previous_values else None
            )

        # Explicit dtype keeps the column Float64 even when every value is null.
        avg_series.append(
            pl.Series(name=f"avg{window}", values=averages, dtype=pl.Float64)
        )

    return (
        cast.with_columns(avg_series)
        .with_columns(pl.col("year_month_dt").dt.strftime("%Y-%m").alias("year_month"))
        .drop("year_month_dt")
    )


cast_clean = create_rolling_averages(cast)

### Calculate the percent increase of forecast events relative to the average of observed events over each look-back window (1, 3, 6 and 12 months). Tag a row as a hotspot (1) if the increase is at least 25%.

In [6]:
# Look-back windows (months) and the matching trailing-average columns
windows = [1, 3, 6, 12]
avg_cols = [f"avg{w}" for w in windows]

# Carry the last available average forward for rows that have none.
# The frame is sorted by admin1 and then month, so the fill must be done
# per admin1: without `.over("admin1")` the first rows of a region would
# inherit the last average of the previous region.
cast_clean = cast_clean.with_columns(
    pl.col(avg_cols).fill_null(strategy="forward").over("admin1")
)

# Treat missing observed counts as zero events
cast_clean = cast_clean.with_columns(pl.col("total_observed").fill_null(0))

for w in windows:
    percent_col = f"percent_increase{w}"
    hot_col = f"hotspot{w}"
    cast_clean = cast_clean.with_columns(
        pl.when(
            (pl.col(f"avg{w}") == 0) & (pl.col("total_forecast") > 0)
        )  # Handle "inf" increase when average observed events = 0, and forecasted events > 0
        # Substitute value for the undefined percentage: forecast * 10
        .then(pl.col("total_forecast") * 10)
        .otherwise(
            (pl.col("total_forecast") - pl.col(f"avg{w}")) / pl.col(f"avg{w}") * 100
        )
        # Null (missing forecast/average) and NaN (0 / 0) both count as "no increase"
        .fill_null(0)
        .fill_nan(0)
        .alias(percent_col)
    # A row is a hotspot when the increase is at least 25%
    ).with_columns((pl.col(percent_col) >= 25).cast(pl.Int8).alias(hot_col))

## Retrieve "hotspots" for the selected horizon

In [None]:
# Parameters
window = 1  # Look-back window (months) of the trailing average used as baseline
hot_col = f"hotspot{window}"
percent_col = f"percent_increase{window}"
avg_col = f"avg{window}"  # NOTE: later cells read this column as "avg1"; update them if `window` changes
horizon = 2  # Number of months, starting with the current one, to check for hotspots

# Months to check: the current month and the following (horizon - 1) months
current_date = datetime.now()
months_to_check = [
    (current_date + relativedelta(months=offset)).strftime("%Y-%m")
    for offset in range(horizon)
]

# Keep only rows flagged as hotspot within the months to check
hotspots = cast_clean.filter(
    (pl.col("year_month").is_in(months_to_check)) & (pl.col(hot_col) == 1)
)

# Sorted so that the list (and everything derived from it) is deterministic
hotspots_list = hotspots["admin1"].unique().sort().to_list()
print(f"Number of hotspots in the next {horizon} months: {len(hotspots_list)}")
print(f"Regions with significant increase in conflict risk: {hotspots_list}")

admin1_values = hotspots["admin1"].unique().sort()
year_month_values = hotspots["year_month"].unique().sort()

# Months that actually contain at least one hotspot, in chronological order
month_cols = year_month_values.to_list()

# Create pivot table for better visualization: one row per admin1, one
# percent-increase column per month (`on=` replaces the deprecated `columns=`)
pivot_table = hotspots.pivot(
    on="year_month",
    index="admin1",
    values=percent_col,
)

# Round the percentages to whole numbers and rename "YYYY-MM" -> "percent_YYYY-MM"
pivot_table = pivot_table.with_columns(
    [pl.col(month).round(0).cast(pl.Int64) for month in month_cols]
).rename({month: f"percent_{month}" for month in month_cols})

# Baseline average and forecast per admin1. When a region is a hotspot in
# several months, the baseline of the last row seen is kept.
avg1_dict = {}
predicted_dicts = {month: {} for month in months_to_check}

for row in hotspots.to_dicts():
    avg1_dict[row["admin1"]] = row[avg_col]
    if row["year_month"] in predicted_dicts:
        predicted_dicts[row["year_month"]][row["admin1"]] = row["total_forecast"]

# Add the baseline column and one forecast column per month to check
admin1_order = pivot_table["admin1"].to_list()
pred_cols = [f"pred_{month.replace('-', '_')}" for month in months_to_check]
pivot_table = pivot_table.with_columns(
    pl.Series(avg_col, [avg1_dict.get(admin1) for admin1 in admin1_order]).cast(int),
    *[
        pl.Series(col_name, [predicted_dicts[month].get(admin1) for admin1 in admin1_order])
        for col_name, month in zip(pred_cols, months_to_check)
    ],
)


# Reorder the columns for better readability
def reorder_columns_by_index(df, order_indices):
    """Return ``df`` with its columns selected in the order given by ``order_indices``."""
    cols = df.columns
    new_order = [cols[i] for i in order_indices]
    return df.select(new_order)


# Derive the order from the column names instead of fixed positions, so the
# cell keeps working for any `horizon` and when a month has no hotspots.
column_order = ["admin1", avg_col, *pred_cols, *[f"percent_{month}" for month in month_cols]]
pivot_table = reorder_columns_by_index(
    pivot_table, [pivot_table.columns.index(name) for name in column_order]
)


print(pivot_table)
print(
    "* Missing values mean that the state was not red in that month (below 25% increase)."
)

Number of hotspots in the next 2 months: 5
Regions with significant increase in conflict risk: ['Primorskiy', 'Republic of Dagestan', 'Rostov', 'Republic of Chuvash', 'Moscow Oblast']
shape: (5, 6)
┌──────────────────────┬──────┬──────────────┬──────────────┬─────────────────┬─────────────────┐
│ admin1               ┆ avg1 ┆ pred_2025_06 ┆ pred_2025_07 ┆ percent_2025-06 ┆ percent_2025-07 │
│ ---                  ┆ ---  ┆ ---          ┆ ---          ┆ ---             ┆ ---             │
│ str                  ┆ i64  ┆ i64          ┆ i64          ┆ i64             ┆ i64             │
╞══════════════════════╪══════╪══════════════╪══════════════╪═════════════════╪═════════════════╡
│ Moscow Oblast        ┆ 5    ┆ 12           ┆ 12           ┆ 140             ┆ 140             │
│ Primorskiy           ┆ 1    ┆ 2            ┆ 2            ┆ 100             ┆ 100             │
│ Republic of Chuvash  ┆ 1    ┆ 2            ┆ 2            ┆ 100             ┆ 100             │
│ Republic of Dage

  pivot_table = hotspots.pivot(


### Create a JSON string from the pivot_table (for LLM's use later)

In [20]:
# Percent-increase and forecast columns present in pivot_table. Their
# "YYYY-MM" / "YYYY_MM" suffixes sort chronologically, so sorting the names
# puts the earliest month first.
percent_cols = sorted(c for c in pivot_table.columns if c.startswith("percent_"))
pred_cols = sorted(c for c in pivot_table.columns if c.startswith("pred_"))

hotspot_records = {}

for row in pivot_table.to_dicts():
    # Prioritize the earliest month; fall back to later months when it is null.
    # Only columns that exist are read, so a month without hotspots cannot
    # raise a KeyError.
    percentage = next(
        (row[col] for col in percent_cols if row[col] is not None), None
    )

    hotspot_records[row["admin1"]] = {
        "percentage": percentage,
        "avg1": row.get("avg1"),
        # One "pred_YYYY_MM" entry per forecast column
        **{col: row.get(col) for col in pred_cols},
    }

# JSON string consumed by the LLM cells further down
hotspots_and_percentage = json.dumps(hotspot_records, indent=2)

In [21]:
# Display the JSON string built in the previous cell
hotspots_and_percentage

'{\n  "Moscow Oblast": {\n    "percentage": 140,\n    "avg1": 5,\n    "pred_2025_06": 12,\n    "pred_2025_07": 12\n  },\n  "Primorskiy": {\n    "percentage": 100,\n    "avg1": 1,\n    "pred_2025_06": 2,\n    "pred_2025_07": 2\n  },\n  "Republic of Chuvash": {\n    "percentage": 100,\n    "avg1": 1,\n    "pred_2025_06": 2,\n    "pred_2025_07": 2\n  },\n  "Republic of Dagestan": {\n    "percentage": 100,\n    "avg1": 1,\n    "pred_2025_06": 2,\n    "pred_2025_07": 2\n  },\n  "Rostov": {\n    "percentage": 367,\n    "avg1": 6,\n    "pred_2025_06": 28,\n    "pred_2025_07": 29\n  }\n}'

# Summarizing Notes with LLM
### Load ACLED data for `{country}`

In [9]:
cwd = os.getcwd()

save_dir = "data" # ...or specify your directory to save files
os.makedirs(save_dir, exist_ok=True) # Create directory if it doesn't exist

parameters = {
    "email": acled_email,
    "key": acled_api_key,
    "country": country,
    "year": 2025,
}

# Save files as country_year.csv.
# The file-name slug gets its own variable: rebinding `country` here would make
# any re-run of this or an earlier cell query the API with the lowercased,
# underscored name instead of the real country name.
country_slug = parameters["country"].replace(" ", "_").lower()
year = parameters["year"]
filename = f"{country_slug}_{year}.csv"
filepath = os.path.join(save_dir, filename)

response = requests.get("https://api.acleddata.com/acled/read.csv", params=parameters)
# Stop on HTTP errors instead of going on to read a stale or missing file
response.raise_for_status()

with open(filepath, "wb") as f:
    f.write(response.content)
print(f"Downloaded ACLED CSV data successfully as '{filepath}'.")

acled = pl.read_csv(filepath)
acled.head(3)

Downloaded ACLED CSV data successfully as 'data/russia_2025.csv'.


event_id_cnty,event_date,year,time_precision,disorder_type,event_type,sub_event_type,actor1,assoc_actor_1,inter1,actor2,assoc_actor_2,inter2,interaction,civilian_targeting,iso,region,country,admin1,admin2,admin3,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,tags,timestamp
str,str,i64,i64,str,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,str,str,str,f64,f64,i64,str,str,str,i64,str,i64
"""RUS37502""","""2025-05-30""",2025,1,"""Political violence""","""Explosions/Remote violence""","""Air/drone strike""","""Military Forces of Ukraine (20…",,"""External/Other forces""","""Civilians (Russia)""",,"""Civilians""","""External/Other forces-Civilian…","""Civilian targeting""",643,"""Europe""","""Russia""","""Belgorod""","""Borisovskiy""",,"""Zozuli""",50.5647,35.9259,2,"""Belgorod Governor""","""Subnational""","""On 30 May 2025, 1 Ukrainian dr…",0,,1748971252
"""RUS37532""","""2025-05-30""",2025,1,"""Political violence""","""Explosions/Remote violence""","""Air/drone strike""","""Military Forces of Ukraine (20…",,"""External/Other forces""","""Civilians (Russia)""",,"""Civilians""","""External/Other forces-Civilian…","""Civilian targeting""",643,"""Europe""","""Russia""","""Belgorod""","""Shebekinskiy""",,"""Arkhangelskoe""",50.3535,36.7845,2,"""Belgorod Governor""","""Subnational""","""On 30 May 2025, 1 Ukrainian dr…",0,,1748971252
"""RUS37533""","""2025-05-30""",2025,1,"""Political violence""","""Explosions/Remote violence""","""Air/drone strike""","""Military Forces of Ukraine (20…",,"""External/Other forces""",,,,"""External/Other forces only""",,643,"""Europe""","""Russia""","""Belgorod""","""Belgorodskiy""",,"""Oktyabrskiy""",50.4391,36.3601,1,"""Belgorod Governor""","""Subnational""","""On 30 May 2025, Ukrainian mili…",0,,1748971252


### Fetching notes for hotspots for the previous 2 months

In [10]:
# Define number of past month to check for recent events
months = 2

# Common spelling variations tried when a forecast region has no exact match.
# Stored as (original, variant) pairs instead of a dict: "al" has two variants
# and a dict literal would silently keep only the last one.
_SPELLING_VARIATIONS = [
    ("al", "aj"), ("aj", "al"),
    ("el", "al"), ("al", "el"),
    ("-", " "), (" ", "-"),
]


def _find_similar_regions(state, candidates):
    """Return the names in ``candidates`` that look like variants of ``state``.

    A candidate matches when, ignoring case, one name contains the other or when
    applying one of ``_SPELLING_VARIATIONS`` to ``state`` yields the candidate.
    ``None`` candidates are skipped and each match is returned once, in the
    order of ``candidates``.
    """
    state_lower = state.lower()
    matches = []
    for candidate in candidates:
        if candidate is None or candidate in matches:
            continue
        candidate_lower = candidate.lower()
        is_substring = state_lower in candidate_lower or candidate_lower in state_lower
        is_variant = any(
            original in state_lower
            and state_lower.replace(original, variant) == candidate_lower
            for original, variant in _SPELLING_VARIATIONS
        )
        if is_substring or is_variant:
            matches.append(candidate)
    return matches


def _join_notes(notes):
    """Join event notes with single spaces, skipping missing (``None``) notes."""
    return " ".join(note for note in notes if note is not None)


# event_date holds "YYYY-MM-DD" strings, so a string comparison orders by date
cutoff_date = (datetime.now() - relativedelta(months=months)).strftime("%Y-%m-%d")
acled_recent = acled.filter(
    pl.col("admin1").is_in(hotspots_list) & (pl.col("event_date") >= cutoff_date)
)

# Explicit schema keeps the column a string even when there are no hotspots
complete_hotspot_notes = pl.DataFrame(
    {"admin1": hotspots_list}, schema={"admin1": pl.Utf8}
)

# Get recent notes for states that have them
recent_notes = (
    acled_recent.select(["admin1", "notes"])
    .group_by("admin1")
    .agg(pl.col("notes").str.join(" ").alias("notes_list"))
)

# Find which states don't have recent events
states_with_recent = recent_notes["admin1"].to_list()
states_without_recent = [state for state in hotspots_list if state not in states_with_recent]

# Debug information
print(f"States without recent events: {states_without_recent}")

# Region names known to the dataset, used to look for near matches
all_admin1_values = sorted(
    value for value in acled["admin1"].unique().to_list() if value is not None
)

# For states without recent events, get their 10 most recent events (or any events at all)
older_events = {}
for state in states_without_recent:
    state_events = (
        acled.filter(pl.col("admin1") == state)
        .sort("event_date", descending=True)
        .head(10)
    )

    if state_events.height > 0:
        # The state has events, just none within the recent window
        notes_text = _join_notes(state_events["notes"].to_list())
        older_events[state] = f"[NOTE: These events occurred more than {months} months ago] {notes_text}"
        print(f"Found {state_events.height} older events for {state}")
        continue

    # No events at all under this exact name: look for similar region names
    possible_matches = _find_similar_regions(state, all_admin1_values)

    if not possible_matches:
        # If no events exist at all and no similar regions found
        older_events[state] = "NOTE: This region appears in forecasts but has no historical events in the dataset. This may indicate either newly created administrative boundaries or an emerging conflict zone without previous violence."
        print(f"No events or similar regions found for {state}")
        continue

    match_info = ", ".join(possible_matches)
    print(f"Found possible matches for {state}: {match_info}")

    # Get the 10 most recent events from every similar region
    similar_events = []
    for match in possible_matches:
        match_events = (
            acled.filter(pl.col("admin1") == match)
            .sort("event_date", descending=True)
            .head(10)
        )

        if match_events.height > 0:
            match_notes_text = _join_notes(match_events["notes"].to_list())
            similar_events.append(f"[Events from similar region '{match}'] {match_notes_text}")
            print(f"Found {match_events.height} events from similar region '{match}'")

    if similar_events:
        combined_notes = " ".join(similar_events)
        older_events[state] = f"NOTE: This region appears in forecasts but has no exact matches in historical data. Similar regions found: {match_info}. {combined_notes}"
    else:
        older_events[state] = f"NOTE: This region appears in forecasts but has no exact matches in historical data. Similar regions found: {match_info}, but no events were found."

# Create a DataFrame for older events. The explicit schema keeps both columns
# strings when `older_events` is empty, so the concat below sees matching dtypes.
older_notes = pl.DataFrame(
    {
        "admin1": list(older_events.keys()),
        "notes_list": list(older_events.values()),
    },
    schema={"admin1": pl.Utf8, "notes_list": pl.Utf8},
)

# Combine recent and older notes
all_notes = pl.concat([recent_notes, older_notes])

# Join with complete red states list
acled_hotspots_notes = (
    complete_hotspot_notes.join(
        all_notes,
        on="admin1",
        how="left"
    )
    .with_columns(pl.col("notes_list").fill_null("No events information retrieved."))
    .sort("admin1")
)

print(acled_hotspots_notes)

States without recent events: []
shape: (5, 2)
┌──────────────────────┬─────────────────────────────────┐
│ admin1               ┆ notes_list                      │
│ ---                  ┆ ---                             │
│ str                  ┆ str                             │
╞══════════════════════╪═════════════════════════════════╡
│ Moscow Oblast        ┆ On 29 May 2025, Russian air de… │
│ Primorskiy           ┆ On 30 May 2025, according to t… │
│ Republic of Chuvash  ┆ Around 16 April 2025 (as repor… │
│ Republic of Dagestan ┆ On 16 May 2025, health resort … │
│ Rostov               ┆ Interception: Around 27 May 20… │
└──────────────────────┴─────────────────────────────────┘


### Pass the notes to the LLM for summarization

In [11]:
# LLM API configuration: Groq client authenticated with the GROQ_API_KEY
# value loaded in the first cell
client = Groq(api_key=groq_api_key)

In [23]:
# Parse the JSON string built earlier back into a dict keyed by admin1 name
hotspots_percentage_dict = json.loads(hotspots_and_percentage)

def truncate_text(text, max_chars=20000):
    """Cut ``text`` to at most ``max_chars`` characters plus a truncation marker.

    Text that already fits is returned unchanged; otherwise the first
    ``max_chars`` characters are kept and "... [truncated due to length]" is
    appended. The default of 20000 characters was chosen by the original
    author as roughly 5000 tokens, below a stated 6000-token API limit.
    """
    if len(text) <= max_chars:
        return text
    return f"{text[:max_chars]}... [truncated due to length]"

# Loop through each state and stream an LLM summary of its event notes
for row in acled_hotspots_notes.to_dicts():
    admin1 = row['admin1']
    notes_list = row['notes_list']
    notes_list_truncated = truncate_text(notes_list)

    if admin1 not in hotspots_percentage_dict:
        print(f"No percentage data available for {admin1}")
        continue

    state_data = hotspots_percentage_dict[admin1]
    percentage = state_data.get('percentage')
    avg1 = state_data.get('avg1')

    # Forecasts are stored under "pred_YYYY_MM" keys. Reading them by prefix
    # (sorted = chronological) instead of by fixed month names keeps this cell
    # working when the forecast months change. The earliest non-null forecast
    # is the one quoted in the summary line.
    pred_keys = sorted(key for key in state_data if key.startswith('pred_'))
    predicted = next(
        (state_data[key] for key in pred_keys if state_data[key] is not None), None
    )

    # Create a clean data snippet for this state only
    state_percentage_data = {
        admin1: {
            'percentage': percentage,
            'avg1': avg1,
            **{key: state_data[key] for key in pred_keys},
        }
    }

    try:
        completion = client.chat.completions.create(
            model="deepseek-r1-distill-llama-70b",
            messages=[
                {"role": "user",
                 "content": f'''
CRITICAL: Your response will be checked against the source data for completeness.

TASK: Summarize the events for {admin1} strictly based ONLY on the provided notes in {notes_list_truncated}. 

CRITICAL INSTRUCTIONS:
- Do NOT add any information not contained in these notes
- Do NOT reference any events, dates, locations, or details not explicitly mentioned
- Do NOT infer or generate any external context or background information
- Stick exclusively to what is written in the provided notes
- Pay close attention to any casualties, significant infrastructure damage, equipment loss and other significant events

NUMBERS: Use ONLY the EXACT numbers from this JSON data: {json.dumps(state_percentage_data, indent=2)}
Do NOT generate, calculate, or infer any numbers beyond what is explicitly provided.

Structure your response exactly as follows:
    **{admin1}**
    Analysis of security situation.
    Patterns in events, as well as description of events, casualties, and significant impacts.
    Predicted increase in violent events: {percentage}% (from {avg1} to {predicted} events).

REMINDER: Base your entire analysis exclusively on the provided notes. Do not add external knowledge, context, or speculation.
'''},
            ],
            temperature=0.3,
            max_completion_tokens=131072,
            stream=True,
            stop=None,
        )

        # Print the streamed response as it arrives
        for chunk in completion:
            print(chunk.choices[0].delta.content or "", end="")
        print("\n")

    except Exception as e:
        # Best effort: report the failure and continue with the next state
        print(f"Error processing {admin1}: {e}")

    # Pause between requests
    time.sleep(1)

<think>
Okay, I need to summarize the events for Moscow Oblast based strictly on the provided notes. First, I'll go through each event listed and categorize them. It looks like there are several incidents involving Ukrainian drones being shot down, protests, environmental activism, violent attacks, and some criminal activities.

Starting from the most recent, on May 29, 2025, there was an incident where Russian air defense shot down three Ukrainian drones, causing debris to fall in Odintsovo. No casualties, but some property damage. Then on May 28, there were two attacks on military plants in Dubna, with fires breaking out. Also, on the same day, debris from shot-down drones caused house fires in Chekhov. 

Going back to May 25, there's a possible drone incident in Bronnitsy with unknown casualties and a violent attack on a football player by a far-right group in Shchyelkovo. On May 22, locals protested against development in Balashikha, and there was a drone strike on a theme park nea

# *Optional (implemented with Selenium, requires Chrome browser)

# Security Implications for UN Operations

In [5]:
# Read the peacekeeping file from the current user's Downloads folder instead
# of one machine's absolute path.
# NOTE(review): the date in the file name is fixed; adjust it to the file you downloaded.
peacekeepers_path = os.path.join(
    os.path.expanduser("~"), "Downloads", "peacekeeping_2025-05-16.xlsx"
)
peacekeepers = pl.read_excel(peacekeepers_path)
peacekeepers.head()

EVENT_ID_CNTY,EVENT_DATE,YEAR,TIME_PRECISION,DISORDER_TYPE,EVENT_TYPE,SUB_EVENT_TYPE,ACTOR1,ASSOC_ACTOR_1,INTER1,ACTOR2,ASSOC_ACTOR_2,INTER2,INTERACTION,CIVILIAN_TARGETING,ISO,REGION,COUNTRY,ADMIN1,ADMIN2,ADMIN3,LOCATION,LATITUDE,LONGITUDE,GEO_PRECISION,SOURCE,SOURCE_SCALE,NOTES,FATALITIES,TAGS,TIMESTAMP
str,date,i64,i64,str,str,str,str,str,str,str,str,str,str,str,i64,str,str,str,str,str,str,f64,f64,i64,str,str,str,i64,str,i64
"""XKX1147""",2025-04-01,2025,1,"""Strategic developments""","""Strategic developments""","""Change to group/activity""","""NATO: North Atlantic Treaty Or…","""KFOR: Kosovo Force (1999-); Mi…","""External/Other forces""","""""","""""","""""","""External/Other forces only""","""""",0,"""Europe""","""Kosovo""","""Ferizaj""","""Ferizaj""","""""","""Ferizaj""",42.371,21.155,3,"""KoSSev""","""National""","""Movement of forces: On 1 April…",0,"""""",1744148740
"""XKX1106""",2025-01-24,2025,1,"""Strategic developments""","""Strategic developments""","""Change to group/activity""","""NATO: North Atlantic Treaty Or…","""KFOR: Kosovo Force (1999-); Mi…","""External/Other forces""","""""","""""","""""","""External/Other forces only""","""""",0,"""Europe""","""Kosovo""","""Mitrovica""","""Leposaviq""","""""","""Leposaviq""",43.102,20.801,1,"""KoSSev""","""Subnational""","""Security measures: On 24 Janua…",0,"""""",1747163769
"""XKX1105""",2025-01-18,2025,1,"""Strategic developments""","""Strategic developments""","""Change to group/activity""","""NATO: North Atlantic Treaty Or…","""KFOR: Kosovo Force (1999-); Mi…","""External/Other forces""","""""","""""","""""","""External/Other forces only""","""""",0,"""Europe""","""Kosovo""","""Mitrovica""","""North Mitrovica""","""""","""North Mitrovica""",42.895,20.865,1,"""KoSSev""","""Subnational""","""Movement of forces: On 18 Janu…",0,"""""",1738102195
"""XKX1100""",2025-01-05,2025,1,"""Strategic developments""","""Strategic developments""","""Change to group/activity""","""NATO: North Atlantic Treaty Or…","""KFOR: Kosovo Force (1999-)""","""External/Other forces""","""""","""""","""""","""External/Other forces only""","""""",0,"""Europe""","""Kosovo""","""Mitrovica""","""North Mitrovica""","""""","""North Mitrovica""",42.895,20.865,1,"""KoSSev""","""Subnational""","""Movement of forces: On 5 Janua…",0,"""""",1737435899
"""XKX1075""",2024-11-07,2024,1,"""Strategic developments""","""Strategic developments""","""Change to group/activity""","""NATO: North Atlantic Treaty Or…","""KFOR: Kosovo Force (1999-)""","""External/Other forces""","""""","""""","""""","""External/Other forces only""","""""",0,"""Europe""","""Kosovo""","""Mitrovica""","""North Mitrovica""","""""","""North Mitrovica""",42.895,20.865,3,"""KoSSev""","""Subnational""","""Security measures: On 7 Novemb…",0,"""""",1731510677


In [None]:
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [31]:
def click_and_enter_credentials():
    """Drive Chrome through the ACLED curated-data download form.

    Opens the curated data page, clicks the download link with id
    ``download-link-21415``, fills in the ACLED API key and email from the
    notebook globals ``acled_api_key`` / ``acled_email`` and submits the form.
    Any error is printed and a screenshot is saved as ``credentials_error.png``;
    the browser session is always shut down at the end.
    """
    driver = webdriver.Chrome()

    try:
        driver.maximize_window()  # Maximize window to avoid viewport issues
        wait = WebDriverWait(driver, 15)

        driver.get("https://acleddata.com/curated-data-files/#peacekeepers")
        time.sleep(5)

        download_btn = driver.find_element(By.ID, "download-link-21415")
        download_btn.click()

        time.sleep(3)

        # Enter API key
        api_key_input = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "input.credentials-api"))
        )
        api_key_input.clear()
        api_key_input.send_keys(acled_api_key)
        print("Enter API key: success")

        # Enter email
        email_input = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "input.credentials-email"))
        )
        email_input.clear()
        email_input.send_keys(acled_email)
        print("Enter email: success")

        # Fix: Use JavaScript click to bypass the overlapping element
        submit_button = wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "button.btn-check-api-credentials"))
        )
        driver.execute_script("arguments[0].click();", submit_button)

        # Give the browser time to finish the download before shutting down
        time.sleep(5)
        print("File downloaded to the default browser download folder.")

    except Exception as e:
        print(f"Error: {e}")
        driver.save_screenshot("credentials_error.png")

    finally:
        # quit() ends the whole WebDriver session (all windows and the driver
        # process); close() would only close the current window.
        driver.quit()

if __name__ == "__main__":
    click_and_enter_credentials()

Enter API key: success
Enter email: success
File downloaded to the default browser download folder.


In [24]:
# Read the Factal report from the current user's Downloads folder instead of
# one machine's absolute path.
# NOTE(review): the date range in the file name is fixed; adjust it to the file you downloaded.
factal_path = os.path.join(
    os.path.expanduser("~"),
    "Downloads",
    "factal_single_topic_report-2025-05-01-2025-06-05.csv",
)
factal = pl.read_csv(factal_path)