#Extracting armed conflict locations from the notes column of the ACLED dataset

In [None]:
import pandas as pd #Used for data manipulation and analysis, particularly for working with dataframes.
from openai import OpenAI #To access GPT4o model used to extract loction entity from text
import googlemaps #To get latitude and longitude for a given location
from dotenv import load_dotenv #to load environment variables(API keys) from a .env file.
import os #Provides ways to interact with the operating system, including accessing environment variables

load_dotenv()  # Load variables from a .env file so the API keys read later with os.getenv are available

In [None]:
# Read the Nairobi subset of the ACLED export into a DataFrame
df = pd.read_csv(filepath_or_buffer='ACLED-Nairobi.csv')

In [None]:
# Remove the ACLED metadata columns listed below. Only these columns are
# dropped; any column not named here (such as 'notes') stays in df.
df = df.drop(
    labels=[
        'event_date',
        'year',
        'time_precision',
        'disorder_type',
        'event_type',
        'sub_event_type',
        'actor1',
        'assoc_actor_1',
        'inter1',
        'interaction',
        'civilian_targeting',
        'iso',
        'region',
        'country',
        'admin1',
        'admin2',
        'admin3',
        'location',
        'latitude',
        'longitude',
        'geo_precision',
        'source',
        'source_scale',
        'fatalities',
        'tags',
        'timestamp',
        'population_1km',
        'population_2km',
        'population_5km',
        'population_best',
    ],
    axis='columns',
)

##Using GPT-4o for extracting locations from Notes

In [None]:
# Create the OpenAI client; the key is looked up in the process environment
# under OPENAI_API_KEY (None is passed through if the variable is unset).
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

In [None]:
# Function to extract the conflict location from a free-text note using GPT-4o
def extract_location_gpt(text):
    """Return the Nairobi location named in a conflict description, or "NA".

    Args:
        text: Free-text description of a conflict event (one value of the
            'notes' column).

    Returns:
        The location name produced by the model with surrounding whitespace
        removed, or the string "NA" when the note is missing or blank, or
        when the model returns no text.
    """
    # Missing notes (None or a float NaN) and blank notes carry no location.
    # Answer "NA" directly rather than sending a prompt that contains "nan"
    # or nothing at all.
    if text is None or (isinstance(text, float) and text != text):
        return "NA"
    if isinstance(text, str) and not text.strip():
        return "NA"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a assistant who knows about locations in Nairobi, Kenya. You help extract the most accurate location in nairobi where the conflict occurred from a given text that describes the conflict. The location needs to be the closest possible to the conflict so that we can capture the latitute and longitute of it, the location can be a building name, road name, neighbourhood name, locality name or any other closest approximation. Sometimes people from a location might create conflict in another location, always extract the location of the conflict.  You output only the name of the location, if location canot be found you output NA"},
            {"role": "user", "content": f"Extract the locality name of nairobi where the conflict occurred from the following text that describes the conflict:\n\n{text}\n\nLocation:"}
        ],
        max_tokens=200,
        temperature=0,  # deterministic output for repeatable extraction
        n=1,
        stop=["\n"]  # the answer is a single line; cut generation at the first newline
    )
    # The message content can be None (or empty when generation stops at
    # once); treat both as "no location found" instead of failing on .strip().
    content = response.choices[0].message.content
    location = (content or "").strip()
    return location or "NA"


In [None]:
# Run extract_location_gpt on every value of the 'notes' column, in row order,
# and keep the answers in a new 'Location_extracted' column
df['Location_extracted'] = [extract_location_gpt(note) for note in df['notes']]

##Using the Google Maps API to get latitude and longitude for the extracted locations

In [None]:
# Create the Google Maps client; the key is looked up in the process
# environment under GOOGLE_MAPS_API_KEY
gmaps = googlemaps.Client(key=os.environ.get('GOOGLE_MAPS_API_KEY'))

In [None]:
# Function to get latitude and longitude for a location name
def get_lat_long(location):
    """Geocode a location name with the Google Maps client.

    Args:
        location: Location name to look up.

    Returns:
        A (lat, lng) tuple taken from the first geocoding result, or
        (None, None) when `location` is not a string, is blank, is the
        "NA" marker used for "no location found", or the geocoder returns
        no results.
    """
    # Skip the lookup for values that are not real place names. Sending the
    # "NA" marker or an empty address to the geocoder wastes a request and
    # can produce a bogus match or an API error.
    if not isinstance(location, str):
        return None, None
    query = location.strip()
    if not query or query == "NA":
        return None, None

    geocode_result = gmaps.geocode(location)
    if not geocode_result:
        return None, None

    # Use the first (best-ranked) candidate returned by the geocoder.
    coords = geocode_result[0]['geometry']['location']
    return coords['lat'], coords['lng']

In [None]:
# Geocode the extracted location column of the DataFrame.
# Each distinct location name is sent to get_lat_long only once and the
# result is reused for every row that shares it, so repeated names do not
# trigger repeated geocoding requests.
coords_cache = {}
coords = []
for loc in df['Location_extracted']:
    if loc not in coords_cache:
        coords_cache[loc] = get_lat_long(loc)
    coords.append(coords_cache[loc])

# Assign positionally (one value per row, in row order); rows without a
# geocoding result get a missing value in both columns.
df['Latitude'] = [lat for lat, _ in coords]
df['Longitude'] = [lng for _, lng in coords]

In [None]:
# Save the notes together with the extracted location and its coordinates
# for downstream analysis; the row index is not written.
# NOTE: pandas.read_csv treats the literal text "NA" as a missing value by
# default, so readers of this file need keep_default_na=False to see it as text.
df.to_csv(path_or_buf='ACLED_Extracted.csv', index=False)