In [10]:
import stanza

stanza.download('en')

nlp = stanza.Pipeline(lang='en', processors='tokenize,ner', use_gpu=True)

def extract_locations(text):
    if not isinstance(text, str):
        return None
    
    doc = nlp(text)
    locations = [ent.text for ent in doc.ents if ent.type == "GPE"]  # Extract locations
    return locations if locations else None

df['place_name'] = df['selftext'].apply(extract_locations)

print(df[['selftext', 'place_name']].head())

Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.10.0.json: 424kB [00:00, 11.3MB/s]
2025-03-24 09:54:12 INFO: Downloaded file to /home/sakshi/stanza_resources/resources.json
2025-03-24 09:54:12 INFO: Downloading default packages for language: en (English) ...
2025-03-24 09:54:12 INFO: File exists: /home/sakshi/stanza_resources/en/default.zip
2025-03-24 09:54:16 INFO: Finished downloading models and saved to /home/sakshi/stanza_resources
2025-03-24 09:54:16 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.10.0.json: 424kB [00:00, 11.6MB/s]
2025-03-24 09:54:17 INFO: Downloaded file to /home/sakshi/stanza_resources/resources.json
2025-03-24 09:54:17 INFO: Loading these models for language: en (English):
| Processo

                                              selftext  \
28   fifty-something father of two, one in college ...   
112  i am sure im sick, ive been in the hospital an...   
129  i’m a 25 year old woman. i currently sleep on ...   
202  i am 16 years old and living in india, and i'm...   
248  info before reading this: i am 18, diagnosed w...   

                    place_name  
28                   [florida]  
112                   [canada]  
129  [florida, south carolina]  
202             [india, india]  
248                  [germany]  


In [1]:
import pandas as pd
import folium
from folium.plugins import HeatMap
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

df = pd.read_csv("ner_extracted_locations.csv")

geolocator = Nominatim(user_agent="crisis_heatmap")

def get_lat_lon(place):
    try:
        location = geolocator.geocode(place, timeout=10)
        if location:
            return (location.latitude, location.longitude)
    except GeocoderTimedOut:
        return None
    return None

df = df.dropna(subset=['place_name'])

df['coordinates'] = df['place_name'].apply(get_lat_lon)

df = df.dropna(subset=['coordinates'])

df[['latitude', 'longitude']] = pd.DataFrame(df['coordinates'].tolist(), index=df.index)

m = folium.Map(location=[20, 0], zoom_start=2)  

heat_data = df[['latitude', 'longitude']].values.tolist()
HeatMap(heat_data).add_to(m)

m.save("crisis_heatmap.html")
print("Heatmap saved as crisis_heatmap.html")

Heatmap saved as crisis_heatmap.html


In [9]:
unique_places = df['place_name'].dropna().unique()
print(unique_places)

["['florida']" "['canada']" "['florida', 'south carolina']"
 "['india', 'india']" "['germany']" "['japan']" "['india']"
 "['somalia', 'somalia']" "['london']" "['california']"
 "['ohio', 'indiana']" "['america']" "['ukraine']"
 "['australia', 'canada']" "['ontario', 'canada']" "['vietnam']"
 "['texas']" "['illinois', 'arkansas']" "['australia']" "['u.s.']"
 "['venezuela']" "['augusta']" "['philadelphia']" "['morgantown']"
 "['texas', 'colorado']" "['brooklyn', 'california']" "['nyc']"
 "['tennessee']" "['sweden']" "['tijuana']" "['ontario canada']"
 "['braunschweig']" "['netherlands']" "['afghanistan']" "['sarasota']"
 "['ukraine', 'ukraine']" "['texas', 'georgia']" "['ohio']" "['russia']"
 "['colorado']" "['mexico']" "['mumbai']" "['switzerland']" "['kerala']"
 "['pakistan']" "['iraq', 'canada']" "['bolivia']" "['indiana']"
 "['malaysia', 'malaysia', 'malaysia', 'malaysia']"
 "['georgia', 'georgia']" "['bangalore']" "['uzbekistan']" "['georgia']"]
