In [1]:
from collections import Counter
from tqdm import tqdm
import pandas as pd
import spacy
import os
import glob

In [2]:
# Load spaCy language model
nlp = spacy.load('en_core_web_sm')

# Read the DataFrame
df = pd.read_csv('final.csv')

# Function to extract the location words from text
def extract_locations(text):
    doc = nlp(text)
    locations = [ent.text for ent in doc.ents if ent.label_ == 'GPE']
    return locations

# Function to generate the DOI link
def generate_doi_link(doi):
    return f'https://doi.org/{doi}'

# Add column for DOI link
df['DOI_Link'] = df['DOI'].apply(generate_doi_link)

In [3]:
# Create a list to store all location words
all_locations = []

# Iterate through each row and extract location words
for index, row in tqdm(df.iterrows(), total=len(df)):
    abstract = row['Abstract']
    if isinstance(abstract, str):
        locations = extract_locations(abstract)
    else:
        locations = []
    all_locations.append(locations)


100%|██████████| 640/640 [00:26<00:00, 24.08it/s]


In [4]:
# Create a Counter for each list inside 'all_locations'
counters_list = [Counter(locations) for locations in all_locations]

# Get the 5 most frequent words for each 'Abstract'
most_common_locations_lists = [counter.most_common(5) for counter in counters_list]

# Add columns for location words with appropriate labels
for i, locations_list in enumerate(most_common_locations_lists, start=1):
    for j, (location, count) in enumerate(locations_list, start=1):
        col_name = f'location{j}'
        if j <= 5:
            df.at[i - 1, col_name] = location

In [5]:
# Save the modified DataFrame
df.to_csv('final_with_sorted_locations_nested.csv', index=False)

In [6]:
df

Unnamed: 0,Title,Authors,Abstract,Published Year,Published Month,Journal,Volume,Issue,Pages,Accession Number,...,Covidence #,Study,Notes,Tags,DOI_Link,location1,location2,location3,location4,location5
0,Rapid deforestation of a coastal landscape dri...,"Ury, Emily A; Yang, Xi; Wright, Justin P; Bern...",Climate change is driving ecological shifts in...,2021,,Ecol. Appl.,,,e2339,,...,#1,Ury 2021,,,https://doi.org/10.1002/eap.2339,North Carolina's,,,,
1,Estuaries of the Northeastern United States: H...,"Roman, Charles T; Jaworski, Norbert; Short, Fr...","Geographic signatures are physical, chemical, ...",2000,,,23,,,,...,#6,Roman 2000,,,https://doi.org/nan,U.S.,Mexico,,,
2,Sea Level Rise and the Dynamics of the Marsh-U...,"Fagherazzi, Sergio; Anisfeld, Shimon C; Blum, ...","During sea level rise, salt marshes transgress...",2019,,Front. Environ. Sci. Eng. China,7,,25,,...,#8,Fagherazzi 2019,,,https://doi.org/10.3389/fenvs.2019.00025,,,,,
3,Salt Marsh Mosquito-Control Ditches: Sedimenta...,"Corman, Sarah S; Roman, Charles T; King, John ...",Mosquito ditches are a prolific physical featu...,2012,,J. Coast. Res.,28,4,874,,...,#11,Corman 2012,,,https://doi.org/10.2112/jcoastres-d-11-00012.1,Mosquito,NewYork,,,
4,The rising tide: assessing the risks of climat...,"McGranahan, Gordon; Balk, Deborah; Anderson, B...",,2007,,Environment and Urbanization,19,1,17-37,,...,#15,McGranahan 2007,,,https://doi.org/10.1177/0956247807076960,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635,Natural and anthropogenic effects on microplas...,"Quesadas-Rojas, M; Enriquez, C; Valle-Levinson, A",Information on the transport and distribution ...,2021,,SCIENCE OF THE TOTAL ENVIRONMENT,776,,,WOS:000647703500001,...,#20759,Quesadas-Rojas 2021,,,https://doi.org/10.1016/j.scitotenv.2021.145803,,,,,
636,Mississippi River delta: an overview,"Coleman, JM; Roberts, HH; Stone, GW","Over the last century, the river-dominated Mis...",1998,,JOURNAL OF COASTAL RESEARCH,14,3,698-716,WOS:000075421100003,...,#20782,Coleman 1998,,,https://doi.org/nan,Mississippi,Louisiana,Holocene,the United States,
637,Consensus based planning: Developing a concept...,"Appelbaum, SJ",The Central and Southern Florida (C&SF) Projec...,1998,,COORDINATION: WATER RESOURCES AND ENVIRONMENT,,,149-150,WOS:000079047700022,...,#20809,Appelbaum 1998,,,https://doi.org/nan,Florida,C&SF,,,
638,"On the linkages among density, flow, and bathy...","Valle-Levinson, A; Boicourt, WC; Roman, MR","Linkages among density, flow, and bathymetry g...",2003,,ESTUARIES,26,6,1437-1449,WOS:000188745200005,...,#20864,Valle-Levinson 2003,,,https://doi.org/10.1007/BF02803652,,,,,
