In [None]:
%pip install pandas geonamescache geopy tqdm pycountry

# Find and merge together all instances of places across NYT and Zeit coverage

The notebook pre-processes the coverage for both newspapers and output a unique csv file where each row is a country. For each country, the number of total articles in the NYT and Zeit coverage is included, as long as the unique identifier for the articles. 

The pre-processing has some differences based on the news outlet. NYT data already provides information about the nature of the keywords, allowing for an initial grouping of all keywords about geolocations. Conversely, Zeit data only comes with an array of keywords, with no additional information. However, in both cases the approach is similar: once identified keywords that relate to a geolocation, I extract the country for each one of them, then iterate over the original data to find the articles related to a specific location. Ultimately, I group together all locations based on the country, creating a unique array of articles ids, removing their duplicates, and then counting the ids within the array. 

As the last step, the two datasets are merged together based on the country.

In [147]:
import pandas as pd
import ast

from tqdm import tqdm

import requests_cache
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut,GeocoderUnavailable

import geonamescache
import pycountry


# NYT

In [87]:
full_year = pd.read_csv("../../input-data/temp-data.csv")

In [88]:
full_year

Unnamed: 0.1,Unnamed: 0,abstract,web_url,snippet,lead_paragraph,print_section,print_page,source,multimedia,headline,...,news_desk,section_name,byline,type_of_material,_id,word_count,uri,subsection_name,image_url,clean_id
0,0,Wrestling with age and a case of idea theft.,https://www.nytimes.com/2024/09/01/business/he...,Wrestling with age and a case of idea theft.,"Send questions about the office, money, career...",BU,3.0,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Help! I’m ‘Older’ and on the Job Hun...,...,SundayBusiness,Business Day,"{'original': 'By Anna Holmes', 'person': [{'fi...",News,nyt://article/da8532bd-f9bd-5ca3-9e7e-afef6e9f...,1280,nyt://article/da8532bd-f9bd-5ca3-9e7e-afef6e9f...,,images/2024/09/01/multimedia/01WorkFriend-fbmg...,da8532bd-f9bd-5ca3-9e7e-afef6e9f76d9
1,1,"Grueling shifts, abuse from the public and sub...",https://www.nytimes.com/2024/09/01/world/asia/...,"Grueling shifts, abuse from the public and sub...",Exhausted doctors resting in crowded on-call r...,A,4.0,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'Worked to the Bone, India’s Doctors ...",...,Foreign,World,{'original': 'By Anupreeta Das and Pragati K.B...,News,nyt://article/aeabc262-aeb0-5423-a7ac-8bb664cb...,1310,nyt://article/aeabc262-aeb0-5423-a7ac-8bb664cb...,Asia Pacific,images/2024/09/01/multimedia/01india-doctors-0...,aeabc262-aeb0-5423-a7ac-8bb664cb983b
2,2,"About a quarter of the residents of Maracaibo,...",https://www.nytimes.com/2024/09/01/world/ameri...,"About a quarter of the residents of Maracaibo,...",It was once a thriving metropolis in the heart...,A,1.0,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'What Happens When Half a Million Peo...,...,Foreign,World,{'original': 'By Frances Robles and Marian Car...,News,nyt://article/42c0d0f2-ea62-5d2b-8eba-baa04180...,1460,nyt://article/42c0d0f2-ea62-5d2b-8eba-baa04180...,Americas,images/2024/09/01/multimedia/01venezuela-migra...,42c0d0f2-ea62-5d2b-8eba-baa04180adea
3,3,Israel and Hamas agreed to pause the war to pe...,https://www.nytimes.com/2024/09/01/world/middl...,Israel and Hamas agreed to pause the war to pe...,Health workers on Sunday began a polio vaccina...,A,1.0,The New York Times,[],"{'main': 'In Race Against Polio, Gaza Begins V...",...,Foreign,World,"{'original': 'By Bilal Shbair, Erika Solomon a...",News,nyt://article/6393c6c3-0e1f-5494-925d-165e7aaf...,1473,nyt://article/6393c6c3-0e1f-5494-925d-165e7aaf...,Middle East,,6393c6c3-0e1f-5494-925d-165e7aafdefa
4,4,The prospect of a strong performance by the fa...,https://www.nytimes.com/2024/09/01/world/europ...,The prospect of a strong performance by the fa...,"On Sunday, voters in the eastern German states...",,,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'What to Watch For in East German Sta...,...,Foreign,World,"{'original': 'By Christopher F. Schuetze', 'pe...",News,nyt://article/fe046102-78e5-530d-89e0-59ff09c0...,812,nyt://article/fe046102-78e5-530d-89e0-59ff09c0...,Europe,images/2024/09/01/multimedia/01germany-electio...,fe046102-78e5-530d-89e0-59ff09c0e2e4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48691,48691,The first Black officer to achieve the rank of...,https://www.nytimes.com/2024/08/31/us/arthur-j...,The first Black officer to achieve the rank of...,"Arthur J. Gregg, the first African American Ar...",,,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'Arthur J. Gregg, Trailblazing Army O...",...,Obits,U.S.,"{'original': 'By Trip Gabriel', 'person': [{'f...",Obituary (Obit),nyt://article/15ef03c9-295b-50e0-a0f4-64f9a182...,1019,nyt://article/15ef03c9-295b-50e0-a0f4-64f9a182...,,images/2024/08/30/multimedia/30Gregg-03-pcbk/3...,15ef03c9-295b-50e0-a0f4-64f9a182675f
48692,48692,"In a reversal, two senior U.S. military offici...",https://www.nytimes.com/2024/08/31/world/europ...,"In a reversal, two senior U.S. military offici...",As hundreds of Russian missiles and drones str...,,,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'As Ukraine Mourns a Pilot’s Death, J...",...,Foreign,World,"{'original': 'By Marc Santora', 'person': [{'f...",News,nyt://article/3a3d339e-87ab-5650-b797-b7bb3cb0...,967,nyt://article/3a3d339e-87ab-5650-b797-b7bb3cb0...,Europe,images/2024/09/01/multimedia/01ukraine-f16-pil...,3a3d339e-87ab-5650-b797-b7bb3cb03e5b
48693,48693,Harris needs to talk about her theory of the p...,https://www.nytimes.com/2024/08/31/opinion/har...,Harris needs to talk about her theory of the p...,If the goal of the CNN interview with Vice Pre...,,,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...",{'main': 'Maybe We Are Asking Presidential Can...,...,OpEd,Opinion,"{'original': 'By Jamelle Bouie', 'person': [{'...",Op-Ed,nyt://article/1192db0c-51bd-525b-abb4-e8607c11...,870,nyt://article/1192db0c-51bd-525b-abb4-e8607c11...,,images/2024/08/31/multimedia/31bouie-newslette...,1192db0c-51bd-525b-abb4-e8607c11b2c3
48694,48694,It’s not a regular zucchini bread. It’s a cool...,https://www.nytimes.com/2024/08/31/dining/isla...,It’s not a regular zucchini bread. It’s a cool...,Island vibes zucchini bread! Have you ever hea...,,,The New York Times,"[{'rank': 0, 'subtype': 'xlarge', 'caption': N...","{'main': 'Island Vibes Zucchini Bread', 'kicke...",...,Dining,Food,"{'original': 'By Mia Leimkuhler', 'person': [{...",Letter,nyt://article/83b24708-09af-55b1-baac-7efca171...,485,nyt://article/83b24708-09af-55b1-baac-7efca171...,,images/2024/08/21/multimedia/JDA-Zucchini-Brea...,83b24708-09af-55b1-baac-7efca1711d63


In [89]:
full_year_essential = full_year[["_id", "section_name", "keywords", "pub_date"]].copy()

In [90]:
full_year_essential['keywords'] = full_year_essential['keywords'].apply(ast.literal_eval)

In [91]:
# Explode keywords in separate rows
full_year_essential = full_year_essential.explode("keywords")

In [92]:
# create separate columns for keyword type and keyword
full_year_essential["keyword_type"] = full_year_essential["keywords"].apply(lambda x: x.get("name") if isinstance(x, dict) else None)
full_year_essential["keyword"] = full_year_essential["keywords"].apply(lambda x: x.get("value") if isinstance(x, dict) else None)

In [93]:
full_year_essential

Unnamed: 0,_id,section_name,keywords,pub_date,keyword_type,keyword
0,nyt://article/da8532bd-f9bd-5ca3-9e7e-afef6e9f...,Business Day,"{'name': 'subject', 'value': 'Careers and Prof...",2024-09-01T04:01:07+0000,subject,Careers and Professions
0,nyt://article/da8532bd-f9bd-5ca3-9e7e-afef6e9f...,Business Day,"{'name': 'subject', 'value': 'Hiring and Promo...",2024-09-01T04:01:07+0000,subject,Hiring and Promotion
0,nyt://article/da8532bd-f9bd-5ca3-9e7e-afef6e9f...,Business Day,"{'name': 'subject', 'value': 'Content Type: Se...",2024-09-01T04:01:07+0000,subject,Content Type: Service
0,nyt://article/da8532bd-f9bd-5ca3-9e7e-afef6e9f...,Business Day,"{'name': 'subject', 'value': 'Workplace Enviro...",2024-09-01T04:01:07+0000,subject,Workplace Environment
0,nyt://article/da8532bd-f9bd-5ca3-9e7e-afef6e9f...,Business Day,"{'name': 'subject', 'value': 'your-feed-selfca...",2024-09-01T04:01:07+0000,subject,your-feed-selfcare
...,...,...,...,...,...,...
48695,nyt://article/0bca89dd-a1a7-5022-8192-9812f680...,New York,"{'name': 'subject', 'value': 'United States Op...",2024-08-31T15:42:26+0000,subject,United States Open (Tennis)
48695,nyt://article/0bca89dd-a1a7-5022-8192-9812f680...,New York,"{'name': 'subject', 'value': 'Content Type: Pe...",2024-08-31T15:42:26+0000,subject,Content Type: Personal Profile
48695,nyt://article/0bca89dd-a1a7-5022-8192-9812f680...,New York,"{'name': 'subject', 'value': 'Tennis', 'rank':...",2024-08-31T15:42:26+0000,subject,Tennis
48695,nyt://article/0bca89dd-a1a7-5022-8192-9812f680...,New York,"{'name': 'glocations', 'value': 'Netherlands',...",2024-08-31T15:42:26+0000,glocations,Netherlands


In [None]:
# Isolate the keywords associated with geolocations
locations = full_year_essential[full_year_essential["keyword_type"] == "glocations"]

In [None]:
# Create a unique list of keywords
locations = locations["keyword"].unique()
locations

In [None]:
# New df with only one column, the keywords
places_df = pd.DataFrame(locations, columns=["location"])
places_df

In [None]:
# A lot of these locations are structured into Place (Country). Here, the part in brackets 
# is moved to a new column to already have a rough indication of the location country. 
places_df['country'] = places_df['location'].apply(lambda x: x[x.find("(")+1:x.find(")")] if "(" in x and ")" in x else x)

## Geolocate individual locations

In [None]:
geolocator = Nominatim(user_agent="geo_locator")

In [None]:
# Function to get country
def get_country(location):
    try:
        geo = geolocator.geocode(location, exactly_one=True, language='en', addressdetails=False)
        if geo:
            return geo.address.split(",")[-1].strip()
        else:
            return "Not found"
    except GeocoderTimedOut:
        return "Timeout"
    except GeocoderUnavailable:
        return "Unavailable"

In [None]:
places_df['retrieved_country'] = places_df.apply(lambda x: get_country(x["country"]), axis=1)

In [None]:
places_df.to_csv("../../input-data/nyt_retrived_countries.csv")

## Clean edge cases

We need to start with loading again the dataset, so if we just need to further polish the data we do not need to run the get_country function again.

In [80]:
retrieved_countries = pd.read_csv("../../input-data/nyt_retrived_countries.csv")

In [81]:
retrieved_countries = retrieved_countries.drop(labels="Unnamed: 0", axis=1)

Masking all US states with ambigous 2 letter code (not real ISO code, will default to "United States" as country).

In [82]:
us_states_mask = (retrieved_countries['country'].str.len() == 2) | (retrieved_countries['country'].str.len() == 3) | (retrieved_countries['country'] == "Los Angeles, Calif") | (retrieved_countries['country'] == "Miami, Fla")

In [83]:
retrieved_countries.loc[us_states_mask, "retrieved_country"] = "United States"

In [84]:
retrieved_countries

Unnamed: 0,location,country,retrieved_country
0,India,India,India
1,Maracaibo (Venezuela),Venezuela,Venezuela
2,Venezuela,Venezuela,Venezuela
3,Israel,Israel,Israel
4,Germany,Germany,Germany
...,...,...,...
3169,Yanji (China),China,China
3170,"West Side Highway (Manhattan, NY)","Manhattan, NY",United States
3171,County Clare (Ireland),Ireland,Ireland
3172,County Limerick (Ireland),Ireland,Ireland


Masking all rows where geopy was not successful at finding a match.

In [85]:
undefined_mask = (retrieved_countries['retrieved_country'] == "Not found") | (retrieved_countries['retrieved_country'] == "Unavailable")

Some of them do not have a match because the US state name is not recognized

In [86]:
list_of_states = ['ALABAMA', 'ALASKA', 'ARIZONA', 'ARKANSAS', 'CALIFORNIA', 'COLORADO', 'CONNECTICUT', 'DELAWARE', 'FLORIDA', 'GEORGIA', 'HAWAII', 'IDAHO', 'ILLINOIS', 'INDIANA', 'IOWA', 'KANSAS', 'KENTUCKY', 'LOUISIANA', 'MAINE', 'MARYLAND', 'MASSACHUSETTS', 'MICHIGAN', 'MINNESOTA', 'MISSISSIPPI', 'MISSOURI', 'MONTANA', 'NEBRASKA', 'NEVADA', 'NEW HAMPSHIRE', 'NEW JERSEY', 'NEW MEXICO', 'NEW YORK', 'NORTH CAROLINA', 'NORTH DAKOTA', 'OHIO', 'OKLAHOMA', 'OREGON', 'PENNSYLVANIA', 'RHODE ISLAND', 'SOUTH CAROLINA', 'SOUTH DAKOTA', 'TENNESSEE', 'TEXAS', 'UTAH', 'VERMONT', 'VIRGINIA', 'WASHINGTON', 'WEST VIRGINIA', 'WISCONSIN', 'WYOMING']

In [94]:
undefined_countries = retrieved_countries.loc[undefined_mask]

In [95]:
undefined_countries

Unnamed: 0,location,country,retrieved_country
28,Minnesota,Minnesota,Unavailable
80,"Far East, South and Southeast Asia and Pacific...","Far East, South and Southeast Asia and Pacific...",Not found
131,Kansas,Kansas,Unavailable
145,Arizona,Arizona,Unavailable
172,London (England),England,Unavailable
203,Tiananmen Square (Beijing),Beijing,Unavailable
247,Maryland,Maryland,Unavailable
299,Oregon,Oregon,Unavailable
459,USSR (Former Soviet Union),Former Soviet Union,Not found
670,Cyclades Islands,Cyclades Islands,Not found


In [96]:
undefined_us_states = [country for country in undefined_countries["country"] if country.upper() in list_of_states]

In [97]:
undefined_us_states

['Minnesota',
 'Kansas',
 'Arizona',
 'Maryland',
 'Oregon',
 'Vermont',
 'Alaska',
 'Ohio']

In [98]:
mask_undefined_us = retrieved_countries["country"].isin(undefined_us_states)
retrieved_countries.loc[mask_undefined_us, "retrieved_country"] = "United States"

In [99]:
undefined_mask = (retrieved_countries['retrieved_country'] == "Not found") | (retrieved_countries['retrieved_country'] == "Unavailable")
undefined_countries = retrieved_countries.loc[undefined_mask]
undefined_countries

Unnamed: 0,location,country,retrieved_country
80,"Far East, South and Southeast Asia and Pacific...","Far East, South and Southeast Asia and Pacific...",Not found
172,London (England),England,Unavailable
203,Tiananmen Square (Beijing),Beijing,Unavailable
459,USSR (Former Soviet Union),Former Soviet Union,Not found
670,Cyclades Islands,Cyclades Islands,Not found
772,Panama Canal and Canal Zone,Panama Canal and Canal Zone,Not found
823,Sahara Desert,Sahara Desert,Unavailable
873,Darien Gap,Darien Gap,Not found
886,ANTARCTIC REGIONS,ANTARCTIC REGIONS,Not found
1079,"Valladolid (Yucatan Penninsula, Mexico)","Yucatan Penninsula, Mexico",Not found


In [100]:
missing_countries = ["Armenia", "St Vincent", "Lebanon", "Mali"]

In [101]:
mask_undefined_countries = retrieved_countries["country"].isin(missing_countries)
retrieved_countries.loc[mask_undefined_countries, "retrieved_country"] = retrieved_countries.loc[mask_undefined_countries, "country"] 

In [102]:
siberia_mask = retrieved_countries["country"] == "Siberia"
retrieved_countries.loc[siberia_mask, "retrieved_country"] = "Russia"
retrieved_countries

Unnamed: 0,location,country,retrieved_country
0,India,India,India
1,Maracaibo (Venezuela),Venezuela,Venezuela
2,Venezuela,Venezuela,Venezuela
3,Israel,Israel,Israel
4,Germany,Germany,Germany
...,...,...,...
3169,Yanji (China),China,China
3170,"West Side Highway (Manhattan, NY)","Manhattan, NY",United States
3171,County Clare (Ireland),Ireland,Ireland
3172,County Limerick (Ireland),Ireland,Ireland


In [103]:
west_bank_mask = retrieved_countries["country"] == "West Bank"
retrieved_countries.loc[west_bank_mask, "retrieved_country"] = "Palestinian Territory"

In [104]:
clean_countries = retrieved_countries.drop(retrieved_countries.loc[undefined_mask].index)

In [105]:
clean_countries

Unnamed: 0,location,country,retrieved_country
0,India,India,India
1,Maracaibo (Venezuela),Venezuela,Venezuela
2,Venezuela,Venezuela,Venezuela
3,Israel,Israel,Israel
4,Germany,Germany,Germany
...,...,...,...
3169,Yanji (China),China,China
3170,"West Side Highway (Manhattan, NY)","Manhattan, NY",United States
3171,County Clare (Ireland),Ireland,Ireland
3172,County Limerick (Ireland),Ireland,Ireland


## Retrieve article ids from original dataset

In [106]:
data = []
for index, place in clean_countries.iterrows():
    place_mask = full_year_essential["keyword"] == place["location"]
    place_coverage = full_year_essential.loc[place_mask, "_id"]
    data.append((place["location"], place["retrieved_country"], len(place_coverage.values), place_coverage.to_list()))

In [107]:
places_and_ids_df = pd.DataFrame(data, columns=["place_keyword", "country", "count_of_articles", "ids_of_articles"])

In [108]:
places_and_ids_df

Unnamed: 0,place_keyword,country,count_of_articles,ids_of_articles
0,India,India,234,[nyt://article/aeabc262-aeb0-5423-a7ac-8bb664c...
1,Maracaibo (Venezuela),Venezuela,1,[nyt://article/42c0d0f2-ea62-5d2b-8eba-baa0418...
2,Venezuela,Venezuela,103,[nyt://article/42c0d0f2-ea62-5d2b-8eba-baa0418...
3,Israel,Israel,2143,[nyt://article/6393c6c3-0e1f-5494-925d-165e7aa...
4,Germany,Germany,241,[nyt://article/fe046102-78e5-530d-89e0-59ff09c...
...,...,...,...,...
3139,Yanji (China),China,1,[nyt://article/ab3117c5-2380-5493-b5a2-9a275ff...
3140,"West Side Highway (Manhattan, NY)",United States,1,[nyt://article/bcae809a-57ae-5b4a-a574-168659e...
3141,County Clare (Ireland),Ireland,1,[nyt://article/441056ed-eb8a-5ad0-a0c1-a2ef99d...
3142,County Limerick (Ireland),Ireland,1,[nyt://article/441056ed-eb8a-5ad0-a0c1-a2ef99d...


In [109]:
places_and_ids_df

Unnamed: 0,place_keyword,country,count_of_articles,ids_of_articles
0,India,India,234,[nyt://article/aeabc262-aeb0-5423-a7ac-8bb664c...
1,Maracaibo (Venezuela),Venezuela,1,[nyt://article/42c0d0f2-ea62-5d2b-8eba-baa0418...
2,Venezuela,Venezuela,103,[nyt://article/42c0d0f2-ea62-5d2b-8eba-baa0418...
3,Israel,Israel,2143,[nyt://article/6393c6c3-0e1f-5494-925d-165e7aa...
4,Germany,Germany,241,[nyt://article/fe046102-78e5-530d-89e0-59ff09c...
...,...,...,...,...
3139,Yanji (China),China,1,[nyt://article/ab3117c5-2380-5493-b5a2-9a275ff...
3140,"West Side Highway (Manhattan, NY)",United States,1,[nyt://article/bcae809a-57ae-5b4a-a574-168659e...
3141,County Clare (Ireland),Ireland,1,[nyt://article/441056ed-eb8a-5ad0-a0c1-a2ef99d...
3142,County Limerick (Ireland),Ireland,1,[nyt://article/441056ed-eb8a-5ad0-a0c1-a2ef99d...


In [145]:
places_and_ids_df.to_csv("../../input-data/places/nyt-locations.csv", index=False)

## Group by country and chain ids of articles together

In [110]:
countries_and_unique_ids = places_and_ids_df.copy()

In [111]:
from itertools import chain
general_countries = (countries_and_unique_ids.groupby('country', as_index=False)['ids_of_articles']
         .agg(lambda x: list(chain.from_iterable(x)))
       )

## Remove duplicates from id list and count number of articles for each country

In [112]:
general_countries["ids_of_articles"] = general_countries["ids_of_articles"].apply(lambda x: list(set(x)))

In [113]:
general_countries["count_of_articles"] = general_countries["ids_of_articles"].apply(lambda x: len(x))

In [114]:
general_countries

Unnamed: 0,country,ids_of_articles,count_of_articles
0,Adriatic Sea,[nyt://article/6c5919fe-452a-532d-8234-f698a33...,1
1,Aegean Sea,[nyt://interactive/6addaf57-b7fa-5d0f-af24-ec0...,2
2,Afghanistan,[nyt://article/bf24d7e3-26df-577e-8327-5ad1567...,89
3,Africa,[nyt://article/e4d46fbb-0e78-5c0f-8bbe-9caa617...,199
4,Albania,[nyt://article/c81db80b-5820-56a0-b426-1466f04...,20
...,...,...,...
223,Vietnam,[nyt://article/182fbcdd-cf29-534a-900d-c2d66f6...,51
224,Yellow Sea,[nyt://article/593fcdd3-0e07-58b1-9a5d-eec8cd8...,1
225,Yemen,[nyt://article/df39f071-31c5-5992-aaef-19f15a6...,90
226,Zambia,[nyt://article/bc01f508-b5f4-5c61-a671-45a7159...,11


# Zeit

In [6]:
zeit_full_year = pd.read_csv("../../input-data/zeit-temp-data.csv")

In [7]:
zeit_full_year

Unnamed: 0.1,Unnamed: 0,uri,lang,isDuplicate,date,time,dateTime,dateTimePub,dataType,sim,...,body,source,authors,image,eventUri,sentiment,wgt,relevance,keywords,links
0,0,2024-12-590516688,deu,False,2024-12-31,23:48:05,2024-12-31 23:48:05+00:00,2024-12-31T23:35:24Z,news,0.000000,...,Kurz vor dem Jahreswechsel ist in Hamburg ein ...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/feuerwehr-...,,,473384885,1,"['News', 'Silvesternacht', 'Billstedt', 'Hambu...","[{'anchorText': 'Feuerwehr', 'url': 'https://w..."
1,1,8481124955,deu,False,2024-12-31,23:26:31,2024-12-31 23:26:31+00:00,2024-12-31T23:25:31Z,news,0.968627,...,"Das Drama um scheinbar harmlose, jedoch am End...","{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/zweite-squ...,eng-10211662,,473383591,1,"['News', 'Netflix-Welthit', 'Netflix', 'Squid'...","[{'anchorText': 'Netflix', 'url': 'https://www..."
2,2,8481124174,deu,False,2024-12-31,23:25:05,2024-12-31 23:25:05+00:00,2024-12-31T23:24:01Z,news,0.000000,...,Vor der Silvesternacht in Berlin hat die Poliz...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/gesellschaft/zeitgeschehen...,,,473383505,1,"['Gesellschaft', 'Jahreswechsel', 'Silvester',...","[{'anchorText': 'Polizei', 'url': 'https://www..."
3,3,8481124175,deu,False,2024-12-31,23:24:34,2024-12-31 23:24:34+00:00,2024-12-31T23:24:01Z,news,0.000000,...,Ein Einfamilienhaus ist in der Silvesternacht ...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/haus-in-hu...,,,473383474,1,"['News', 'Großeinsatz der Feuerwehr', 'Hude', ...","[{'anchorText': 'Feuerwehr', 'url': 'https://w..."
4,4,8481116703,deu,False,2024-12-31,23:14:35,2024-12-31 23:14:35+00:00,2024-12-31T23:12:42Z,news,0.945098,...,Polen hat zum Jahreswechsel den alle sechs Mon...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/polen-uebe...,eng-10209752,,473382875,1,"['News', 'Nach sechs Monaten Ungarn', 'Donald ...","[{'anchorText': 'Polen', 'url': 'https://www.z..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64840,64840,8105574979,deu,False,2024-05-01,02:23:35,2024-05-01 02:23:35+00:00,2024-05-01T02:22:46Z,news,0.800000,...,Die Zusammenfassung für diesen Artikel kann le...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2024-05/01/jahrestag-...,deu-1781817,,452226215,1,"['News', 'EU', 'Annalena Baerbock', 'Radosław ...","[{'anchorText': 'Annalena Baerbock', 'url': 'h..."
64841,64841,8105574980,deu,False,2024-05-01,02:23:18,2024-05-01 02:23:18+00:00,2024-05-01T02:22:46Z,news,0.588235,...,Die Audioversion dieses Artikels wurde künstli...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/politik/ausland/2024-05/mi...,deu-1781792,,452226198,1,"['Politik', 'US-Repräsentantenhaus', 'US-Reprä...",
64842,64842,8105568694,deu,False,2024-05-01,02:15:24,2024-05-01 02:15:24+00:00,2024-05-01T02:14:45Z,news,0.968627,...,Die Zusammenfassung für diesen Artikel kann le...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2024-05/01/sek-einsat...,deu-1781828,,452225724,1,"['News', 'Extremismus', 'Velbert', 'Düsseldorf...","[{'anchorText': 'Polizei', 'url': 'https://www..."
64843,64843,8105478890,deu,False,2024-05-01,00:13:52,2024-05-01 00:13:52+00:00,2024-05-01T00:13:17Z,news,0.686275,...,Die Audioversion dieses Artikels wurde künstli...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/politik/ausland/2024-05/ge...,deu-1780720,,452218432,1,"['Politik', 'Südkaukasus', 'Georgien', 'Tbilis...","[{'anchorText': 'Polizei', 'url': 'https://www..."


## Create list with all unique keywords

In [46]:
zeit_full_year["keywords"] = zeit_full_year["keywords"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

In [None]:
zeit_full_year = zeit_full_year[zeit_full_year['keywords'].notna()]



In [None]:
all_keywords= list(zeit_full_year["keywords"])

In [None]:
all_keywords = [
    x
    for xs in all_keywords
    for x in xs
]

In [None]:
all_keywords

## Load custom dataset of countries

The dataset for Germany is a custom list with countries' names in German. The English names for countries and names for cities are pulled from geonamescache. We do not need specific names for cities because the articles tend to include both the German and English name of famous cities (e.g. Rom, Rome) or they always include the country within the list (e.g. Rom, Italien).

In [None]:
# Dataset: https://www.drupal.org/node/1136336
de_countries = pd.read_csv('../../input-data/countries_de.csv', sep='|')

In [None]:
de_countries

In [None]:
countries_de = de_countries["Country"].values

In [None]:
# Get a list of country and city names from GeoNamesCache
gc = geonamescache.GeonamesCache()
countries_en = {v['name'] for v in gc.get_countries().values()}
cities = {v['name'] for v in gc.get_cities().values()}
countries_de = de_countries["Country"].values

# Find keywords that match countries (ENG and DE) or cities
place_keywords = [word for word in all_keywords if word in countries_en or word in cities or word in countries_de]

print(place_keywords)

In [None]:
locations_in_coverage = list(set(place_keywords))

In [None]:
df_locations_in_coverage = pd.DataFrame(locations_in_coverage, columns=["location"])

In [None]:
df_locations_in_coverage

In [None]:
df_locations_in_coverage["country"] = df_locations_in_coverage.apply(lambda x: get_country(x["location"]), axis=1)

In [None]:
df_locations_in_coverage = df_locations_in_coverage.sort_values(by="country")

In [None]:
df_locations_in_coverage.to_csv("../../input-data/zeit-retrieved-countries.csv", index=False)

## Refine list of retrieved countries

The list of names requires a bit of manual refinement, hence I load a new file for adding the count. Unfortunately, it is difficult to come up with a way to automate the task, because certain keywords are ambigouous (e.g. Gardena, both a city in the US and a company in Germany) and the data coming from the API does not provide clear indication about the nature of keywords. Since the list of names is not super long, it is still possible to consider the special cases one by one. If the dataset will grow, it could be worth to consider an entirely different approach to place extraction. 

In [64]:
zeit_countries_refined = pd.read_csv("../../input-data/zeit-retrieved-countries-refined.csv")

In [65]:
zeit_countries_refined

Unnamed: 0,location,country
0,Kabul,Afghanistan
1,Afghanistan,Afghanistan
2,Albania,Albania
3,Albanien,Albania
4,Tirana,Albania
...,...,...
1756,Sanaa,Yemen
1757,Mbala,Zambia
1758,Sambia,Zambia
1759,Zimbabwe,Zimbabwe


## Match locations with article ID

In [66]:
zeit_categories = zeit_full_year.explode("keywords")

In [67]:
zeit_categories

Unnamed: 0.1,Unnamed: 0,uri,lang,isDuplicate,date,time,dateTime,dateTimePub,dataType,sim,...,body,source,authors,image,eventUri,sentiment,wgt,relevance,keywords,links
0,0,2024-12-590516688,deu,False,2024-12-31,23:48:05,2024-12-31 23:48:05+00:00,2024-12-31T23:35:24Z,news,0.000000,...,Kurz vor dem Jahreswechsel ist in Hamburg ein ...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/feuerwehr-...,,,473384885,1,News,"[{'anchorText': 'Feuerwehr', 'url': 'https://w..."
0,0,2024-12-590516688,deu,False,2024-12-31,23:48:05,2024-12-31 23:48:05+00:00,2024-12-31T23:35:24Z,news,0.000000,...,Kurz vor dem Jahreswechsel ist in Hamburg ein ...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/feuerwehr-...,,,473384885,1,Silvesternacht,"[{'anchorText': 'Feuerwehr', 'url': 'https://w..."
0,0,2024-12-590516688,deu,False,2024-12-31,23:48:05,2024-12-31 23:48:05+00:00,2024-12-31T23:35:24Z,news,0.000000,...,Kurz vor dem Jahreswechsel ist in Hamburg ein ...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/feuerwehr-...,,,473384885,1,Billstedt,"[{'anchorText': 'Feuerwehr', 'url': 'https://w..."
0,0,2024-12-590516688,deu,False,2024-12-31,23:48:05,2024-12-31 23:48:05+00:00,2024-12-31T23:35:24Z,news,0.000000,...,Kurz vor dem Jahreswechsel ist in Hamburg ein ...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/feuerwehr-...,,,473384885,1,Hamburg,"[{'anchorText': 'Feuerwehr', 'url': 'https://w..."
0,0,2024-12-590516688,deu,False,2024-12-31,23:48:05,2024-12-31 23:48:05+00:00,2024-12-31T23:35:24Z,news,0.000000,...,Kurz vor dem Jahreswechsel ist in Hamburg ein ...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/news/2025-01/01/feuerwehr-...,,,473384885,1,Feuerwehr,"[{'anchorText': 'Feuerwehr', 'url': 'https://w..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64844,64844,8105471646,deu,False,2024-05-01,00:05:00,2024-05-01 00:05:00+00:00,2024-05-01T00:04:36Z,news,0.560784,...,Dies ist ein experimentelles Tool. Die Resulta...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/politik/deutschland/2024-0...,deu-1781816,,452217900,1,AfD,[{'anchorText': 'Diakonie-Präsident Rüdiger Sc...
64844,64844,8105471646,deu,False,2024-05-01,00:05:00,2024-05-01 00:05:00+00:00,2024-05-01T00:04:36Z,news,0.560784,...,Dies ist ein experimentelles Tool. Die Resulta...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/politik/deutschland/2024-0...,deu-1781816,,452217900,1,Gewerkschaft,[{'anchorText': 'Diakonie-Präsident Rüdiger Sc...
64844,64844,8105471646,deu,False,2024-05-01,00:05:00,2024-05-01 00:05:00+00:00,2024-05-01T00:04:36Z,news,0.560784,...,Dies ist ein experimentelles Tool. Die Resulta...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/politik/deutschland/2024-0...,deu-1781816,,452217900,1,Frank Werneke,[{'anchorText': 'Diakonie-Präsident Rüdiger Sc...
64844,64844,8105471646,deu,False,2024-05-01,00:05:00,2024-05-01 00:05:00+00:00,2024-05-01T00:04:36Z,news,0.560784,...,Dies ist ein experimentelles Tool. Die Resulta...,"{'uri': 'zeit.de', 'dataType': 'news', 'title'...",[],https://img.zeit.de/politik/deutschland/2024-0...,deu-1781816,,452217900,1,RedaktionsNetzwerk Deutschland,[{'anchorText': 'Diakonie-Präsident Rüdiger Sc...


In [68]:
zeit_categories = zeit_categories[["uri", "keywords", "title", "date"]]

In [69]:
zeit_categories

Unnamed: 0,uri,keywords,title,date
0,2024-12-590516688,News,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Silvesternacht,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Billstedt,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Hamburg,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Feuerwehr,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
...,...,...,...,...
64844,8105471646,AfD,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01
64844,8105471646,Gewerkschaft,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01
64844,8105471646,Frank Werneke,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01
64844,8105471646,RedaktionsNetzwerk Deutschland,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01


In [70]:
zeit_categories.rename(columns={"keywords": "keyword", "uri": "_id"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  zeit_categories.rename(columns={"keywords": "keyword", "uri": "_id"}, inplace=True)


In [71]:
zeit_categories

Unnamed: 0,_id,keyword,title,date
0,2024-12-590516688,News,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Silvesternacht,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Billstedt,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Hamburg,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
0,2024-12-590516688,Feuerwehr,Silvesternacht: Feuerwehr löscht Brand an Schu...,2024-12-31
...,...,...,...,...
64844,8105471646,AfD,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01
64844,8105471646,Gewerkschaft,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01
64844,8105471646,Frank Werneke,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01
64844,8105471646,RedaktionsNetzwerk Deutschland,Gewerkschaft: Verdi-Chef sieht keinen Platz fü...,2024-05-01


In [72]:
# TO DO: Exception to include keywords related to Ukraine for Ukraine, like "Krieg in der Ukraine" and "Kiew"
data = []
for index, place in zeit_countries_refined.iterrows():
    if place["location"] == "Ukraine":
        place_mask = (zeit_categories["keyword"] == place["location"]) | (zeit_categories["keyword"] == "Krieg in der Ukraine") | (zeit_categories["keyword"] == "Kiew")
        place_coverage = zeit_categories.loc[place_mask, "_id"]


    else: 
        place_mask = zeit_categories["keyword"] == place["location"]
        place_coverage = zeit_categories.loc[place_mask, "_id"]
    
    data.append((place["location"], place["country"], len(place_coverage.values), place_coverage.to_list()))
    

In [73]:
zeit_places_and_ids_df = pd.DataFrame(data, columns=["place_keyword", "country", "count_of_articles", "ids_of_articles"])

In [74]:
zeit_places_and_ids_df

Unnamed: 0,place_keyword,country,count_of_articles,ids_of_articles
0,Kabul,Afghanistan,24,"[8433653719, 8431459289, 8428897934, 836035179..."
1,Afghanistan,Afghanistan,194,"[8478156165, 8478156165, 8472648132, 847109322..."
2,Albania,Albania,1,[8005315340]
3,Albanien,Albania,63,"[8468377355, 8458717654, 8434938101, 842522250..."
4,Tirana,Albania,9,"[8458717654, 8433021264, 8384144335, 835401059..."
...,...,...,...,...
1756,Sanaa,Yemen,4,"[8464718290, 8464361707, 8367515653, 8272954064]"
1757,Mbala,Zambia,1,[8454598635]
1758,Sambia,Zambia,4,"[8362705221, 8015102614, 7927847804, 8253025150]"
1759,Zimbabwe,Zimbabwe,1,[7957531196]


In [144]:
zeit_places_and_ids_df.to_csv("../../input-data/places/zeit-locations.csv", index=False)

## Group by country and chain ids of articles together

In [75]:
zeit_countries_and_unique_ids = zeit_places_and_ids_df.copy()

In [76]:
from itertools import chain
zeit_general_countries = (zeit_countries_and_unique_ids.groupby('country', as_index=False)['ids_of_articles']
         .agg(lambda x: list(chain.from_iterable(x)))
       )

In [77]:
zeit_general_countries["ids_of_articles"] = zeit_general_countries["ids_of_articles"].apply(lambda x: list(set(x)))

In [78]:
zeit_general_countries["count_of_articles"] = zeit_general_countries["ids_of_articles"].apply(lambda x: len(x))

In [143]:
zeit_general_countries

Unnamed: 0,country,ids_of_articles,count_of_articles
0,Afghanistan,"[8298581376, 8184489802, 8478156165, 815011498...",168
1,Albania,"[8374221914, 8406497605, 8406583117, 836573329...",61
2,Algeria,"[8257833741, 8223378425, 8150923868, 795356460...",20
3,Angola,"[8103094764, 7927847804, 8097005428, 825215668...",8
4,Anguilla,[8067913106],1
...,...,...,...
183,Venezuela,"[8262040716, 8288657154, 8043229045, 798819651...",78
184,Vietnam,"[8316584811, 8107267090, 8336739402, 833323361...",35
185,Yemen,"[7937609389, 8367515653, 8316863505, 817681693...",99
186,Zambia,"[8362705221, 7927847804, 8253025150, 845459863...",5


# Join data 

In [151]:
joined_locations_coverages = general_countries.merge(zeit_general_countries, how='outer', on="country", suffixes=("_nyt", "_zeit"))

## Clean NaN values

In [152]:
joined_locations_coverages["count_of_articles_zeit"] = joined_locations_coverages["count_of_articles_zeit"].fillna(0)

In [153]:
joined_locations_coverages["count_of_articles_nyt"]  = joined_locations_coverages["count_of_articles_nyt"].fillna(0)


In [154]:
joined_locations_coverages['ids_of_articles_nyt'] = joined_locations_coverages['ids_of_articles_nyt'].apply(lambda d: d if isinstance(d, list) else [])

In [155]:
joined_locations_coverages['ids_of_articles_zeit'] = joined_locations_coverages['ids_of_articles_zeit'].apply(lambda d: d if isinstance(d, list) else [])

## Get coordinates

In [156]:
geolocator = Nominatim(user_agent="geo_lookup")
session = requests_cache.CachedSession("geopy_cache", expire_after=86400)  # Cache for 1 day
tqdm.pandas()

In [157]:
def get_coordinates(location_name):
    try:
        location = geolocator.geocode(location_name, timeout=10)
        if location:
            return location.latitude, location.longitude
    except Exception as e:
        print(f"Error for {location_name}: {e}")
    return None, None

In [158]:
joined_locations_coverages[["Latitude", "Longitude"]] = joined_locations_coverages.progress_apply(
    lambda row: get_coordinates(row["country"]), axis=1, result_type="expand"
)

100%|██████████| 242/242 [04:01<00:00,  1.00it/s]


In [229]:
joined_locations_coverages.loc[joined_locations_coverages["country"] == "Turkey", "country"] = "Türkiye"

In [240]:
joined_locations_coverages.loc[joined_locations_coverages["country"] == "Democratic Republic of the Congo", "country"] = "Congo, The Democratic Republic of the"

In [248]:
joined_locations_coverages.loc[joined_locations_coverages["country"] == "Palestinian Territory", "country"] = "Palestine, State of"

In [249]:
def get_country_code(location_name):
    try:
        country = pycountry.countries.get(name=location_name)
        if country:
            return country.alpha_3
        else:
            try:
                list_of_countries = pycountry.countries.search_fuzzy(location_name)
                if list_of_countries:
                    country = list_of_countries[0]
                    return country.alpha_3
                else:
                    return "No code"
            except Exception as e:
                return "No code"
    except Exception as e:
        print(f"No {location_name} found")

In [247]:
pycountry.countries.search_fuzzy("Palestine")

[Country(alpha_2='PS', alpha_3='PSE', flag='🇵🇸', name='Palestine, State of', numeric='275', official_name='the State of Palestine')]

In [250]:
joined_locations_coverages["iso_alpha3"] = joined_locations_coverages.progress_apply(
    lambda row: get_country_code(row["country"]), axis=1, result_type="expand"
)

100%|██████████| 242/242 [00:00<00:00, 638.64it/s]


In [251]:
joined_locations_coverages

Unnamed: 0,country,ids_of_articles_nyt,count_of_articles_nyt,ids_of_articles_zeit,count_of_articles_zeit,Latitude,Longitude,iso_alpha3
0,Adriatic Sea,[nyt://article/6c5919fe-452a-532d-8234-f698a33...,1.0,[],0.0,43.702151,14.667946,No code
1,Aegean Sea,[nyt://interactive/6addaf57-b7fa-5d0f-af24-ec0...,2.0,[],0.0,38.062228,25.720589,No code
2,Afghanistan,[nyt://article/bf24d7e3-26df-577e-8327-5ad1567...,89.0,"[8298581376, 8184489802, 8478156165, 815011498...",168.0,33.768006,66.238514,AFG
3,Africa,[nyt://article/e4d46fbb-0e78-5c0f-8bbe-9caa617...,199.0,[],0.0,11.502434,17.757812,ZAF
4,Albania,[nyt://article/c81db80b-5820-56a0-b426-1466f04...,20.0,"[8374221914, 8406497605, 8406583117, 836573329...",61.0,5.758765,-73.915162,ALB
...,...,...,...,...,...,...,...,...
237,Vietnam,[nyt://article/182fbcdd-cf29-534a-900d-c2d66f6...,51.0,"[8316584811, 8107267090, 8336739402, 833323361...",35.0,15.926666,107.965086,VNM
238,Yellow Sea,[nyt://article/593fcdd3-0e07-58b1-9a5d-eec8cd8...,1.0,[],0.0,36.000000,124.000000,No code
239,Yemen,[nyt://article/df39f071-31c5-5992-aaef-19f15a6...,90.0,"[7937609389, 8367515653, 8316863505, 817681693...",99.0,16.347124,47.891527,YEM
240,Zambia,[nyt://article/bc01f508-b5f4-5c61-a671-45a7159...,11.0,"[8362705221, 7927847804, 8253025150, 845459863...",5.0,-14.518912,27.558988,ZMB


In [252]:
joined_locations_coverages_reordered = joined_locations_coverages[['country', "iso_alpha3","Latitude", "Longitude", "count_of_articles_nyt", "count_of_articles_zeit", "ids_of_articles_nyt", "ids_of_articles_zeit"]]

In [253]:
joined_locations_coverages_reordered

Unnamed: 0,country,iso_alpha3,Latitude,Longitude,count_of_articles_nyt,count_of_articles_zeit,ids_of_articles_nyt,ids_of_articles_zeit
0,Adriatic Sea,No code,43.702151,14.667946,1.0,0.0,[nyt://article/6c5919fe-452a-532d-8234-f698a33...,[]
1,Aegean Sea,No code,38.062228,25.720589,2.0,0.0,[nyt://interactive/6addaf57-b7fa-5d0f-af24-ec0...,[]
2,Afghanistan,AFG,33.768006,66.238514,89.0,168.0,[nyt://article/bf24d7e3-26df-577e-8327-5ad1567...,"[8298581376, 8184489802, 8478156165, 815011498..."
3,Africa,ZAF,11.502434,17.757812,199.0,0.0,[nyt://article/e4d46fbb-0e78-5c0f-8bbe-9caa617...,[]
4,Albania,ALB,5.758765,-73.915162,20.0,61.0,[nyt://article/c81db80b-5820-56a0-b426-1466f04...,"[8374221914, 8406497605, 8406583117, 836573329..."
...,...,...,...,...,...,...,...,...
237,Vietnam,VNM,15.926666,107.965086,51.0,35.0,[nyt://article/182fbcdd-cf29-534a-900d-c2d66f6...,"[8316584811, 8107267090, 8336739402, 833323361..."
238,Yellow Sea,No code,36.000000,124.000000,1.0,0.0,[nyt://article/593fcdd3-0e07-58b1-9a5d-eec8cd8...,[]
239,Yemen,YEM,16.347124,47.891527,90.0,99.0,[nyt://article/df39f071-31c5-5992-aaef-19f15a6...,"[7937609389, 8367515653, 8316863505, 817681693..."
240,Zambia,ZMB,-14.518912,27.558988,11.0,5.0,[nyt://article/bc01f508-b5f4-5c61-a671-45a7159...,"[8362705221, 7927847804, 8253025150, 845459863..."


## Export

In [254]:
joined_locations_coverages_reordered.to_csv("../../data/places/coverage_by_country.csv", index=False)