In [1]:
"""
# Extract and plot latitude/longitude coordinates of locations extracted from text

## Data
Book title: *Up the Amazon and Madeira rivers, through Bolivia and Peru*
Author: *Edward D. Mathews*
Publication date: *1879*

## Terms
- `region of interest` (`roi`): The areas of Peru, Bolivia and Brazil (exlcuding other references that appear in the text to locations e.g. London, China, India)
- `text_chronological`: The chronological appearence of locations in the text
- `journey_chronological`: The chronologcical appearance of locations during the exploration
- `sentence_block`: <= 5 sentence block centred on the possible location reference in the text

## Pipeline
1. Extract and basic clean text
2. Perform NLP on text
3. Identify possible references to locations in the NLP'ed text
4. Fuzzy match possible locations and correct
4. Create dataset of chronologically (in terms of the text) ordered locations with their `sentence_blocks`.
5. Do some fancy opneai api call thingy on the `sentence_blocks`, to see get structured data from the blocks:
    - Is this a real location, or did the NLP make a mistake?
    - Is this location in the region of interest `roi`?
    - Is this location `journey_chronological`?
6. Plot the real locations in the `roi` on a satellite/cartoon map of Brazil using folium
"""

"\n# Extract and plot latitude/longitude coordinates of locations extracted from text\n\n## Data\nBook title: *Up the Amazon and Madeira rivers, through Bolivia and Peru*\nAuthor: *Edward D. Mathews*\nPublication date: *1879*\n\n## Terms\n- `region of interest` (`roi`): The areas of Peru, Bolivia and Brazil (exlcuding other references that appear in the text to locations e.g. London, China, India)\n- `text_chronological`: The chronological appearence of locations in the text\n- `journey_chronological`: The chronologcical appearance of locations during the exploration\n- `sentence_block`: <= 5 sentence block centred on the possible location reference in the text\n\n## Pipeline\n1. Extract and basic clean text\n2. Perform NLP on text\n3. Identify possible references to locations in the NLP'ed text\n4. Fuzzy match possible locations and correct\n4. Create dataset of chronologically (in terms of the text) ordered locations with their `sentence_blocks`.\n5. Do some fancy opneai api call thi

In [1]:
import os
import numpy as np
import pandas as pd
import re
import json
import random
import time

from collections import Counter

import spacy
import nltk
from nltk.tokenize import sent_tokenize

from rapidfuzz import fuzz

import folium
import matplotlib.pyplot as plt

from geopy.geocoders import Nominatim
from geopy.exc import GeocoderUnavailable, GeocoderTimedOut

import geopandas as gpd

from openai import OpenAI

In [None]:
# Download punkt tokenizer if not already done
nltk.download('punkt', quiet=True)

# Load spaCy model
nlp = spacy.load("en_core_web_trf")

#####
# FILE PATH SETUP (adjust as needed)
TEXT_FILENAME = "Up the Amazon and Madeira Rivers through Bolivia and Peru copy.txt"
COORDS_FILENAME = "coords.json"

CWD = os.getcwd()

PATH_TO_RAW_DATA_DIR = os.path.join(CWD, "data", "raw")
PATH_TO_PROCESSED_DATA_DIR = os.path.join(CWD, "data", "processed")

PATH_TO_DATA = os.path.join(PATH_TO_RAW_DATA_DIR, TEXT_FILENAME)
PATH_TO_COORDS_JSON = os.path.join(PATH_TO_PROCESSED_DATA_DIR, COORDS_FILENAME)

# -- Read the input text
with open(PATH_TO_DATA, 'r') as file:
    text = file.read().replace('\n', ' ')

#####
# Build a list of words and their character offsets
matches = list(re.finditer(r'\S+', text))
words = [m.group(0) for m in matches]
word_starts = [m.start() for m in matches]
word_ends = [m.end() for m in matches]

#####
# Helper function: Find word index for a char position
def find_word_idx(char_idx, starts):
    for i, s in enumerate(starts):
        if char_idx < s:
            return max(i-1, 0)
    return len(starts)-1

#####
# Run spaCy NER
doc = nlp(text)

# Extract (location_text, start_char, end_char) for locations
location_spans = [
    (ent.text, ent.start_char, ent.end_char)
    for ent in doc.ents
    if ent.label_ in ("GPE", "LOC")
]


In [73]:
sentences = list(doc.sents)
for i in range(10):
    print(f"sentence index {i}: {sentences[i]}\n")

sentence index 0: ##### FRONTMATTER REDACTED #####  PREFACE.

sentence index 1:  

sentence index 2: A few years ago I was Resident Engineer of the projected Madeira and Mamoré Railway, to be constructed in the Province of Matto Grosso, in the Empire of Brazil, and as nearly as possible in the centre of the Continent of South America.

sentence index 3: From various causes the prosecution of the enterprise fell into abeyance for some considerable time.

sentence index 4: When the works were temporarily stopped, several reasons combined to induce me to return home by way of Bolivia and Peru.

sentence index 5: During that journey I kept up my ordinary custom of keeping a rough diary, and I have since dressed up my notes into something of a consecutive form.

sentence index 6: The resumption of the railway works has led me to think that some interest would attach to a description of a route across South America that has yet been but little travelled over.

sentence index 7: It has also o

In [74]:
sentence_char_starts = [sent.start_char for sent in sentences]
sentence_char_ends = [sent.end_char for sent in sentences]

In [77]:
for loc_text, start_char, end_char in location_spans:
    # Find which sentence contains the entity
    containing_sent_idx = None
    for i, (s, e) in enumerate(zip(sentence_char_starts, sentence_char_ends)):
        if start_char >= s and end_char <= e:
            containing_sent_idx = i
            break
    if containing_sent_idx is None:
        continue  # skip if not found (shouldn't really happen)

window = 2  # or whatever context you want
s0 = max(containing_sent_idx - window, 0)
s1 = min(containing_sent_idx + window + 1, len(sentences))
block_sentences = sentences[s0:s1]

block_text = text[sentence_char_starts[s0]:sentence_char_ends[s1-1]]

# To highlight, replace in that substring:
# (this is safer than entity_words variant)
block_text = block_text.replace(loc_text, f"<<{loc_text}>>", 1)

blocks.append({
    'location': loc_text,
    'sentence_block': block_text,
    'sentence_indices': (s0, s1-1),
    'location_start_char': start_char,
    'location_end_char': end_char
})

In [None]:

#####
# Build blocks (word windowed, guaranteed to contain full location)
window = 30 * 2  # words before and after

blocks = []
for loc_text, start_char, end_char in location_spans:
    start_word_idx = find_word_idx(start_char, word_starts)
    end_word_idx = find_word_idx(end_char-1, word_starts)
    w0 = max(start_word_idx - window, 0)
    w1 = min(end_word_idx + window + 1, len(words))  # +1 so slice includes last word

    block_words = words[w0:w1]
    # For possible highlighting, join the entity words
    entity_words = words[start_word_idx:end_word_idx+1]
    entity_str = ' '.join(entity_words)

    # Optional: mark the location in the block
    sentence_block = ' '.join(block_words)
    # Try to only mark first occurrence so as not to mess with repeated mentions in block
    sentence_block = sentence_block.replace(entity_str, f"<<{entity_str}>>", 1)

    blocks.append({
        'location': loc_text,
        'sentence_block': sentence_block,
        'start_word_index': start_word_idx,
        'end_word_index': end_word_idx,
        'location_start_char': start_char,
        'location_end_char': end_char
    })

df_blocks = pd.DataFrame(blocks)

In [3]:
def highlight_places_in_text(text, ents, start_char=0, end_char=1000):
    """
    text: full string
    ents: list of spacy entities (doc.ents)
    start_char, end_char: char positions of the slice to show
    """
    # Only consider entities in the given text slice
    display_ents = [e for e in ents if e.end_char > start_char and e.start_char < end_char and e.label_ in ('GPE', 'LOC')]
    # Sort entities so later ones don't disrupt char positions of earlier ones
    display_ents = sorted(display_ents, key=lambda e: e.start_char)
    new_text = text[start_char:end_char]
    offset = 0  # because we will insert stuff, index will shift right
    for ent in display_ents:
        rel_start = ent.start_char - start_char + offset
        rel_end = ent.end_char - start_char + offset
        # Only mark if it's within our slice
        if 0 <= rel_start < len(new_text) and 0 < rel_end <= len(new_text):
            new_text = (
                new_text[:rel_start] +
                f"<<{new_text[rel_start:rel_end]}>>" +
                new_text[rel_end:]
            )
            offset += 4  # len("<<>>")=4 extra chars per entity found
    print(new_text)

# Example use: print the first 1000 characters, highlighting all found places
highlight_places_in_text(text, doc.ents, start_char=0, end_char=5000)

##### FRONTMATTER REDACTED #####  PREFACE.  A few years ago I was Resident Engineer of the projected Madeira and Mamoré Railway, to be constructed in the Province of <<Matto Grosso>>, in the Empire of <<Brazil>>, and as nearly as possible in the centre of <<the Continent of South America>>. From various causes the prosecution of the enterprise fell into abeyance for some considerable time. When the works were temporarily stopped, several reasons combined to induce me to return home by way of <<Bolivia>> and <<Peru>>. During that journey I kept up my ordinary custom of keeping a rough diary, and I have since dressed up my notes into something of a consecutive form. The resumption of the railway works has led me to think that some interest would attach to a description of a route across <<South America>> that has yet been but little travelled over. It has also occurred to me, that nowadays, when the Eastern trip to <<India>>, <<China>>, <<Japan>>, and home _viâ_ <<San Francisco>> and <<N

In [4]:
SIMILARITY_THRESHOLD = 90  # tune as needed

# 1. Gather unique location names
all_locations = [block['location'] for block in blocks]

clusters = []
canonical_names = []
for loc in all_locations:
    matched = False
    for i, canon in enumerate(canonical_names):
        score = fuzz.token_sort_ratio(loc, canon)
        if score >= SIMILARITY_THRESHOLD:
            clusters[i].append(loc)
            matched = True
            break
    if not matched:
        canonical_names.append(loc)
        clusters.append([loc])

# 2. For each cluster, pick a canonical name (most common)
for i, c in enumerate(clusters):
    most_common = Counter(c).most_common(1)[0][0]
    canonical_names[i] = most_common

# --- STEP 3: Build your mapping and annotate blocks afterwards
location_to_canon = {}
for canon, members in zip(canonical_names, clusters):
    for m in members:
        location_to_canon[m] = canon

# Assign canonical_location to each block
for block in blocks:
    block['canonical_location'] = location_to_canon[block['location']]

In [5]:
locations = [block["canonical_location"] for block in blocks]
print(len(blocks))
print(len(locations))
print(len(set(locations)))

2417
2417
506


In [6]:
# Example output: Print a few blocks
for i in range(len(df_blocks)):  # adjust as you wish
    row = df_blocks.iloc[i]
    if row["location"] == "Cachimayo":
        print('-'*60)
        print("Location:", row["location"])
        print("Block:")
        print(row["sentence_block"])
        print(" ")
# To save: df_blocks.to_csv('location_blocks.csv')

------------------------------------------------------------
Location: Cachimayo
Block:
the posta which marks the end of the day’s work in good temper, inclined to do good justice to whatever is forthcoming for the nightly repast, after which the thoroughly earned rest is heartily enjoyed both by masters, men, and animals. CHAPTER XXVII. Leave Sucre under escort of English residents—Village of Nutshucc—Yotala—“Quintas,” or farmhouses—Don Tomas Frias’ country-house—Señor Pacheco’s quinta—The river <<Cachimayo>> and its basket bridge—Fever at Nutshucc—The river Pilcomayo—Terrado and Pampa-tambo—Quebrada Honda—Bartolo—Potosí—The Soroche—The Fonda Coca—The Cerro de Potosí—Silver mines—Reservoirs—Church of La Matriz—The mint—New coinage—Burying money—Future of mining in Potosí—Departure from Potosí—Tarapaya—Yocalla—Yocalla bridge built by the devil in a night—Leñas—Loss of my aneroid barometer—Vicuñas—Condors—Lagunillas—Tolapalca—Bolivian Norfolk-Howards, or Vinchutas—Vilcapujio—Curious cus

In [12]:
def get_location_coords(location_set, verbose=False):
    # Filter out None or empty/whitespace locations early
    location_set = set(filter(lambda x: x and x.strip(), location_set))

    geolocator = Nominatim(user_agent="generic_name")
    location_coord_dict = {}
    n_locations = len(location_set)

    t0 = time.perf_counter()
    for count, location in enumerate(location_set, start=1):
        t1 = time.perf_counter()
        print(f"Getting coords for '{location}'; {count}/{n_locations}")
        try:
            loc = geolocator.geocode(location)
            if loc is not None:
                coords = (loc.latitude, loc.longitude)
                print(f"  {location}: {coords}")
                location_coord_dict[location] = coords
            else:
                print(f"  Location not found: {location}")
        except GeocoderUnavailable as e:
            if verbose:
                print(f"  Geocoder is unavailable: {e}")
            else:
                print("  Geocoder is unavailable")
        except GeocoderTimedOut as e:
            if verbose:
                print(f"  Geocoder timed out: {e}")
            else:
                print("  Geocoder timed out")
        except AttributeError as e:
            if verbose:
                print(f"  Attribute error (possibly bad result) for '{location}': {e}")
            else:
                print("  Attribute error (possibly bad result) for '{location}'")
                
        t_elapsed = time.perf_counter() - t1
        total_elapsed = time.perf_counter() - t0
        print(f"  Time for this location: {t_elapsed:.2f} s\n")
        print(f"  Runtime so far: {total_elapsed:.2f} s\n")

    t_total = time.perf_counter() - t0
    print(f"Total time for all locations: {t_total:.2f} s")
    return location_coord_dict

coords_dict = get_location_coords(set(locations))

Getting coords for 'Mamoré'; 1/458
  Mamoré: (-13.4129427, -65.2152448)
  Time for this location: 0.19 s

  Runtime so far: 0.19 s

Getting coords for 'Magdalena'; 2/458
  Geocoder is unavailable
  Time for this location: 3.14 s

  Runtime so far: 3.33 s

Getting coords for 'COCHABAMBA'; 3/458
  COCHABAMBA: (-17.4012458, -66.1675681)
  Time for this location: 0.97 s

  Runtime so far: 4.29 s

Getting coords for 'Venezuela'; 4/458
  Venezuela: (8.0018709, -66.1109318)
  Time for this location: 1.93 s

  Runtime so far: 6.22 s

Getting coords for 'Matocari'; 5/458
  Location not found: Matocari
  Time for this location: 1.96 s

  Runtime so far: 8.18 s

Getting coords for 'the Rio Grande'; 6/458
  the Rio Grande: (42.4891061, -2.4409022)
  Time for this location: 3.07 s

  Runtime so far: 11.26 s

Getting coords for 'Australia'; 7/458
  Australia: (-24.7761086, 134.755)
  Time for this location: 2.05 s

  Runtime so far: 13.30 s

Getting coords for 'Madeira'; 8/458
  Madeira: (32.7517501

In [7]:
with open("data/processed/coords.json", "r") as f:
    coords_dict = json.load(f)

In [8]:
# Load Amazonia boundary
amazon_gdf = gpd.read_file('data/raw/Amazonia-sensu-stricto.gpkg')
amazon_geom = amazon_gdf.unary_union  # Combine all shapes into one MultiPolygon/Polygon

  amazon_geom = amazon_gdf.unary_union  # Combine all shapes into one MultiPolygon/Polygon


In [10]:
from shapely.geometry import Point

filtered_coords = {}
for place, (lat, lon) in coords_dict.items():
    point = Point(lon, lat)  # Note (lon, lat) order!
    if amazon_geom.contains(point):
        filtered_coords[place] = (lat, lon)

In [11]:
# row = df_blocks[df_blocks["location"]=="shaly"]
# sentence = row["sentence_block"]
# print(sentence)

In [13]:
with open(PATH_TO_COORDS_JSON, "w") as f:
    json.dump(coords_dict, f, indent=4)

# with open("data/processed/coords_v01.json", "r") as file:
#     coords_dict = json.load(file)

In [12]:
coords_dict

{'Mamoré': [-13.4129427, -65.2152448],
 'COCHABAMBA': [-17.4012458, -66.1675681],
 'Venezuela': [8.0018709, -66.1109318],
 'the Rio Grande': [42.4891061, -2.4409022],
 'Australia': [-24.7761086, 134.755],
 'Madeira': [32.7517501, -16.9817487],
 'Desaguadero': [-16.564809, -69.0401545],
 'Lima': [-12.0621065, -77.0365256],
 'Panamá': [8.559559, -81.1308434],
 'Andean': [42.9414286, -7.9979985],
 'Sepulturas': [-17.9769226, -67.0134118],
 'Mollendo': [-17.0292533, -72.0155447],
 'Columbus': [39.9622601, -83.0007065],
 'Girão': [32.7351665, -17.1952761],
 'Minas Maio': [-20.316193, -40.4387467],
 'China': [35.0000663, 104.999955],
 'Gurupá': [-1.406325, -51.6427456],
 'Praia': [14.9162811, -23.5095095],
 'United States': [39.7837304, -100.445882],
 'Madeira Rivers': [-6.8064866, -62.4115491],
 'St. James': [39.573173, -77.7493708],
 'CARIPUNAS': [-9.2149237, -64.6514359],
 'Yotala': [-19.1616017, -65.2647648],
 'Santa Cruz de la Sierra': [-17.7834217, -63.1820853],
 'Caracas': [10.5060934

In [13]:
df_blocks.columns

Index(['location', 'sentence_block', 'start_word_index', 'end_word_index',
       'location_start_char', 'location_end_char'],
      dtype='object')

In [14]:
# Each location can have multiple blocks, so grab the first one or join with separator
location_to_sentence = (
    df_blocks
    .groupby("location")["sentence_block"]
    .apply(lambda x: "<br><br>".join(x))
    .to_dict()
)

In [15]:
# Now, create a Folium map centered somewhere (e.g., the average of your lat/lon)
if filtered_coords:
    lats = [coords[0] for coords in filtered_coords.values()]
    lons = [coords[1] for coords in filtered_coords.values()]
    avg_lat = sum(lats) / len(lats)
    avg_lon = sum(lons) / len(lons)
    m = folium.Map(location=[avg_lat, avg_lon], zoom_start=2)
else:
    m = folium.Map(location=[0, 0], zoom_start=2)  # fallback

# Add markers
for name, coords in filtered_coords.items():
    popup_text = location_to_sentence.get(name, name)  # fallback to name

    folium.Marker(
        location=coords,
        popup=folium.Popup(popup_text, max_width=350),  # max_width avoids very wide popups
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(m)

m

In [37]:
# Now, create a Folium map centered somewhere (e.g., the average of your lat/lon)
if filtered_coords:
    lats = [coords[0] for coords in filtered_coords.values()]
    lons = [coords[1] for coords in filtered_coords.values()]
    avg_lat = sum(lats) / len(lats)
    avg_lon = sum(lons) / len(lons)
    m = folium.Map(location=[avg_lat, avg_lon], zoom_start=2)
else:
    m = folium.Map(location=[0, 0], zoom_start=2)  # fallback
    
# Number of words in the whole book
total_words = len(words)

def first_N_words(text, N=6):
    tokens = text.split()
    preview = " ".join(tokens[:N])
    return preview + ("..." if len(tokens) > N else "")

# Group all sentence blocks per location, keeping their start_word_index for sorting %age
blocks_grouped = df_blocks.groupby("location").apply(
    lambda df: df.sort_values('start_word_index')[['sentence_block', 'start_word_index']].to_dict('records')
)
# blocks_grouped is now a Series: location -> list of dicts with sentence_block, start_word_index

# Now make what you want for the popup!
location_to_sentence = {}
for loc, entries in blocks_grouped.items():
    popup_parts = []
    for i, entry in enumerate(entries):
        pct_through = 100 * entry['start_word_index'] / total_words
        pct_str = f"<b>{pct_through:.1f}% through book</b>: "
        if i == 0:
            popup_parts.append(f'{pct_str}<span>{entry["sentence_block"]}</span>')
        else:
            short_line = first_N_words(entry["sentence_block"], N=7)
            popup_parts.append(f'{pct_str}<i>{short_line}</i>')
    # Separate blocks by lines
    popup_text = "<hr>".join(popup_parts)
    location_to_sentence[loc] = popup_text

# Use with folium as before:
for name, coords in filtered_coords.items():
    folium.Marker(
        location=coords,
        popup=folium.Popup(location_to_sentence.get(name, name), max_width=350),
        icon=folium.Icon(color='blue', icon='info-sign')
    ).add_to(m)

  blocks_grouped = df_blocks.groupby("location").apply(


In [39]:
m

In [16]:
m.save('map_of_locations.html')

## Get metadata for each text snippet from openai's api

In [32]:
with open(".secrets.json", "r") as f:
    OPENAI_API_KEY = (json.load(f))["OPENAI_API_KEY"]
    
client = OpenAI()
client.api_key=OPENAI_API_KEY

response = client.responses.create(
    model="gpt-4.1",
    input="Write a one-sentence bedtime story about a unicorn."
)

print(response.output_text)

Beneath a shimmering moon, a gentle unicorn named Luna tiptoed through a field of glowing flowers, leaving trails of stardust for dreamers to follow.


In [40]:
entry

location                                                    Matto Grosso
sentence_block         ##### FRONTMATTER REDACTED ##### PREFACE. A fe...
start_word_index                                                      27
end_word_index                                                        28
location_start_char                                                  166
location_end_char                                                    178
Name: 0, dtype: object

In [None]:
entry = df_blocks.iloc[100]
pct = 100 * entry['start_word_index'] / total_words

message = f"""
This text is a snippet from the 1879 book 'Up the Amazon and Madeira Rivers through Bolivia and Peru' by 'Edward D. Mathews'. The snippet is centred on the location {entry.location}, which occurs {pct:.2f} % of the way through the book. Please analyse the text with specific reference to this location and give me the following:
- is the author referring to a place he is encountering during his journey in the Amazon? (answer: yes, no, unsure)
- is the author travelling to or from the location? (answer: to, from, neither, unsure)
- what is the authors sentiment to the location? (answer: positive, negative, curious, bored, etc. none, unsure)
- what type of place is this location? (answer: e.g. city, river, region, infrastructure, anything else, unknown)
- is the location real? (answer: yes, no, unsure)
- does the author date (roughly or precisely) this location in their journey? (answer: yes (if so, give date), no, unsure)
- what is the theme of the location? (answer: nature, anthropological, danger, disease, travel, etc. unsure)
- which of these themes best describes the main topic of the following passage? (answer: travel, exploration, trade, health, wildlife, engineering, conflict, etc.)
- identify any major event associated with this location. (answer: illness, accident, meeting, discovery, etc. unsure)
- summary of snippet (answer: 1-2 sentences)
- ambiguity of snippet, what is missing by it being cut off?
- additional places mentioned

Snippet:
{entry['sentence_block']}
"""
print(message)


This text is a snippet from the 1879 book 'Up the Amazon and Madeira Rivers through Bolivia and Peru' by 'Edward D. Mathews'. The snippet is centred on the location Tapajoz, which occurs 2.20 % of the way through the book. Please analyse the text with specific reference to this location and give me the following:
- is the author referring to a place he is encountering during his journey in the Amazon? (answer: yes, no, unsure)
- is the author travelling to or from the location? (answer: to, from, neither, unsure)
- what is the authors sentiment to the location? (answer: positive, negative, curious, bored, etc. none, unsure)
- what type of place is this location? (answer: e.g. city, river, region, infrastructure, anything else, unknown)
- is the location real? (answer: yes, no, unsure)
- does the author date (roughly or precisely) this location in their journey? (answer: yes (if so, give date), no, unsure)
- what is the theme of the location? (answer: nature, anthropological, danger, d

In [68]:
from openai import OpenAI
from pydantic import BaseModel

class LocationMetadata(BaseModel):
    journey: str | None
    travel_to_from: str | None
    sentiment: str | None
    type: str | None
    real: bool | None
    date: str | None
    theme: str | None
    event: str | None
    summary: str
    ambiguity: str
    plases: list[str]


response = client.responses.parse(
    model="gpt-4.1",
    input=[
        {"role": "system", "content": "You are an expert in 19th century Amazonian exploration."},
        {
            "role": "user",
            "content": message,
        },
    ],
    text_format=LocationMetadata,
)

event = response.output_parsed

In [69]:
event

LocationMetadata(journey='Yes, the author is describing his journey along the Amazon, encountering the Tapajoz River and its junction with the Amazon.', travel_to_from='to', sentiment='positive', type='river', real=True, date='no', theme='travel', event='unsure', summary='The author describes reaching the Tapajoz River and the town of Santarem, noting the characteristics of the river and the local geography and economy.', ambiguity='The snippet is cut off before providing a full description of Santarem, its church, and possibly more details about the river or surrounding area; it is also not clear how long the author stays or what specific events occur there.', plases=['Tapajoz River', 'Santarem', 'Amazon River'])