In [15]:
import os
import pandas as pd
import geopandas as gpd
from dbfread import DBF 
import requests
from urllib.parse import urlparse
import json
import re

In [5]:
#path to DBF file
file_path = ".\\landmarks_names\\landmarks_names\\landmarks_names.dbf"

encoding = 'utf-8'  

# Create an empty list to store records
records = []

# Read the DBF file and iterate through records with the specified encoding
with DBF(file_path, encoding=encoding) as dbf:
    for record in dbf:
        records.append(record)

In [6]:
# Convert the list of records into a pandas DataFrame
df = pd.DataFrame(records)
df

Unnamed: 0,id,English,Arabic,Arabic_2,German,English_2,Hebrew
0,,Storks Tower,Burj Laklak,,,,
1,,St. Stephen's Gate,Bâb Sitti Maryam,Bāb Sitti Marjam,Stephansthor,,
2,,Gate of the Tribes\t\t\t\t,Bâb al Asbât\t,,,,
3,,Solomon's Throne,,,,,
4,,Golden Gate,Bâb al Taûbe',,,,
...,...,...,...,...,...,...,...
155,,German Colony,,,,,
156,,Railway Station,,,,,
157,,Chapel of the Ascension,,,,,
158,,German Hospice,,,,,


In [7]:
#removing NA values
df['English'] = df['English'].replace('', pd.NA)
# Remove rows with NaN values
df = df.dropna(subset=['English'],how='all')
df

Unnamed: 0,id,English,Arabic,Arabic_2,German,English_2,Hebrew
0,,Storks Tower,Burj Laklak,,,,
1,,St. Stephen's Gate,Bâb Sitti Maryam,Bāb Sitti Marjam,Stephansthor,,
2,,Gate of the Tribes\t\t\t\t,Bâb al Asbât\t,,,,
3,,Solomon's Throne,,,,,
4,,Golden Gate,Bâb al Taûbe',,,,
...,...,...,...,...,...,...,...
155,,German Colony,,,,,
156,,Railway Station,,,,,
157,,Chapel of the Ascension,,,,,
158,,German Hospice,,,,,


In [8]:
def remove_quadruple_tabs_if_exists(s):
    """Remove occurrences of '\t\t\t\t' from the input string if it exists."""
    if '\t\t\t\t' in s:
        return s.replace('\t\t\t\t', '')
    if '\t\t\t\t\t' in s:
        return s.replace('\t\t\t\t\t', '')   
    else:
        return s

# Applying the function to each string in the list
df['English'] = [remove_quadruple_tabs_if_exists(s) for s in df['English']]
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['English'] = [remove_quadruple_tabs_if_exists(s) for s in df['English']]


Unnamed: 0,id,English,Arabic,Arabic_2,German,English_2,Hebrew
0,,Storks Tower,Burj Laklak,,,,
1,,St. Stephen's Gate,Bâb Sitti Maryam,Bāb Sitti Marjam,Stephansthor,,
2,,Gate of the Tribes,Bâb al Asbât\t,,,,
3,,Solomon's Throne,,,,,
4,,Golden Gate,Bâb al Taûbe',,,,
...,...,...,...,...,...,...,...
155,,German Colony,,,,,
156,,Railway Station,,,,,
157,,Chapel of the Ascension,,,,,
158,,German Hospice,,,,,


In [9]:
# Replace spaces with '+' in the 'English' column
df['formatted_english'] = df['English'].str.replace(' ', '+', regex=False)

# Create the base URL
base_url = 'https://www.loc.gov/photos/?fa=location:jerusalem&q='

# Concatenate the base URL with the formatted 'English' column and assign it to the new 'url' column
df['url'] = base_url + df['formatted_english']
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['formatted_english'] = df['English'].str.replace(' ', '+', regex=False)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['url'] = base_url + df['formatted_english']


Unnamed: 0,id,English,Arabic,Arabic_2,German,English_2,Hebrew,formatted_english,url
0,,Storks Tower,Burj Laklak,,,,,Storks+Tower,https://www.loc.gov/photos/?fa=location:jerusa...
1,,St. Stephen's Gate,Bâb Sitti Maryam,Bāb Sitti Marjam,Stephansthor,,,St.+Stephen's+Gate,https://www.loc.gov/photos/?fa=location:jerusa...
2,,Gate of the Tribes,Bâb al Asbât\t,,,,,Gate+of+the+Tribes,https://www.loc.gov/photos/?fa=location:jerusa...
3,,Solomon's Throne,,,,,,Solomon's+Throne,https://www.loc.gov/photos/?fa=location:jerusa...
4,,Golden Gate,Bâb al Taûbe',,,,,Golden+Gate,https://www.loc.gov/photos/?fa=location:jerusa...
...,...,...,...,...,...,...,...,...,...
155,,German Colony,,,,,,German+Colony,https://www.loc.gov/photos/?fa=location:jerusa...
156,,Railway Station,,,,,,Railway+Station,https://www.loc.gov/photos/?fa=location:jerusa...
157,,Chapel of the Ascension,,,,,,Chapel+of+the+Ascension,https://www.loc.gov/photos/?fa=location:jerusa...
158,,German Hospice,,,,,,German+Hospice,https://www.loc.gov/photos/?fa=location:jerusa...


In [None]:
import requests
from urllib.parse import urlparse
import os
import json
import re
import pandas as pd

def sanitize_name(name):
    return re.sub(r'[<>:"/\\|?*\t\n]+', '_', name)

def download_metadata(identifier, save_path):
    metadata_url = f"https://www.loc.gov/item/{identifier}/?fo=json"
    try:
        response = requests.get(metadata_url)
        if response.status_code == 200:
            metadata = response.json()
            metadata_file = os.path.join(save_path, f"{identifier}_metadata.json")
            with open(metadata_file, 'w') as f:
                json.dump(metadata, f, indent=2)
            print(f"Metadata saved for {identifier}")
        else:
            print(f"Failed to download metadata for {identifier} with status code: {response.status_code}")
    except Exception as e:
        print(f"An error occurred while fetching metadata for {identifier}: {e}")

def get_and_save_images(results_url, path):
    params = {"fo": "json", "c": 100, "at": "results,pagination"}
    call = requests.get(results_url, params=params)
    data = call.json()
    results = data['results']
    for result in results:
        if "collection" not in result.get("original_format") and "web page" not in result.get("original_format"):
            if result.get("image_url"):
                image = result.get("image_url")[-1]
                identifier = urlparse(result["id"])[2].rstrip('/').split('/')[-1]
                filename = f"{sanitize_name(identifier)}.jpg"
                filepath = os.path.join(path, filename)
                image_response = requests.get(image, stream=True)
                with open(filepath, 'wb') as fd:
                    for chunk in image_response.iter_content(chunk_size=100000):
                        fd.write(chunk)
                print(f"Downloaded image to {filepath}")

                # Download and save metadata
                download_metadata(identifier, path)
    
    if data["pagination"].get("next"):
        next_url = data["pagination"]["next"]
        print(f"Getting next page: {next_url}")
        get_and_save_images(next_url, path)

def process_all_urls(df, base_path):
    for index, row in df.iterrows():
        name = sanitize_name(row['English'])
        url = row['url']
        path = os.path.join(base_path, name)
        os.makedirs(path, exist_ok=True)
        get_and_save_images(url, path)


base_path = ".\\images_metadata" 
process_all_urls(df, base_path)


Downloaded image to C:\Users\97455\Downloads\images_metadata\Storks Tower\2007675295.jpg
Metadata saved for 2007675295
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\92500649.jpg
Metadata saved for 92500649
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019692376.jpg
Metadata saved for 2019692376
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019702314.jpg
Metadata saved for 2019702314
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019701767.jpg
Metadata saved for 2019701767
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019702313.jpg
Metadata saved for 2019702313
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019705420.jpg
Metadata saved for 2019705420
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019697354.jpg
Metadata saved for 2019697354
Downloaded

Metadata saved for 2019696690
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019707735.jpg
Metadata saved for 2019707735
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019707736.jpg
Metadata saved for 2019707736
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019707737.jpg
Metadata saved for 2019707737
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2019707749.jpg
Metadata saved for 2019707749
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2007682816.jpg
Metadata saved for 2007682816
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2006675884.jpg
Metadata saved for 2006675884
Downloaded image to C:\Users\97455\Downloads\images_metadata\St. Stephen's Gate\2007675259.jpg
Metadata saved for 2007675259
Downloaded image to C:\Users\97455\Downloads\images_metadata\Gate of the Tribes\2007675285.jpg


Metadata saved for 2001695420
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019699443.jpg
Metadata saved for 2019699443
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019701851.jpg
Metadata saved for 2019701851
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019705390.jpg
Metadata saved for 2019705390
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019699906.jpg
Metadata saved for 2019699906
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019697370.jpg
Metadata saved for 2019697370
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019709589.jpg
Metadata saved for 2019709589
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019699238.jpg
Metadata saved for 2019699238
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\92500430.jpg
Metadata saved for 92500430
Downloaded image to C:\Users\97455\Dow

Metadata saved for 93513671
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019691459.jpg
Metadata saved for 2019691459
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019705830.jpg
Metadata saved for 2019705830
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019691461.jpg
Metadata saved for 2019691461
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019708842.jpg
Metadata saved for 2019708842
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019709364.jpg
Metadata saved for 2019709364
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019691861.jpg
Metadata saved for 2019691861
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019712063.jpg
Metadata saved for 2019712063
Downloaded image to C:\Users\97455\Downloads\images_metadata\Jaffa Gate\2019712064.jpg
Metadata saved for 2019712064
Downloaded image to C:\Users\97455\D

Metadata saved for 2019699327
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019699352.jpg
Metadata saved for 2019699352
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019697355.jpg
Metadata saved for 2019697355
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019697104.jpg
Metadata saved for 2019697104
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019695032.jpg
Metadata saved for 2019695032
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019696582.jpg
Metadata saved for 2019696582
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019696583.jpg
Metadata saved for 2019696583
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019699288.jpg
Metadata saved for 2019699288
Downloaded image to C:\Users\97455\Downloads\images_metadata\The Citadel\2019699300.jpg
Metadata saved for 2019699300
Downloaded image to C:\Use

Metadata saved for 2019699319
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699320.jpg
Metadata saved for 2019699320
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699321.jpg
Metadata saved for 2019699321
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699327.jpg
Metadata saved for 2019699327
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699352.jpg
Metadata saved for 2019699352
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019697332.jpg
Metadata saved for 2019697332
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699323.jpg
Metadata saved for 2019699323
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2002711285.jpg
Metadata saved for 2002711285
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699338.jpg
Metadata saved for 2019699338
Downloaded

Metadata saved for 2019704669
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2006675884.jpg
Metadata saved for 2006675884
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2007675295.jpg
Metadata saved for 2007675295
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2007675264.jpg
Metadata saved for 2007675264
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019695929.jpg
Metadata saved for 2019695929
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699467.jpg
Metadata saved for 2019699467
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699512.jpg
Metadata saved for 2019699512
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699694.jpg
Metadata saved for 2019699694
Downloaded image to C:\Users\97455\Downloads\images_metadata\David's Tower\2019699708.jpg
Metadata saved for 2019699708
Downloaded

Metadata saved for 2019698456
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019702341.jpg
Metadata saved for 2019702341
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019693888.jpg
Metadata saved for 2019693888
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019693822.jpg
Metadata saved for 2019693822
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019702286.jpg
Metadata saved for 2019702286
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019702342.jpg
Metadata saved for 2019702342
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019694557.jpg
Metadata saved for 2019694557
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019696586.jpg
Metadata saved for 2019696586
Downloaded image to C:\Users\97455\Downloads\images_metadata\Damascus Gate_\2019705880.jpg
Metadata saved for 2019705880
Do