In [1]:
import requests
import re
import json
import os
import pandas as pd
import sys
import numpy as np

# project lib
PROJECT_SRC_PATH = os.path.join( '/workspace/workspace/ufo-prediction', 'src-RCA-UFO')
sys.path.append(PROJECT_SRC_PATH)
import dataset

In [2]:
# Set a seed for reproducibility
np.random.seed(42)  # You can choose any number as your seed

# Define paths for data
path_data_NLD = os.path.join('/workspace/workspace/ufo-prediction', 'demo', 'df-NLD.pkl')
path_data_FRA = os.path.join('/workspace/workspace/ufo-prediction', 'demo', 'df-FRA.pkl')
path_data_ESP = os.path.join('/workspace/workspace/ufo-prediction', 'demo', 'df-ESP.pkl')
processed_df_NLD = "/workspace/workspace/ufo-prediction/demo/kartaview_key_NLD_adv.csv"
processed_df_ESP = "/workspace/workspace/ufo-prediction/demo/kartaview_key_ESP_adv.csv"
processed_df_FRA = "/workspace/workspace/ufo-prediction/demo/kartaview_key_FRA_adv.csv"
processed_df_ALL = "/workspace/workspace/ufo-prediction/demo/kartaview_key.csv"

# Image directory paths
image_dir_map = {
    'NLD': '/workspace/workspace/ufo-prediction/image_data_NLD_adv',
    'FRA': '/workspace/workspace/ufo-prediction/image_data_FRA_adv',
    'ESP': '/workspace/workspace/ufo-prediction/image_data_ESP_adv',
    'ALL': '/workspace/workspace/ufo-prediction/image_data'
}

# Ask for user input
country_code = input("Enter country code (FRA, NLD, ESP) or ALL: ").upper()

# Map user input to the correct path
path_data_map = {
    'NLD': processed_df_NLD,
    'FRA': processed_df_FRA,
    'ESP': processed_df_ESP,
    'ALL': processed_df_ALL
}

raw_data_path_map = {
'NLD': path_data_NLD,
'FRA': path_data_FRA,
'ESP': path_data_ESP
}
# Check if the input is valid
if country_code not in path_data_map:
    print("Invalid country code or specification. Please enter FRA, NLD, ESP, or ALL.")
else:
    processed_df_path = path_data_map[country_code]
    # Set directory based on country code
    current_directory = image_dir_map[country_code]

    # Check if the new directory exists, if not, create it
    if not os.path.exists(current_directory):
        os.makedirs(current_directory)
        print(f"Directory {current_directory} created.")
    else:
        print(f"Directory {current_directory} already exists.")

    # Process for ALL
    if country_code == 'ALL':
        if os.path.exists(processed_df_path):
            kartaview_keys = pd.read_csv(processed_df_path)
            print("Loaded processed DataFrame from", processed_df_path)
        else:
            print("Creating a new processed DataFrame for ALL")
            path_data_RCA = os.path.join(dataset.DATA_DIR, 'rca-ufo-merge_ALL.csv')
            df = pd.read_csv(path_data_RCA, encoding='latin1')
            kartaview_keys = df[['lon', 'lat','age_right', 'id', 'PropertyKey_ID']]
            kartaview_keys.to_csv(processed_df_path, index=False)
            

    # Process for FRA, NLD, ESP
    else:
        if os.path.exists(processed_df_path):
            kartaview_keys = pd.read_csv(processed_df_path)
            print("Loaded processed DataFrame from", processed_df_path)
        else:
            print(f"Creating a new processed DataFrame for {country_code}")
            df_path = raw_data_path_map[country_code]
            df = pd.read_pickle(df_path)
            print("Loaded DataFrame from", df_path)
            sampled_df = df.sample(n=1500000, random_state=42)
            kartaview_keys = sampled_df[['lon', 'lat', 'age', 'id']].rename(columns={'age': 'age_right'})
            kartaview_keys.to_csv(processed_df_path, index=False)

print(kartaview_keys) 

Enter country code (FRA, NLD, ESP) or ALL: NLD
Directory /workspace/workspace/ufo-prediction/image_data_NLD_adv already exists.
Loaded processed DataFrame from /workspace/workspace/ufo-prediction/demo/kartaview_key_NLD_adv.csv
             lon        lat  age_right                     id
0       6.160798  52.855815     1972.0   v0.1-NLD.1.12_1-6428
1       4.657806  52.488100     1907.0   v0.1-NLD.9.10_1-1974
2       4.853944  52.310044     1937.0   v0.1-NLD.9.3_1-36089
3       5.521659  51.760100     1986.0  v0.1-NLD.8.47_1-47564
4       4.748038  51.666148     1930.0  v0.1-NLD.8.19_1-18371
...          ...        ...        ...                    ...
859811  4.456558  51.532000     1980.0  v0.1-NLD.8.49_1-34115
859812  5.203008  51.796783     1988.0  v0.1-NLD.4.68_1-11607
859813  5.182171  52.372244     1993.0   v0.1-NLD.2.1_1-75764
859814  6.034283  51.407144     1973.0   v0.1-NLD.7.36_1-2558
859815  4.442087  51.726000        NaN                    NaN

[859816 rows x 4 columns]


In [3]:
print(current_directory)
print(kartaview_keys)

/workspace/workspace/ufo-prediction/image_data_NLD_adv
             lon        lat  age_right                     id
0       6.160798  52.855815     1972.0   v0.1-NLD.1.12_1-6428
1       4.657806  52.488100     1907.0   v0.1-NLD.9.10_1-1974
2       4.853944  52.310044     1937.0   v0.1-NLD.9.3_1-36089
3       5.521659  51.760100     1986.0  v0.1-NLD.8.47_1-47564
4       4.748038  51.666148     1930.0  v0.1-NLD.8.19_1-18371
...          ...        ...        ...                    ...
859811  4.456558  51.532000     1980.0  v0.1-NLD.8.49_1-34115
859812  5.203008  51.796783     1988.0  v0.1-NLD.4.68_1-11607
859813  5.182171  52.372244     1993.0   v0.1-NLD.2.1_1-75764
859814  6.034283  51.407144     1973.0   v0.1-NLD.7.36_1-2558
859815  4.442087  51.726000        NaN                    NaN

[859816 rows x 4 columns]


In [None]:
# Assuming current_directory and processed_df_path are set from the previous code chunk
print(f"Images will be saved in: {current_directory}")
print(f"Using kartaview_keys from: {processed_df_path}")

# Consistency check between image directory and kartaview_keys path
expected_csv_map = {
    '/workspace/workspace/ufo-prediction/image_data': '/workspace/workspace/ufo-prediction/demo/kartaview_key.csv',
    '/workspace/workspace/ufo-prediction/image_data_NLD': '/workspace/workspace/ufo-prediction/demo/kartaview_key_NLD.csv',
    '/workspace/workspace/ufo-prediction/image_data_FRA': '/workspace/workspace/ufo-prediction/demo/kartaview_key_FRA.csv',
    '/workspace/workspace/ufo-prediction/image_data_ESP': '/workspace/workspace/ufo-prediction/demo/kartaview_key_ESP.csv',
}

# Stop the code if using image_data directory
if current_directory == '/workspace/workspace/ufo-prediction/image_data':
    print("Download for the 'image_data' directory has already been completed. Stopping execution.")
    # Use `exit()` or `sys.exit()` depending on your environment
    exit()

if processed_df_path != expected_csv_map.get(current_directory):
    print("Inconsistency detected between the image directory and the kartaview_keys path. Please check.")
    exit()

image_count = {} 
print("Number of buildings remaining: ",len(kartaview_keys))

#Initialise a counter for the loop iterations
iteration_counter = 0

for index, row in kartaview_keys.iterrows():
    iteration_counter += 1 # Increment the counter with each iteration
    precision = 6  # Start with 6 decimal places
    success = False  # Flag to indicate if the request was successful

    while precision > 2 and not success:
        # Format lon and lat to the current precision
        lon = f"{row['lon']:.{precision}f}"
        lat = f"{row['lat']:.{precision}f}"

        # Construct the API URL
        url = "https://api.openstreetcam.org/2.0/photo/?lat={}&lng={}".format(lat, lon)

        # Send a GET request to the API
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            data = response.json()
            url_pattern = r'https://storage\d+\.openstreetcam\.org/files/photo/\d+/\d+/\d+/[^"]+\.jpg'
            urls = re.findall(url_pattern, json.dumps(data))

            filtered_urls = [
                url for url in urls
                if not any(x in url.rsplit('/', 2)[-2] for x in ["{{sizeprefix}}", "proc"]) and
                ("th" in url.rsplit('/', 2)[-2] and not "lth" in url.rsplit('/', 2)[-2])
            ]

            if filtered_urls:
                # Initialize or update the image count for the current ID
                building_id = row['id']  # Assuming 'id' column exists in your DataFrame
                if building_id not in image_count:
                    image_count[building_id] = 0

                for image_url in filtered_urls:
                    image_count[building_id] += 1  # Increment the image count for the building
                    subscript = image_count[building_id]  # Subscript for the file name
                    file_name = f"{row['age_right']}_{building_id}_{subscript}.jpg"
                    file_path = os.path.join(current_directory, file_name)

                    # Check if the file already exists
                    if os.path.exists(file_path):
                        print(f"File already exists: {file_path}. Skipping download.")
                    else:
                        image_response = requests.get(image_url)

                        if image_response.status_code == 200:
                            with open(file_path, 'wb') as f:
                                f.write(image_response.content)
                            print("Image downloaded successfully: {}".format(file_path))
                        else:
                            print("Failed to download the image.")
                success = True  # Mark success as True to exit the while loop
            else:
                print("No suitable images found for location: lon={}, lat={}".format(lon, lat))
                precision -= 1  # Reduce precision by one decimal place
        else:
            print("Failed to retrieve data from the API for location: lon={}, lat={}. Trying with reduced precision.")
            

    # After processing, remove the row from df_subset
    kartaview_keys = kartaview_keys.drop(index)

    # Only save the updated DataFrame to a CSV file every 100th instance
    if iteration_counter % 100 == 0:
        print(f"Saving progress at iteration {iteration_counter} to {processed_df_path}. ")
        print("Number of buildings remaining: ",len(kartaview_keys))
        kartaview_keys.to_csv(processed_df_path, index=False)

    if not success:
        print("Unable to retrieve data from the API with sufficient precision for location: lon={}, lat={}".format(row['lon'], row['lat']))
        
if iteration_counter % 100 != 0:
    print(f"Saving final progress")
    print("Number of buildings remaining: ",len(kartaview_keys))
    kartaview_keys.to_csv(processed_df_path, index=False)

Images will be saved in: /workspace/workspace/ufo-prediction/image_data_NLD_adv
Using kartaview_keys from: /workspace/workspace/ufo-prediction/demo/kartaview_key_NLD_adv.csv
Inconsistency detected between the image directory and the kartaview_keys path. Please check.
Number of buildings remaining:  859816
No suitable images found for location: lon=6.160798, lat=52.855815
No suitable images found for location: lon=6.16080, lat=52.85582
No suitable images found for location: lon=6.1608, lat=52.8558
No suitable images found for location: lon=6.161, lat=52.856
Unable to retrieve data from the API with sufficient precision for location: lon=6.160797884310248, lat=52.8558152302032
No suitable images found for location: lon=4.657806, lat=52.488100
No suitable images found for location: lon=4.65781, lat=52.48810
No suitable images found for location: lon=4.6578, lat=52.4881
No suitable images found for location: lon=4.658, lat=52.488
Unable to retrieve data from the API with sufficient precisi

No suitable images found for location: lon=3.906, lat=51.698
Unable to retrieve data from the API with sufficient precision for location: lon=3.906367323795777, lat=51.69781415138909
No suitable images found for location: lon=5.175789, lat=52.111774
No suitable images found for location: lon=5.17579, lat=52.11177
No suitable images found for location: lon=5.1758, lat=52.1118
No suitable images found for location: lon=5.176, lat=52.112
Unable to retrieve data from the API with sufficient precision for location: lon=5.175788575140865, lat=52.11177431869177
No suitable images found for location: lon=5.070824, lat=52.295853
No suitable images found for location: lon=5.07082, lat=52.29585
No suitable images found for location: lon=5.0708, lat=52.2959
No suitable images found for location: lon=5.071, lat=52.296
Unable to retrieve data from the API with sufficient precision for location: lon=5.070824442928669, lat=52.295853079886925
No suitable images found for location: lon=5.793317, lat=52.

No suitable images found for location: lon=5.9790, lat=50.8767
No suitable images found for location: lon=5.979, lat=50.877
Unable to retrieve data from the API with sufficient precision for location: lon=5.979005627996537, lat=50.87665353369769
No suitable images found for location: lon=4.664825, lat=52.144188
No suitable images found for location: lon=4.66482, lat=52.14419
No suitable images found for location: lon=4.6648, lat=52.1442
No suitable images found for location: lon=4.665, lat=52.144
Unable to retrieve data from the API with sufficient precision for location: lon=4.664824725918115, lat=52.14418779834245
Image downloaded successfully: /workspace/workspace/ufo-prediction/image_data_NLD_adv/1889.0_v0.1-NLD.9.4_1-96760_1.jpg
Failed to download the image.
No suitable images found for location: lon=4.661253, lat=52.367106
No suitable images found for location: lon=4.66125, lat=52.36711
No suitable images found for location: lon=4.6613, lat=52.3671
No suitable images found for lo

No suitable images found for location: lon=4.161428, lat=51.841854
No suitable images found for location: lon=4.16143, lat=51.84185
No suitable images found for location: lon=4.1614, lat=51.8419
No suitable images found for location: lon=4.161, lat=51.842
Unable to retrieve data from the API with sufficient precision for location: lon=4.1614280926887925, lat=51.84185423015405
No suitable images found for location: lon=6.904133, lat=52.229692
No suitable images found for location: lon=6.90413, lat=52.22969
No suitable images found for location: lon=6.9041, lat=52.2297
Image downloaded successfully: /workspace/workspace/ufo-prediction/image_data_NLD_adv/1928.0_v0.1-NLD.10.7_1-38483_1.jpg
No suitable images found for location: lon=5.855842, lat=51.778877
No suitable images found for location: lon=5.85584, lat=51.77888
No suitable images found for location: lon=5.8558, lat=51.7789
No suitable images found for location: lon=5.856, lat=51.779
Unable to retrieve data from the API with suffici

No suitable images found for location: lon=3.743594, lat=51.697009
No suitable images found for location: lon=3.74359, lat=51.69701
No suitable images found for location: lon=3.7436, lat=51.6970
No suitable images found for location: lon=3.744, lat=51.697
Unable to retrieve data from the API with sufficient precision for location: lon=3.7435938301652265, lat=51.69700886227675
No suitable images found for location: lon=7.068209, lat=53.109261
No suitable images found for location: lon=7.06821, lat=53.10926
No suitable images found for location: lon=7.0682, lat=53.1093
No suitable images found for location: lon=7.068, lat=53.109
Unable to retrieve data from the API with sufficient precision for location: lon=7.068209159329787, lat=53.10926070088431
No suitable images found for location: lon=5.441611, lat=51.891173
No suitable images found for location: lon=5.44161, lat=51.89117
No suitable images found for location: lon=5.4416, lat=51.8912
No suitable images found for location: lon=5.442

No suitable images found for location: lon=6.8799, lat=52.2142
No suitable images found for location: lon=6.880, lat=52.214
Unable to retrieve data from the API with sufficient precision for location: lon=6.879943549505171, lat=52.214209712658814
No suitable images found for location: lon=7.032972, lat=53.275457
No suitable images found for location: lon=7.03297, lat=53.27546
No suitable images found for location: lon=7.0330, lat=53.2755
No suitable images found for location: lon=7.033, lat=53.275
Unable to retrieve data from the API with sufficient precision for location: lon=7.032972018246923, lat=53.27545697653445
No suitable images found for location: lon=6.216637, lat=52.692884
No suitable images found for location: lon=6.21664, lat=52.69288
No suitable images found for location: lon=6.2166, lat=52.6929
No suitable images found for location: lon=6.217, lat=52.693
Unable to retrieve data from the API with sufficient precision for location: lon=6.216636667974211, lat=52.692883881901

No suitable images found for location: lon=4.27984, lat=52.08298
No suitable images found for location: lon=4.2798, lat=52.0830
No suitable images found for location: lon=4.280, lat=52.083
Unable to retrieve data from the API with sufficient precision for location: lon=4.27983528093441, lat=52.082975857484286
No suitable images found for location: lon=5.056127, lat=51.689109
No suitable images found for location: lon=5.05613, lat=51.68911
No suitable images found for location: lon=5.0561, lat=51.6891
No suitable images found for location: lon=5.056, lat=51.689
Unable to retrieve data from the API with sufficient precision for location: lon=5.056126712904074, lat=51.689109274054935
No suitable images found for location: lon=6.554221, lat=53.236072
No suitable images found for location: lon=6.55422, lat=53.23607
No suitable images found for location: lon=6.5542, lat=53.2361
No suitable images found for location: lon=6.554, lat=53.236
Unable to retrieve data from the API with sufficient p

No suitable images found for location: lon=6.9104, lat=53.1389
No suitable images found for location: lon=6.910, lat=53.139
Unable to retrieve data from the API with sufficient precision for location: lon=6.910430867166627, lat=53.1388824732681
No suitable images found for location: lon=4.658421, lat=51.801874
No suitable images found for location: lon=4.65842, lat=51.80187
No suitable images found for location: lon=4.6584, lat=51.8019
No suitable images found for location: lon=4.658, lat=51.802
Unable to retrieve data from the API with sufficient precision for location: lon=4.658421291123209, lat=51.80187421704685
No suitable images found for location: lon=5.003390, lat=52.167099
No suitable images found for location: lon=5.00339, lat=52.16710
No suitable images found for location: lon=5.0034, lat=52.1671
No suitable images found for location: lon=5.003, lat=52.167
Unable to retrieve data from the API with sufficient precision for location: lon=5.0033895019460655, lat=52.1670985222423

No suitable images found for location: lon=6.573895, lat=53.064564
No suitable images found for location: lon=6.57390, lat=53.06456
No suitable images found for location: lon=6.5739, lat=53.0646
No suitable images found for location: lon=6.574, lat=53.065
Unable to retrieve data from the API with sufficient precision for location: lon=6.573895023891308, lat=53.06456375419925
No suitable images found for location: lon=5.871052, lat=51.841853
No suitable images found for location: lon=5.87105, lat=51.84185
No suitable images found for location: lon=5.8711, lat=51.8419
No suitable images found for location: lon=5.871, lat=51.842
Unable to retrieve data from the API with sufficient precision for location: lon=5.871052293942871, lat=51.84185306323848
No suitable images found for location: lon=6.564326, lat=53.196179
No suitable images found for location: lon=6.56433, lat=53.19618
No suitable images found for location: lon=6.5643, lat=53.1962
No suitable images found for location: lon=6.564,

No suitable images found for location: lon=4.985647, lat=52.510609
No suitable images found for location: lon=4.98565, lat=52.51061
No suitable images found for location: lon=4.9856, lat=52.5106
No suitable images found for location: lon=4.986, lat=52.511
Unable to retrieve data from the API with sufficient precision for location: lon=4.985646918332439, lat=52.51060893777046
No suitable images found for location: lon=6.605354, lat=52.361952
No suitable images found for location: lon=6.60535, lat=52.36195
No suitable images found for location: lon=6.6054, lat=52.3620
No suitable images found for location: lon=6.605, lat=52.362
Saving progress at iteration 200 to /workspace/workspace/ufo-prediction/demo/kartaview_key_NLD_adv.csv. 
Number of buildings remaining:  859616
Unable to retrieve data from the API with sufficient precision for location: lon=6.6053538813374, lat=52.36195191206052
Image downloaded successfully: /workspace/workspace/ufo-prediction/image_data_NLD_adv/1930.0_v0.1-NLD.

No suitable images found for location: lon=4.740, lat=52.616
Unable to retrieve data from the API with sufficient precision for location: lon=4.739608938529532, lat=52.61631191325348
No suitable images found for location: lon=4.483953, lat=52.065176
No suitable images found for location: lon=4.48395, lat=52.06518
No suitable images found for location: lon=4.4840, lat=52.0652
No suitable images found for location: lon=4.484, lat=52.065
Unable to retrieve data from the API with sufficient precision for location: lon=4.483952919059639, lat=52.065175779056425
No suitable images found for location: lon=6.895727, lat=52.194676
No suitable images found for location: lon=6.89573, lat=52.19468
No suitable images found for location: lon=6.8957, lat=52.1947
No suitable images found for location: lon=6.896, lat=52.195
Unable to retrieve data from the API with sufficient precision for location: lon=6.895727254620527, lat=52.19467586770054
No suitable images found for location: lon=5.677940, lat=50.

No suitable images found for location: lon=5.898836, lat=52.562960
No suitable images found for location: lon=5.89884, lat=52.56296
No suitable images found for location: lon=5.8988, lat=52.5630
No suitable images found for location: lon=5.899, lat=52.563
Unable to retrieve data from the API with sufficient precision for location: lon=5.898835562440833, lat=52.56295959606123
No suitable images found for location: lon=4.648168, lat=52.486867
No suitable images found for location: lon=4.64817, lat=52.48687
No suitable images found for location: lon=4.6482, lat=52.4869
No suitable images found for location: lon=4.648, lat=52.487
Unable to retrieve data from the API with sufficient precision for location: lon=4.648168001826189, lat=52.48686681581575
No suitable images found for location: lon=5.752056, lat=51.405560
No suitable images found for location: lon=5.75206, lat=51.40556
No suitable images found for location: lon=5.7521, lat=51.4056
No suitable images found for location: lon=5.752,

No suitable images found for location: lon=6.386728, lat=53.250515
No suitable images found for location: lon=6.38673, lat=53.25052
No suitable images found for location: lon=6.3867, lat=53.2505
No suitable images found for location: lon=6.387, lat=53.251
Unable to retrieve data from the API with sufficient precision for location: lon=6.386728141178527, lat=53.25051507345668
No suitable images found for location: lon=4.788050, lat=52.355129
No suitable images found for location: lon=4.78805, lat=52.35513
No suitable images found for location: lon=4.7880, lat=52.3551
No suitable images found for location: lon=4.788, lat=52.355
Unable to retrieve data from the API with sufficient precision for location: lon=4.788049934221825, lat=52.35512887210292
No suitable images found for location: lon=6.449464, lat=51.948566
No suitable images found for location: lon=6.44946, lat=51.94857
No suitable images found for location: lon=6.4495, lat=51.9486
No suitable images found for location: lon=6.449,

No suitable images found for location: lon=6.874, lat=52.197
Unable to retrieve data from the API with sufficient precision for location: lon=6.874116727721214, lat=52.19670548875338
No suitable images found for location: lon=3.792062, lat=51.740423
No suitable images found for location: lon=3.79206, lat=51.74042
No suitable images found for location: lon=3.7921, lat=51.7404
No suitable images found for location: lon=3.792, lat=51.740
Unable to retrieve data from the API with sufficient precision for location: lon=3.792062163830637, lat=51.74042329986997
No suitable images found for location: lon=4.615024, lat=52.276135
No suitable images found for location: lon=4.61502, lat=52.27613
No suitable images found for location: lon=4.6150, lat=52.2761
No suitable images found for location: lon=4.615, lat=52.276
Unable to retrieve data from the API with sufficient precision for location: lon=4.6150239768736006, lat=52.27613470012317
No suitable images found for location: lon=5.383647, lat=51.