# Pipeline:

1. Check stats for images folder and included albums
2. Run MAIN script. It will create photos/ folder with photos, encoded from different formats from images/ folder. It will also create photos.csv with census of files and missing_coords.csv with the ones which do not have any coords. Source files (jpg, jpeg, heic) are transformed into jpg files according to the template IMG_YYYYMMDD_has.jpg
3. Run a script to pick up Description field from Google Photos. It uses Album+source_filename to identify photo. It records the results into photos_description.csv
4. Add coordinates to missing_coords.csv manually
5. Run the next script which will merge photos.csv+missing_coords.csv+photos_descriptions.csv into photos_cobmined.csv which contains the full list of all photos with their names, coordinates, dates and description. 
6. Upload files from photos/ to Google Drive/photos
7. Connect to Google Drive and get new filenames with their links into google_drive_links.csv
8. Get all info from photos_combined.csv and google_drive_links.csv into photos_final.csv
9. Put all info from photos_final.csv into photos.geojson
10. Get country code from nominate site into photos.geojson

In [None]:
#- STEP 1. Check stats summary

In [None]:
import os
import re
from collections import defaultdict

# === –ü–∞–ø–∫–∞ —Å –∞–ª—å–±–æ–º–∞–º–∏ ===
ROOT_DIR = "images"
album_dirs = [d for d in os.listdir(ROOT_DIR) if os.path.isdir(os.path.join(ROOT_DIR, d))]

# === –ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è —Å—á—ë—Ç—á–∏–∫–æ–≤ ===
stats = {}
total = defaultdict(int)

# === –®–∞–±–ª–æ–Ω—ã ===
pattern_copy = re.compile(r"\s\(\d+\)| copy", re.IGNORECASE)

# === –ê–Ω–∞–ª–∏–∑ –ø–æ –∞–ª—å–±–æ–º–∞–º ===
for album in album_dirs:
    folder_path = os.path.join(ROOT_DIR, album)
    files = os.listdir(folder_path)

    counts = defaultdict(int)

    for file in files:
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            continue

        ext = os.path.splitext(file)[1].lower()

        if ext in ['.jpg', '.jpeg', '.heic']:
            if '-edited' in file.lower():
                counts['edited'] += 1
            else:
                counts[ext] += 1
                if pattern_copy.search(file):
                    counts['renamed'] += 1
        elif ext == '.json':
            counts['json'] += 1
        else:
            counts['other'] += 1

    stats[album] = dict(counts)
    for k, v in counts.items():
        total[k] += v

# === –í—ã–≤–æ–¥ —Å—Ç–∞—Ç–∏—Å—Ç–∏–∫–∏ ===
print("\nüìä –°—Ç–∞—Ç–∏—Å—Ç–∏–∫–∞ –ø–æ –∞–ª—å–±–æ–º–∞–º:")
for album, counts in stats.items():
    print(f"\nüìÅ {album}:")
    for k, v in counts.items():
        print(f"  {k}: {v}")

# === –ü–æ–¥—Å—á—ë—Ç –æ–±—â–µ–≥–æ –∫–æ–ª–∏—á–µ—Å—Ç–≤–∞ –ø—Ä–∏–≥–æ–¥–Ω—ã—Ö —Ñ–æ—Ç–æ ===
photo_exts = ['.jpg', '.jpeg', '.heic']
total_photos = sum(total[ext] for ext in photo_exts)
total_edited = total['edited']
usable_photos = total_photos  # editable —É–∂–µ –∏—Å–∫–ª—é—á–µ–Ω—ã –≤—ã—à–µ, –Ω–µ —É—á–∏—Ç—ã–≤–∞—é—Ç—Å—è

print("\nüìà –ò—Ç–æ–≥–æ –ø–æ –≤—Å–µ–º –∞–ª—å–±–æ–º–∞–º:")
for k, v in total.items():
    print(f"  {k}: {v}")

print(f"\nüßÆ –í—Å–µ–≥–æ —Ñ–æ—Ç–æ-—Ñ–∞–π–ª–æ–≤ (–±–µ–∑ -edited): {total_photos}")
print(f"‚úÇÔ∏è  –§–∞–π–ª–æ–≤ —Å '-edited' –≤ –∏–º–µ–Ω–∏:        {total_edited}")
print(f"‚úÖ –ì–æ–¥–Ω—ã—Ö –¥–ª—è –æ–±—Ä–∞–±–æ—Ç–∫–∏ —Ñ–∞–π–ª–æ–≤:        {usable_photos}")

In [None]:
# --- STEP 2. MAIN script

In [28]:
import os
import re
import math
import json
import pandas as pd
import subprocess
from datetime import datetime
from PIL import Image, ExifTags
from tqdm import tqdm

# === Paths ===
ROOT = os.path.abspath(".")
SRC_FOLDER = os.path.join(ROOT, "images")
OUT_FOLDER = os.path.join(ROOT, "photos")
CSV_FOLDER = os.path.join(ROOT, "csv")
CSV_MAIN = os.path.join(CSV_FOLDER, "photos.csv")
CSV_MISSING = os.path.join(CSV_FOLDER, "missing_coords.csv")

os.makedirs(OUT_FOLDER, exist_ok=True)
os.makedirs(CSV_FOLDER, exist_ok=True)

# === Helpers ===
def convert_to_degrees(v):
    d, m, s = v
    return float(d) + float(m) / 60 + float(s) / 3600

def extract_gps_from_exif(image_path):
    try:
        image = Image.open(image_path)
        exif_data = image._getexif()
        if not exif_data:
            return None, None

        gps_info = {}
        date_str = None
        for tag, value in exif_data.items():
            decoded = ExifTags.TAGS.get(tag)
            if decoded == "GPSInfo":
                for t in value:
                    sub_decoded = ExifTags.GPSTAGS.get(t)
                    gps_info[sub_decoded] = value[t]
            elif decoded == "DateTimeOriginal":
                date_str = value

        if not gps_info:
            return None, date_str

        lat = convert_to_degrees(gps_info.get("GPSLatitude"))
        if gps_info.get("GPSLatitudeRef") == "S":
            lat = -lat
        lon = convert_to_degrees(gps_info.get("GPSLongitude"))
        if gps_info.get("GPSLongitudeRef") == "W":
            lon = -lon

        return (lat, lon), date_str
    except:
        return None, None

def extract_gps_from_json(json_path):
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
            geo = data.get("geoData")
            photo_taken_time = data.get("photoTakenTime", {}).get("timestamp")
            if geo:
                lat = geo.get("latitude")
                lon = geo.get("longitude")
                return (lat, lon), photo_taken_time
    except:
        pass
    return None, None

def find_json_path_for_image(image_path):
    base = os.path.basename(image_path)
    folder = os.path.dirname(image_path)

    suffixes = [
        ".supplemental-metadata.json",
        ".supplemental-meta.json",
        ".supplemental-metada.json",
        ".supplemental-metadat.json",
        ".supplemental-me.json"
    ]

    for suffix in suffixes:
        candidate = image_path + suffix
        if os.path.exists(candidate):
            return candidate

    match = re.match(r'^(.*)\((\d+)\)\.(jpg|jpeg|heic)$', base, re.IGNORECASE)
    if match:
        base_clean = match.group(1).strip()
        suffix_num = match.group(2)
        ext = match.group(3)
        for sfx in suffixes:
            sfx_with_index = sfx.replace(".json", f"({suffix_num}).json")
            alt_json = os.path.join(folder, f"{base_clean}.{ext}{sfx_with_index}")
            if os.path.exists(alt_json):
                return alt_json

    return None

def parse_date_components(date_str):
    try:
        if date_str and len(str(date_str)) == 10 and str(date_str).isdigit():
            dt = datetime.fromtimestamp(int(date_str))
        else:
            dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
        return str(dt.year), f"{dt.month:02d}", f"{dt.day:02d}"
    except:
        return "2099", "01", "01"

def spiral_coords(lat, lon, index, step=0.0002):
    angle = index * (math.pi / 3)
    radius = step * (1 + index // 6)
    return lat + radius * math.cos(angle), lon + radius * math.sin(angle)

# === Find all images ===
image_files = []
for dirpath, _, filenames in os.walk(SRC_FOLDER):
    for f in filenames:
        if f.lower().endswith(('.jpg', '.jpeg', '.heic')) and "-edited" not in f.lower():
            image_files.append(os.path.join(dirpath, f))

# === Processing ===
Image.init()
coords_seen = {}
records_ok = []
records_missing = []

for in_path in tqdm(image_files, desc="\U0001f4e6 –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π", ncols=80):
    source_filename = os.path.basename(in_path)
    ext = os.path.splitext(source_filename)[1].lower()
    album = os.path.basename(os.path.dirname(in_path))
    source_type = None
    lat, lon, date_str = None, None, None

    try:
        if ext in [".jpg", ".jpeg"]:
            gps, date_str = extract_gps_from_exif(in_path)
            if gps:
                lat, lon = gps
                source_type = "exif"

        if lat is None or lon is None:
            json_path = find_json_path_for_image(in_path)
            if json_path:
                gps_json, json_date = extract_gps_from_json(json_path)
                if gps_json is not None:
                    try:
                        lat = float(gps_json[0])
                        lon = float(gps_json[1])
                        source_type = (source_type or "") + "+json"
                        if ext == ".heic":
                            print(f"[HEIC] –ù–∞–π–¥–µ–Ω JSON ‚Üí lat: {lat}, lon: {lon}, file: {source_filename}")
                    except:
                        lat, lon = None, None
                if not date_str and json_date:
                    date_str = json_date

        year, month, day = parse_date_components(date_str)
        base_name = f"IMG_{year}{month}{day}_{abs(hash(source_filename)) % 10**6}.jpg"
        out_path = os.path.join(OUT_FOLDER, base_name)

        if ext == ".heic":
            print(f"[HEIC] –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è: {source_filename} ‚Üí {base_name}")
            temp_path = "/tmp/temp_output.jpg"
            result = subprocess.run([
                "sips", "-s", "format", "jpeg", in_path, "--out", temp_path
            ], capture_output=True, text=True)

            if result.returncode == 0 and os.path.exists(temp_path):
                image = Image.open(temp_path)
                width, height = image.size
                new_size = (int(width * 0.8), int(height * 0.8))
                image = image.resize(new_size, Image.LANCZOS)
                image.convert("RGB").save(out_path, "JPEG", quality=85)
                os.remove(temp_path)
            else:
                print(f"[HEIC] ‚ùå –û—à–∏–±–∫–∞ sips: {result.stderr.strip()}")
                Image.new("RGB", (800, 600), (128, 128, 128)).save(out_path, "JPEG", quality=85)
                source_type = (source_type or "") + "+heic-error"

        else:
            image = Image.open(in_path)
            width, height = image.size
            new_size = (int(width * 0.8), int(height * 0.8))
            image = image.resize(new_size, Image.LANCZOS)
            image.convert("RGB").save(out_path, "JPEG", quality=85)

        row = {
            'filename': base_name,
            'folder': os.path.basename(OUT_FOLDER),
            'latitude': lat,
            'longitude': lon,
            'year': year,
            'month': month,
            'day': day,
            'album': album,
            'source_filename': source_filename,
            'source_type': source_type
        }

        if lat in (None, 0.0) or lon in (None, 0.0):
            row['latitude'] = None
            row['longitude'] = None
            records_missing.append(row)
        else:
            key = (round(lat, 6), round(lon, 6))
            count = coords_seen.get(key, 0)
            if count > 0:
                row['latitude'], row['longitude'] = spiral_coords(lat, lon, count)
            coords_seen[key] = count + 1
            records_ok.append(row)

    except Exception as e:
        print(f"‚ùå –û—à–∏–±–∫–∞ –ø—Ä–∏ –æ–±—Ä–∞–±–æ—Ç–∫–µ {source_filename}: {e}")
        year, month, day = "2099", "01", "01"
        base_name = f"IMG_{year}{month}{day}_{abs(hash(source_filename)) % 10**6}.jpg"
        records_missing.append({
            'filename': base_name,
            'folder': os.path.basename(OUT_FOLDER),
            'latitude': None,
            'longitude': None,
            'year': year,
            'month': month,
            'day': day,
            'album': album,
            'source_filename': source_filename,
            'source_type': None
        })

# === Save CSVs ===
pd.DataFrame(records_ok).to_csv(CSV_MAIN, index=False)
pd.DataFrame(records_missing).to_csv(CSV_MISSING, index=False)

# === Summary ===
print(f"\n\U0001f4ca –°–≤–æ–¥–∫–∞:")
print(f"\U0001f9ee –í—Å–µ–≥–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω–æ: {len(records_ok) + len(records_missing)}")
print(f"‚úÖ –° –∫–æ–æ—Ä–¥–∏–Ω–∞—Ç–∞–º–∏:   {len(records_ok)}")
print(f"‚ùå –ë–µ–∑ –∫–æ–æ—Ä–¥–∏–Ω–∞—Ç:    {len(records_missing)}")
print(f"üìÑ CSV-—Ñ–∞–π–ª—ã:        {CSV_MAIN}, {CSV_MISSING}")
print(f"üìÅ –ò–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è:      {OUT_FOLDER}")

üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã     | 1427/2431 [06:35<05:51,  2.86it/s]

[HEIC] –ù–∞–π–¥–µ–Ω JSON ‚Üí lat: 50.0842092, lon: 14.4240219, file: IMG_1894.HEIC
[HEIC] –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è: IMG_1894.HEIC ‚Üí IMG_20220501_332311.jpg


üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 1546/2431 [07:11<03:42,  3.98it/s]

[HEIC] –ù–∞–π–¥–µ–Ω JSON ‚Üí lat: 50.082902600000004, lon: 14.422433299999998, file: IMG_1899.HEIC
[HEIC] –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è: IMG_1899.HEIC ‚Üí IMG_20220501_71799.jpg


üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 1694/2431 [07:56<03:47,  3.24it/s]

[HEIC] –ù–∞–π–¥–µ–Ω JSON ‚Üí lat: 50.0877207, lon: 14.4277908, file: IMG_1811.HEIC
[HEIC] –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è: IMG_1811.HEIC ‚Üí IMG_20220501_135512.jpg


üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 1790/2431 [08:25<02:58,  3.60it/s]

[HEIC] –ù–∞–π–¥–µ–Ω JSON ‚Üí lat: 50.0864771, lon: 14.411436600000002, file: IMG_2005.HEIC
[HEIC] –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è: IMG_2005.HEIC ‚Üí IMG_20220501_551842.jpg


üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 1810/2431 [08:31<03:08,  3.29it/s]

[HEIC] –ù–∞–π–¥–µ–Ω JSON ‚Üí lat: 50.075538099999996, lon: 14.437800500000002, file: IMG_1890.HEIC
[HEIC] –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è: IMG_1890.HEIC ‚Üí IMG_20220501_429680.jpg


üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 1846/2431 [08:41<02:56,  3.31it/s]

[HEIC] –ù–∞–π–¥–µ–Ω JSON ‚Üí lat: 50.0861017, lon: 14.416173599999997, file: IMG_1901.HEIC
[HEIC] –ö–æ–Ω–≤–µ—Ä—Ç–∞—Ü–∏—è: IMG_1901.HEIC ‚Üí IMG_20220501_435404.jpg


üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ –∏–∑–æ–±—Ä–∞–∂–µ–Ω–∏–π: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2431/2431 [10:46<00:00,  3.76it/s]


üìä –°–≤–æ–¥–∫–∞:
üßÆ –í—Å–µ–≥–æ –æ–±—Ä–∞–±–æ—Ç–∞–Ω–æ: 2431
‚úÖ –° –∫–æ–æ—Ä–¥–∏–Ω–∞—Ç–∞–º–∏:   2424
‚ùå –ë–µ–∑ –∫–æ–æ—Ä–¥–∏–Ω–∞—Ç:    7
üìÑ CSV-—Ñ–∞–π–ª—ã:        /Users/mloktionov/PycharmProjects/PhotoMaps/csv/photos.csv, /Users/mloktionov/PycharmProjects/PhotoMaps/csv/missing_coords.csv
üìÅ –ò–∑–æ–±—Ä–∞–∂–µ–Ω–∏—è:      /Users/mloktionov/PycharmProjects/PhotoMaps/photos





In [None]:
# ---- STEP 3. Getting descriptions from Google photos via API

In [36]:
import csv
import os
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build_from_document
from tqdm import tqdm

# === –ù–∞—Å—Ç—Ä–æ–π–∫–∏ ===
SCOPES = ['https://www.googleapis.com/auth/photoslibrary.readonly']
OUTPUT_CSV = 'csv/photos_descriptions.csv'
ALBUM_NAMES = [
    "PhotoMap 2022-2025", "PhotoMap 2019-2021", "PhotoMap 2017-2019",
    "PhotoMap 2014-2016", "PhotoMap 2010-2013", "PhotoMap 08-09"
]

# === –ê—É—Ç–µ–Ω—Ç–∏—Ñ–∏–∫–∞—Ü–∏—è ===
flow = InstalledAppFlow.from_client_secrets_file('client_secrets.json', SCOPES)
credentials = flow.run_local_server(port=0)

# === –ß—Ç–µ–Ω–∏–µ discovery JSON ===
with open("photoslibrary_v1_discovery.json", "r") as f:
    discovery_doc = f.read()

service = build_from_document(discovery_doc, credentials=credentials)

# === –ü–æ–∏—Å–∫ –∞–ª—å–±–æ–º–æ–≤ ===
print("üîç –ü–æ–∏—Å–∫ –∞–ª—å–±–æ–º–æ–≤ –≤ Google Photos...")
albums = []
nextPageToken = None

while True:
    response = service.albums().list(pageSize=50, pageToken=nextPageToken).execute()
    albums.extend(response.get('albums', []))
    nextPageToken = response.get('nextPageToken')
    if not nextPageToken:
        break

target_albums = {album['title']: album['id'] for album in albums if album['title'] in ALBUM_NAMES}

# === –ü—Ä–æ–≤–µ—Ä–∫–∞ –Ω–∞–π–¥–µ–Ω–Ω—ã—Ö –∞–ª—å–±–æ–º–æ–≤ ===
if not target_albums:
    print("‚ùå –ù–µ –Ω–∞–π–¥–µ–Ω—ã —É–∫–∞–∑–∞–Ω–Ω—ã–µ –∞–ª—å–±–æ–º—ã –≤ Google Photos")
else:
    print(f"‚úÖ –ù–∞–π–¥–µ–Ω—ã –∞–ª—å–±–æ–º—ã: {list(target_albums.keys())}")

# === –ó–∞–ø–∏—Å—å –æ–ø–∏—Å–∞–Ω–∏–π ===
os.makedirs('csv', exist_ok=True)
with open(OUTPUT_CSV, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['album', 'source_filename', 'description'])

    for album_name, album_id in target_albums.items():
        print(f"\nüìÇ –ê–ª—å–±–æ–º: {album_name}")
        nextPageToken = None

        with tqdm(desc=f"–ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ {album_name}", unit="—Ñ–æ—Ç–æ", leave=True) as pbar:
            while True:
                results = service.mediaItems().search(
                    body={"albumId": album_id, "pageSize": 100, "pageToken": nextPageToken}
                ).execute()

                items = results.get('mediaItems', [])
                nextPageToken = results.get('nextPageToken')

                if not items:
                    break

                for item in items:
                    description = item.get('description')
                    filename = item.get('filename')

                    if description and filename:
                        writer.writerow([album_name, filename, description])

                pbar.update(len(items))

                if not nextPageToken:
                    break

    print(f"\n‚úÖ –û–ø–∏—Å–∞–Ω–∏—è —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ {OUTPUT_CSV}")

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=849244129200-hdfqohk1rs46hjekajgu7pa4jqrn9sqj.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A60939%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fphotoslibrary.readonly&state=KCVP37DvJmxZEqfefrdyKQwSp3XioA&access_type=offline
üîç –ü–æ–∏—Å–∫ –∞–ª—å–±–æ–º–æ–≤ –≤ Google Photos...
‚úÖ –ù–∞–π–¥–µ–Ω—ã –∞–ª—å–±–æ–º—ã: ['PhotoMap 2022-2025', 'PhotoMap 2019-2021', 'PhotoMap 2017-2019', 'PhotoMap 2014-2016', 'PhotoMap 2010-2013', 'PhotoMap 08-09']

üìÇ –ê–ª—å–±–æ–º: PhotoMap 2022-2025


–ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ PhotoMap 2022-2025: 653—Ñ–æ—Ç–æ [00:06, 107.97—Ñ–æ—Ç–æ/s]



üìÇ –ê–ª—å–±–æ–º: PhotoMap 2019-2021


–ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ PhotoMap 2019-2021: 784—Ñ–æ—Ç–æ [00:08, 96.14—Ñ–æ—Ç–æ/s] 



üìÇ –ê–ª—å–±–æ–º: PhotoMap 2017-2019


–ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ PhotoMap 2017-2019: 249—Ñ–æ—Ç–æ [00:02, 101.87—Ñ–æ—Ç–æ/s]



üìÇ –ê–ª—å–±–æ–º: PhotoMap 2014-2016


–ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ PhotoMap 2014-2016: 381—Ñ–æ—Ç–æ [00:03, 102.93—Ñ–æ—Ç–æ/s]



üìÇ –ê–ª—å–±–æ–º: PhotoMap 2010-2013


–ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ PhotoMap 2010-2013: 257—Ñ–æ—Ç–æ [00:02, 111.37—Ñ–æ—Ç–æ/s]



üìÇ –ê–ª—å–±–æ–º: PhotoMap 08-09


–ó–∞–≥—Ä—É–∑–∫–∞ –∏–∑ PhotoMap 08-09: 109—Ñ–æ—Ç–æ [00:01, 67.24—Ñ–æ—Ç–æ/s]


‚úÖ –û–ø–∏—Å–∞–Ω–∏—è —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ csv/photos_descriptions.csv





In [39]:
# --- STEP 4. here add coordinates to missing_coords.csv
# --- and then run the script!

In [None]:
# --- STEP 5. Run the script to merge all csv into one containing file names, coords, dates and descriptions

In [42]:
import os
import pandas as pd

# === –ü—É—Ç–∏ ===
CSV_FOLDER = "csv"
MAIN_CSV = os.path.join(CSV_FOLDER, "photos.csv")
MISSING_CSV = os.path.join(CSV_FOLDER, "missing_coords.csv")
DESCRIPTIONS_CSV = os.path.join(CSV_FOLDER, "photos_descriptions.csv")
FINAL_CSV = os.path.join(CSV_FOLDER, "photos_combined.csv")

# === –ó–∞–≥—Ä—É–∑–∫–∞ —Ñ–∞–π–ª–æ–≤ ===
print("üì• –ó–∞–≥—Ä—É–∑–∫–∞ CSV...")
df_main = pd.read_csv(MAIN_CSV)
df_missing = pd.read_csv(MISSING_CSV)
df_desc = pd.read_csv(DESCRIPTIONS_CSV)

# === –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ main –∏ missing ===
df_all = pd.concat([df_main, df_missing], ignore_index=True)

# === –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ –¥–ª—è merge –ø–æ –∫–ª—é—á—É: album + source_filename ===
df_all["merge_key"] = df_all["album"].str.strip() + "/" + df_all["source_filename"].str.strip()
df_desc["merge_key"] = df_desc["album"].str.strip() + "/" + df_desc["source_filename"].str.strip()

# === –°–ª–∏—è–Ω–∏–µ –æ–ø–∏—Å–∞–Ω–∏–π ===
df_all = df_all.merge(
    df_desc[["merge_key", "description"]],
    on="merge_key",
    how="left"
)

# === –û—á–∏—Å—Ç–∫–∞ –∏ —Ñ–∏–Ω–∞–ª—å–Ω–∞—è –∑–∞–ø–∏—Å—å ===
df_all.drop(columns=["merge_key"], inplace=True)
df_all.to_csv(FINAL_CSV, index=False)

print("\n‚úÖ –û–±—ä–µ–¥–∏–Ω—ë–Ω–Ω—ã–π CSV —Å–æ—Ö—Ä–∞–Ω—ë–Ω:", FINAL_CSV)
print("üìÑ –°—Ç—Ä–æ–∫: ", len(df_all))
print("üìù –ö–æ–ª–æ–Ω–æ–∫: ", list(df_all.columns))


üì• –ó–∞–≥—Ä—É–∑–∫–∞ CSV...

‚úÖ –û–±—ä–µ–¥–∏–Ω—ë–Ω–Ω—ã–π CSV —Å–æ—Ö—Ä–∞–Ω—ë–Ω: csv/photos_combined.csv
üìÑ –°—Ç—Ä–æ–∫:  2431
üìù –ö–æ–ª–æ–Ω–æ–∫:  ['filename', 'folder', 'latitude', 'longitude', 'year', 'month', 'day', 'album', 'source_filename', 'source_type', 'description']


In [None]:
# --- STEP 6. Upload files from photos/ to Google Drive/photos

In [None]:
# --- Script deletes all files in Google Drive/Photos
# --- Script uploads all files from photos/ to Google/Photos/

In [47]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from tqdm import tqdm
import os

# === –ê–≤—Ç–æ—Ä–∏–∑–∞—Ü–∏—è –≤ Google Drive
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# === ID —Ü–µ–ª–µ–≤–æ–π –ø–∞–ø–∫–∏ –Ω–∞ Google Drive
FOLDER_ID = '1em81MElkxnaue5r92e9hPGezgOuw4nRL'
LOCAL_PHOTOS_FOLDER = 'photos/'

# === –û—á–∏—Å—Ç–∫–∞ –ø–∞–ø–∫–∏ –Ω–∞ Google Drive
print("üßπ –û—á–∏—Å—Ç–∫–∞ –ø–∞–ø–∫–∏ –Ω–∞ Google Drive...")
file_list = drive.ListFile({
    'q': f"'{FOLDER_ID}' in parents and trashed=false"
}).GetList()

for f in tqdm(file_list, desc="–£–¥–∞–ª–µ–Ω–∏–µ —Ñ–∞–π–ª–æ–≤"):
    f.Delete()

print("‚úÖ –ü–∞–ø–∫–∞ –æ—á–∏—â–µ–Ω–∞.")

# === –ó–∞–≥—Ä—É–∑–∫–∞ –Ω–æ–≤—ã—Ö —Ñ–∞–π–ª–æ–≤ –∏–∑ –ª–æ–∫–∞–ª—å–Ω–æ–π –ø–∞–ø–∫–∏
print("‚¨ÜÔ∏è  –ó–∞–≥—Ä—É–∑–∫–∞ –Ω–æ–≤—ã—Ö —Ñ–∞–π–ª–æ–≤ –≤ Google Drive...")
local_files = [f for f in os.listdir(LOCAL_PHOTOS_FOLDER) if f.lower().endswith(('.jpg', '.jpeg'))]

for fname in tqdm(local_files, desc="–ó–∞–≥—Ä—É–∑–∫–∞ —Ñ–∞–π–ª–æ–≤"):
    file_path = os.path.join(LOCAL_PHOTOS_FOLDER, fname)
    gfile = drive.CreateFile({
        'title': fname,
        'parents': [{'id': FOLDER_ID}]
    })
    gfile.SetContentFile(file_path)
    gfile.Upload()

print("‚úÖ –ó–∞–≥—Ä—É–∑–∫–∞ –∑–∞–≤–µ—Ä—à–µ–Ω–∞.")


Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=849244129200-hdfqohk1rs46hjekajgu7pa4jqrn9sqj.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.
üßπ –û—á–∏—Å—Ç–∫–∞ –ø–∞–ø–∫–∏ –Ω–∞ Google Drive...


–£–¥–∞–ª–µ–Ω–∏–µ —Ñ–∞–π–ª–æ–≤: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2431/2431 [23:06<00:00,  1.75it/s]


‚úÖ –ü–∞–ø–∫–∞ –æ—á–∏—â–µ–Ω–∞.
‚¨ÜÔ∏è  –ó–∞–≥—Ä—É–∑–∫–∞ –Ω–æ–≤—ã—Ö —Ñ–∞–π–ª–æ–≤ –≤ Google Drive...


–ó–∞–≥—Ä—É–∑–∫–∞ —Ñ–∞–π–ª–æ–≤: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2431/2431 [2:28:50<00:00,  3.67s/it]  

‚úÖ –ó–∞–≥—Ä—É–∑–∫–∞ –∑–∞–≤–µ—Ä—à–µ–Ω–∞.





In [None]:
# ---- STEP 7. Connect to Google Drive and get new filenames with their links

In [66]:
import os
import pandas as pd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from tqdm import tqdm

# === –ù–∞—Å—Ç—Ä–æ–π–∫–∏ ===
FOLDER_ID = '1em81MElkxnaue5r92e9hPGezgOuw4nRL'
OUTPUT_CSV = 'csv/google_drive_links.csv'

# === –ê–≤—Ç–æ—Ä–∏–∑–∞—Ü–∏—è Google Drive ===
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# === –ü–æ–ª—É—á–µ–Ω–∏–µ —Å–ø–∏—Å–∫–∞ —Ñ–∞–π–ª–æ–≤ ===
print("üîç –ü–æ–ª—É—á–∞–µ–º —Å–ø–∏—Å–æ–∫ —Ñ–∞–π–ª–æ–≤ –∏–∑ Google Drive...")
file_list = drive.ListFile({'q': f"'{FOLDER_ID}' in parents and trashed=false"}).GetList()

# === –û–±—Ä–∞–±–æ—Ç–∫–∞ —Å –ø—Ä–æ–≥—Ä–µ—Å—Å–æ–º ===
data = []
print(f"üìÅ –ù–∞–π–¥–µ–Ω–æ —Ñ–∞–π–ª–æ–≤: {len(file_list)}")

for file in tqdm(file_list, desc="üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤", unit="—Ñ–∞–π–ª"):
    filename = file['title']
    file_id = file['id']
    view_url = f"https://drive.google.com/uc?export=view&id={file_id}"
    data.append({
        'filename': filename,
        'file_id': file_id,
        'link': view_url
    })

# === –°–æ—Ö—Ä–∞–Ω—è–µ–º CSV ===
os.makedirs('csv', exist_ok=True)
df = pd.DataFrame(data)
df.to_csv(OUTPUT_CSV, index=False)

print(f"\n‚úÖ –°—Å—ã–ª–∫–∏ —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ {OUTPUT_CSV}")

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=849244129200-hdfqohk1rs46hjekajgu7pa4jqrn9sqj.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.
üîç –ü–æ–ª—É—á–∞–µ–º —Å–ø–∏—Å–æ–∫ —Ñ–∞–π–ª–æ–≤ –∏–∑ Google Drive...
üìÅ –ù–∞–π–¥–µ–Ω–æ —Ñ–∞–π–ª–æ–≤: 2431


üì¶ –û–±—Ä–∞–±–æ—Ç–∫–∞ —Ñ–∞–π–ª–æ–≤: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2431/2431 [00:00<00:00, 545814.09—Ñ–∞–π–ª/s]


‚úÖ –°—Å—ã–ª–∫–∏ —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω–µ–Ω—ã –≤ csv/google_drive_links.csv





In [None]:
# --- STEP 8. Get all info from photos_combined.csv and google_drive_links.csv into photos_final.csv


In [69]:
import os
import pandas as pd

# === –ü—É—Ç–∏ ===
CSV_FOLDER = "csv"
COMBINED_CSV = os.path.join(CSV_FOLDER, "photos_combined.csv")
LINKS_CSV = os.path.join(CSV_FOLDER, "google_drive_links.csv")
FINAL_CSV = os.path.join(CSV_FOLDER, "photos_final.csv")

# === –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö ===
print("üì• –ó–∞–≥—Ä—É–∑–∫–∞ CSV...")
df_combined = pd.read_csv(COMBINED_CSV)
df_links = pd.read_csv(LINKS_CSV)

# === –û–±—ä–µ–¥–∏–Ω–µ–Ω–∏–µ –ø–æ filename ===
df_final = df_combined.merge(df_links[['filename', 'link']], on='filename', how='left')

# === –°–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ –∏—Ç–æ–≥–æ–≤–æ–≥–æ —Ñ–∞–π–ª–∞ ===
df_final.to_csv(FINAL_CSV, index=False)

# === –û—Ç—á—ë—Ç ===
print("\n‚úÖ –ò—Ç–æ–≥–æ–≤—ã–π CSV —Å–æ—Ö—Ä–∞–Ω—ë–Ω:", FINAL_CSV)
print("üìÑ –°—Ç—Ä–æ–∫: ", len(df_final))
print("üß∑ –ö–æ–ª–æ–Ω–∫–∏: ", list(df_final.columns))


üì• –ó–∞–≥—Ä—É–∑–∫–∞ CSV...

‚úÖ –ò—Ç–æ–≥–æ–≤—ã–π CSV —Å–æ—Ö—Ä–∞–Ω—ë–Ω: csv/photos_final.csv
üìÑ –°—Ç—Ä–æ–∫:  2431
üß∑ –ö–æ–ª–æ–Ω–∫–∏:  ['filename', 'folder', 'latitude', 'longitude', 'year', 'month', 'day', 'album', 'source_filename', 'source_type', 'description', 'link']


In [None]:
# --- Step 9. Put all info from photos_final.csv into photos.geojson

In [86]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# === –ü—É—Ç–∏ ===
CSV_FOLDER = "csv"
GEOJSON_FOLDER = "geojson"
FINAL_CSV = os.path.join(CSV_FOLDER, "photos_final.csv")
GEOJSON_OUTPUT = os.path.join(GEOJSON_FOLDER, "photos.geojson")

# === –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö ===
df = pd.read_csv(FINAL_CSV)

# === –§–∏–ª—å—Ç—Ä–∞—Ü–∏—è —Ç–æ—á–µ–∫ —Å –∫–æ–æ—Ä–¥–∏–Ω–∞—Ç–∞–º–∏ ===
df_geo = df[df['latitude'].notnull() & df['longitude'].notnull()].copy()

# === –°–æ–∑–¥–∞–Ω–∏–µ –≥–µ–æ–º–µ—Ç—Ä–∏–∏ ===
df_geo['geometry'] = df_geo.apply(lambda row: Point(float(row['longitude']), float(row['latitude'])), axis=1)

# === –î–æ–±–∞–≤–ª–µ–Ω–∏–µ image –∏ fullname –≤–º–µ—Å—Ç–æ link ===
def make_drive_urls(link):
    if isinstance(link, str) and "id=" in link:
        file_id = link.split("id=")[-1]
        return pd.Series({
            "image": f"https://drive.google.com/thumbnail?id={file_id}",
            "fullname": f"https://drive.google.com/uc?export=view&id={file_id}"
        })
    return pd.Series({"image": None, "fullname": None})

url_cols = df_geo["link"].apply(make_drive_urls)
df_geo = pd.concat([df_geo.drop(columns=["link"]), url_cols], axis=1)

# === –ü—Ä–µ–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ –≤ GeoDataFrame –∏ —Å–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ ===
gdf = gpd.GeoDataFrame(df_geo, geometry='geometry', crs='EPSG:4326')
os.makedirs(GEOJSON_FOLDER, exist_ok=True)
gdf.to_file(GEOJSON_OUTPUT, driver='GeoJSON')

# === –û—Ç—á—ë—Ç ===
print("\nüåç GeoJSON —É—Å–ø–µ—à–Ω–æ –æ–±–Ω–æ–≤–ª—ë–Ω:", GEOJSON_OUTPUT)
print("üìç –¢–æ—á–µ–∫: ", len(gdf))
print("üìÑ –ö–æ–ª–æ–Ω–∫–∏: ", list(gdf.columns))



üåç GeoJSON —É—Å–ø–µ—à–Ω–æ –æ–±–Ω–æ–≤–ª—ë–Ω: geojson/photos.geojson
üìç –¢–æ—á–µ–∫:  2431
üìÑ –ö–æ–ª–æ–Ω–∫–∏:  ['filename', 'folder', 'latitude', 'longitude', 'year', 'month', 'day', 'album', 'source_filename', 'source_type', 'description', 'geometry', 'image', 'fullname']


In [None]:
# --- STEP 10. Get country code from nominate site into photos.geojson

In [92]:
import json
import requests
import time
from tqdm import tqdm

GEOJSON_FILE = "geojson/photos.geojson"
NOMINATIM_URL = "https://nominatim.openstreetmap.org/reverse"
DELAY = 2  # —Å–µ–∫—É–Ω–¥—ã –º–µ–∂–¥—É –∑–∞–ø—Ä–æ—Å–∞–º–∏

HEADERS = {
    'User-Agent': 'PhotoMapsApp (bizbur08@gmail.com)'  # üîß –£–∫–∞–∂–∏ —Å–≤–æ–π email!
}

# ‚Äî –ö—ç—à –∫–æ–æ—Ä–¥–∏–Ω–∞—Ç, —á—Ç–æ–±—ã –Ω–µ –¥—É–±–ª–∏—Ä–æ–≤–∞—Ç—å –∑–∞–ø—Ä–æ—Å—ã
country_cache = {}

def get_country_code(lat, lon):
    """
    –ü–æ–ª—É—á–µ–Ω–∏–µ –∫–æ–¥–∞ —Å—Ç—Ä–∞–Ω—ã –ø–æ –∫–æ–æ—Ä–¥–∏–Ω–∞—Ç–∞–º —á–µ—Ä–µ–∑ Nominatim API
    """
    key = f"{lat:.4f},{lon:.4f}"
    if key in country_cache:
        return country_cache[key]

    try:
        response = requests.get(NOMINATIM_URL, params={
            'format': 'json',
            'lat': lat,
            'lon': lon
        }, headers=HEADERS)

        if response.status_code == 200:
            data = response.json()
            code = data.get('address', {}).get('country_code', '').upper()
            country_cache[key] = code if code else '??'
            return country_cache[key]
        else:
            print(f"‚ùå –û—à–∏–±–∫–∞ {response.status_code} –¥–ª—è {lat},{lon}")
            return '??'

    except Exception as e:
        print(f"‚ùå –û—à–∏–±–∫–∞ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è –¥–ª—è {lat},{lon}: {e}")
        return '??'

def enrich_geojson():
    """
    –î–æ–±–∞–≤–ª–µ–Ω–∏–µ –∫–æ–¥–∞ —Å—Ç—Ä–∞–Ω—ã –≤ –∫–∞–∂–¥—É—é —Ç–æ—á–∫—É GeoJSON
    """
    with open(GEOJSON_FILE, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)

    for feature in tqdm(geojson_data["features"], desc="üåç –û–±—Ä–∞–±–æ—Ç–∫–∞ —Ç–æ—á–µ–∫"):
        lat, lon = feature["geometry"]["coordinates"][1], feature["geometry"]["coordinates"][0]
        country_code = get_country_code(lat, lon)
        feature["properties"]["country_code"] = country_code
        time.sleep(DELAY)

    with open(GEOJSON_FILE, 'w', encoding='utf-8') as f:
        json.dump(geojson_data, f, indent=2, ensure_ascii=False)

    print(f"\n‚úÖ –§–∞–π–ª {GEOJSON_FILE} –æ–±–Ω–æ–≤–ª—ë–Ω —Å country_code")

# === –ó–∞–ø—É—Å–∫ ===
if __name__ == "__main__":
    enrich_geojson()

üåç –û–±—Ä–∞–±–æ—Ç–∫–∞ —Ç–æ—á–µ–∫: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2431/2431 [1:31:56<00:00,  2.27s/it]


‚úÖ –§–∞–π–ª geojson/photos.geojson –æ–±–Ω–æ–≤–ª—ë–Ω —Å country_code



