# Pipeline:

1. Check stats for images folder and included albums
2. Run MAIN script. It will create photos/ folder with photos, encoded from different formats from images/ folder. It will also create photos.csv with census of files and missing_coords.csv with the ones which do not have any coords. Source files (jpg, jpeg, heic) are transformed into jpg files according to the template IMG_YYYYMMDD_has.jpg
3. Run a script to pick up Description field from Google Photos. It uses Album+source_filename to identify photo. It records the results into photos_description.csv
4. Add coordinates to missing_coords.csv manually
5. Run the next script which will merge photos.csv+missing_coords.csv+photos_descriptions.csv into photos_cobmined.csv which contains the full list of all photos with their names, coordinates, dates and description. 
6. Upload files from photos/ to Google Drive/photos
7. Connect to Google Drive and get new filenames with their links into google_drive_links.csv
8. Get all info from photos_combined.csv and google_drive_links.csv into photos_final.csv
9. Put all info from photos_final.csv into photos.geojson
10. Get country code from nominate site into photos.geojson

In [None]:
#- STEP 1. Check stats summary

In [None]:
import os
import re
from collections import defaultdict

# === Папка с альбомами ===
ROOT_DIR = "images"
album_dirs = [d for d in os.listdir(ROOT_DIR) if os.path.isdir(os.path.join(ROOT_DIR, d))]

# === Инициализация счётчиков ===
stats = {}
total = defaultdict(int)

# === Шаблоны ===
pattern_copy = re.compile(r"\s\(\d+\)| copy", re.IGNORECASE)

# === Анализ по альбомам ===
for album in album_dirs:
    folder_path = os.path.join(ROOT_DIR, album)
    files = os.listdir(folder_path)

    counts = defaultdict(int)

    for file in files:
        file_path = os.path.join(folder_path, file)
        if not os.path.isfile(file_path):
            continue

        ext = os.path.splitext(file)[1].lower()

        if ext in ['.jpg', '.jpeg', '.heic']:
            if '-edited' in file.lower():
                counts['edited'] += 1
            else:
                counts[ext] += 1
                if pattern_copy.search(file):
                    counts['renamed'] += 1
        elif ext == '.json':
            counts['json'] += 1
        else:
            counts['other'] += 1

    stats[album] = dict(counts)
    for k, v in counts.items():
        total[k] += v

# === Вывод статистики ===
print("\n📊 Статистика по альбомам:")
for album, counts in stats.items():
    print(f"\n📁 {album}:")
    for k, v in counts.items():
        print(f"  {k}: {v}")

# === Подсчёт общего количества пригодных фото ===
photo_exts = ['.jpg', '.jpeg', '.heic']
total_photos = sum(total[ext] for ext in photo_exts)
total_edited = total['edited']
usable_photos = total_photos  # editable уже исключены выше, не учитываются

print("\n📈 Итого по всем альбомам:")
for k, v in total.items():
    print(f"  {k}: {v}")

print(f"\n🧮 Всего фото-файлов (без -edited): {total_photos}")
print(f"✂️  Файлов с '-edited' в имени:        {total_edited}")
print(f"✅ Годных для обработки файлов:        {usable_photos}")

In [None]:
# --- STEP 2. MAIN script

In [28]:
import os
import re
import math
import json
import pandas as pd
import subprocess
from datetime import datetime
from PIL import Image, ExifTags
from tqdm import tqdm

# === Paths ===
ROOT = os.path.abspath(".")
SRC_FOLDER = os.path.join(ROOT, "images")
OUT_FOLDER = os.path.join(ROOT, "photos")
CSV_FOLDER = os.path.join(ROOT, "csv")
CSV_MAIN = os.path.join(CSV_FOLDER, "photos.csv")
CSV_MISSING = os.path.join(CSV_FOLDER, "missing_coords.csv")

os.makedirs(OUT_FOLDER, exist_ok=True)
os.makedirs(CSV_FOLDER, exist_ok=True)

# === Helpers ===
def convert_to_degrees(v):
    d, m, s = v
    return float(d) + float(m) / 60 + float(s) / 3600

def extract_gps_from_exif(image_path):
    try:
        image = Image.open(image_path)
        exif_data = image._getexif()
        if not exif_data:
            return None, None

        gps_info = {}
        date_str = None
        for tag, value in exif_data.items():
            decoded = ExifTags.TAGS.get(tag)
            if decoded == "GPSInfo":
                for t in value:
                    sub_decoded = ExifTags.GPSTAGS.get(t)
                    gps_info[sub_decoded] = value[t]
            elif decoded == "DateTimeOriginal":
                date_str = value

        if not gps_info:
            return None, date_str

        lat = convert_to_degrees(gps_info.get("GPSLatitude"))
        if gps_info.get("GPSLatitudeRef") == "S":
            lat = -lat
        lon = convert_to_degrees(gps_info.get("GPSLongitude"))
        if gps_info.get("GPSLongitudeRef") == "W":
            lon = -lon

        return (lat, lon), date_str
    except:
        return None, None

def extract_gps_from_json(json_path):
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
            geo = data.get("geoData")
            photo_taken_time = data.get("photoTakenTime", {}).get("timestamp")
            if geo:
                lat = geo.get("latitude")
                lon = geo.get("longitude")
                return (lat, lon), photo_taken_time
    except:
        pass
    return None, None

def find_json_path_for_image(image_path):
    base = os.path.basename(image_path)
    folder = os.path.dirname(image_path)

    suffixes = [
        ".supplemental-metadata.json",
        ".supplemental-meta.json",
        ".supplemental-metada.json",
        ".supplemental-metadat.json",
        ".supplemental-me.json"
    ]

    for suffix in suffixes:
        candidate = image_path + suffix
        if os.path.exists(candidate):
            return candidate

    match = re.match(r'^(.*)\((\d+)\)\.(jpg|jpeg|heic)$', base, re.IGNORECASE)
    if match:
        base_clean = match.group(1).strip()
        suffix_num = match.group(2)
        ext = match.group(3)
        for sfx in suffixes:
            sfx_with_index = sfx.replace(".json", f"({suffix_num}).json")
            alt_json = os.path.join(folder, f"{base_clean}.{ext}{sfx_with_index}")
            if os.path.exists(alt_json):
                return alt_json

    return None

def parse_date_components(date_str):
    try:
        if date_str and len(str(date_str)) == 10 and str(date_str).isdigit():
            dt = datetime.fromtimestamp(int(date_str))
        else:
            dt = datetime.strptime(date_str, "%Y:%m:%d %H:%M:%S")
        return str(dt.year), f"{dt.month:02d}", f"{dt.day:02d}"
    except:
        return "2099", "01", "01"

def spiral_coords(lat, lon, index, step=0.0002):
    angle = index * (math.pi / 3)
    radius = step * (1 + index // 6)
    return lat + radius * math.cos(angle), lon + radius * math.sin(angle)

# === Find all images ===
image_files = []
for dirpath, _, filenames in os.walk(SRC_FOLDER):
    for f in filenames:
        if f.lower().endswith(('.jpg', '.jpeg', '.heic')) and "-edited" not in f.lower():
            image_files.append(os.path.join(dirpath, f))

# === Processing ===
Image.init()
coords_seen = {}
records_ok = []
records_missing = []

for in_path in tqdm(image_files, desc="\U0001f4e6 Обработка изображений", ncols=80):
    source_filename = os.path.basename(in_path)
    ext = os.path.splitext(source_filename)[1].lower()
    album = os.path.basename(os.path.dirname(in_path))
    source_type = None
    lat, lon, date_str = None, None, None

    try:
        if ext in [".jpg", ".jpeg"]:
            gps, date_str = extract_gps_from_exif(in_path)
            if gps:
                lat, lon = gps
                source_type = "exif"

        if lat is None or lon is None:
            json_path = find_json_path_for_image(in_path)
            if json_path:
                gps_json, json_date = extract_gps_from_json(json_path)
                if gps_json is not None:
                    try:
                        lat = float(gps_json[0])
                        lon = float(gps_json[1])
                        source_type = (source_type or "") + "+json"
                        if ext == ".heic":
                            print(f"[HEIC] Найден JSON → lat: {lat}, lon: {lon}, file: {source_filename}")
                    except:
                        lat, lon = None, None
                if not date_str and json_date:
                    date_str = json_date

        year, month, day = parse_date_components(date_str)
        base_name = f"IMG_{year}{month}{day}_{abs(hash(source_filename)) % 10**6}.jpg"
        out_path = os.path.join(OUT_FOLDER, base_name)

        if ext == ".heic":
            print(f"[HEIC] Конвертация: {source_filename} → {base_name}")
            temp_path = "/tmp/temp_output.jpg"
            result = subprocess.run([
                "sips", "-s", "format", "jpeg", in_path, "--out", temp_path
            ], capture_output=True, text=True)

            if result.returncode == 0 and os.path.exists(temp_path):
                image = Image.open(temp_path)
                width, height = image.size
                new_size = (int(width * 0.8), int(height * 0.8))
                image = image.resize(new_size, Image.LANCZOS)
                image.convert("RGB").save(out_path, "JPEG", quality=85)
                os.remove(temp_path)
            else:
                print(f"[HEIC] ❌ Ошибка sips: {result.stderr.strip()}")
                Image.new("RGB", (800, 600), (128, 128, 128)).save(out_path, "JPEG", quality=85)
                source_type = (source_type or "") + "+heic-error"

        else:
            image = Image.open(in_path)
            width, height = image.size
            new_size = (int(width * 0.8), int(height * 0.8))
            image = image.resize(new_size, Image.LANCZOS)
            image.convert("RGB").save(out_path, "JPEG", quality=85)

        row = {
            'filename': base_name,
            'folder': os.path.basename(OUT_FOLDER),
            'latitude': lat,
            'longitude': lon,
            'year': year,
            'month': month,
            'day': day,
            'album': album,
            'source_filename': source_filename,
            'source_type': source_type
        }

        if lat in (None, 0.0) or lon in (None, 0.0):
            row['latitude'] = None
            row['longitude'] = None
            records_missing.append(row)
        else:
            key = (round(lat, 6), round(lon, 6))
            count = coords_seen.get(key, 0)
            if count > 0:
                row['latitude'], row['longitude'] = spiral_coords(lat, lon, count)
            coords_seen[key] = count + 1
            records_ok.append(row)

    except Exception as e:
        print(f"❌ Ошибка при обработке {source_filename}: {e}")
        year, month, day = "2099", "01", "01"
        base_name = f"IMG_{year}{month}{day}_{abs(hash(source_filename)) % 10**6}.jpg"
        records_missing.append({
            'filename': base_name,
            'folder': os.path.basename(OUT_FOLDER),
            'latitude': None,
            'longitude': None,
            'year': year,
            'month': month,
            'day': day,
            'album': album,
            'source_filename': source_filename,
            'source_type': None
        })

# === Save CSVs ===
pd.DataFrame(records_ok).to_csv(CSV_MAIN, index=False)
pd.DataFrame(records_missing).to_csv(CSV_MISSING, index=False)

# === Summary ===
print(f"\n\U0001f4ca Сводка:")
print(f"\U0001f9ee Всего обработано: {len(records_ok) + len(records_missing)}")
print(f"✅ С координатами:   {len(records_ok)}")
print(f"❌ Без координат:    {len(records_missing)}")
print(f"📄 CSV-файлы:        {CSV_MAIN}, {CSV_MISSING}")
print(f"📁 Изображения:      {OUT_FOLDER}")

📦 Обработка изображений:  59%|███████▋     | 1427/2431 [06:35<05:51,  2.86it/s]

[HEIC] Найден JSON → lat: 50.0842092, lon: 14.4240219, file: IMG_1894.HEIC
[HEIC] Конвертация: IMG_1894.HEIC → IMG_20220501_332311.jpg


📦 Обработка изображений:  64%|████████▎    | 1546/2431 [07:11<03:42,  3.98it/s]

[HEIC] Найден JSON → lat: 50.082902600000004, lon: 14.422433299999998, file: IMG_1899.HEIC
[HEIC] Конвертация: IMG_1899.HEIC → IMG_20220501_71799.jpg


📦 Обработка изображений:  70%|█████████    | 1694/2431 [07:56<03:47,  3.24it/s]

[HEIC] Найден JSON → lat: 50.0877207, lon: 14.4277908, file: IMG_1811.HEIC
[HEIC] Конвертация: IMG_1811.HEIC → IMG_20220501_135512.jpg


📦 Обработка изображений:  74%|█████████▌   | 1790/2431 [08:25<02:58,  3.60it/s]

[HEIC] Найден JSON → lat: 50.0864771, lon: 14.411436600000002, file: IMG_2005.HEIC
[HEIC] Конвертация: IMG_2005.HEIC → IMG_20220501_551842.jpg


📦 Обработка изображений:  74%|█████████▋   | 1810/2431 [08:31<03:08,  3.29it/s]

[HEIC] Найден JSON → lat: 50.075538099999996, lon: 14.437800500000002, file: IMG_1890.HEIC
[HEIC] Конвертация: IMG_1890.HEIC → IMG_20220501_429680.jpg


📦 Обработка изображений:  76%|█████████▊   | 1846/2431 [08:41<02:56,  3.31it/s]

[HEIC] Найден JSON → lat: 50.0861017, lon: 14.416173599999997, file: IMG_1901.HEIC
[HEIC] Конвертация: IMG_1901.HEIC → IMG_20220501_435404.jpg


📦 Обработка изображений: 100%|█████████████| 2431/2431 [10:46<00:00,  3.76it/s]


📊 Сводка:
🧮 Всего обработано: 2431
✅ С координатами:   2424
❌ Без координат:    7
📄 CSV-файлы:        /Users/mloktionov/PycharmProjects/PhotoMaps/csv/photos.csv, /Users/mloktionov/PycharmProjects/PhotoMaps/csv/missing_coords.csv
📁 Изображения:      /Users/mloktionov/PycharmProjects/PhotoMaps/photos





In [None]:
# ---- STEP 3. Getting descriptions from Google photos via API

In [36]:
import csv
import os
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build_from_document
from tqdm import tqdm

# === Настройки ===
SCOPES = ['https://www.googleapis.com/auth/photoslibrary.readonly']
OUTPUT_CSV = 'csv/photos_descriptions.csv'
ALBUM_NAMES = [
    "PhotoMap 2022-2025", "PhotoMap 2019-2021", "PhotoMap 2017-2019",
    "PhotoMap 2014-2016", "PhotoMap 2010-2013", "PhotoMap 08-09"
]

# === Аутентификация ===
flow = InstalledAppFlow.from_client_secrets_file('client_secrets.json', SCOPES)
credentials = flow.run_local_server(port=0)

# === Чтение discovery JSON ===
with open("photoslibrary_v1_discovery.json", "r") as f:
    discovery_doc = f.read()

service = build_from_document(discovery_doc, credentials=credentials)

# === Поиск альбомов ===
print("🔍 Поиск альбомов в Google Photos...")
albums = []
nextPageToken = None

while True:
    response = service.albums().list(pageSize=50, pageToken=nextPageToken).execute()
    albums.extend(response.get('albums', []))
    nextPageToken = response.get('nextPageToken')
    if not nextPageToken:
        break

target_albums = {album['title']: album['id'] for album in albums if album['title'] in ALBUM_NAMES}

# === Проверка найденных альбомов ===
if not target_albums:
    print("❌ Не найдены указанные альбомы в Google Photos")
else:
    print(f"✅ Найдены альбомы: {list(target_albums.keys())}")

# === Запись описаний ===
os.makedirs('csv', exist_ok=True)
with open(OUTPUT_CSV, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['album', 'source_filename', 'description'])

    for album_name, album_id in target_albums.items():
        print(f"\n📂 Альбом: {album_name}")
        nextPageToken = None

        with tqdm(desc=f"Загрузка из {album_name}", unit="фото", leave=True) as pbar:
            while True:
                results = service.mediaItems().search(
                    body={"albumId": album_id, "pageSize": 100, "pageToken": nextPageToken}
                ).execute()

                items = results.get('mediaItems', [])
                nextPageToken = results.get('nextPageToken')

                if not items:
                    break

                for item in items:
                    description = item.get('description')
                    filename = item.get('filename')

                    if description and filename:
                        writer.writerow([album_name, filename, description])

                pbar.update(len(items))

                if not nextPageToken:
                    break

    print(f"\n✅ Описания успешно сохранены в {OUTPUT_CSV}")

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=849244129200-hdfqohk1rs46hjekajgu7pa4jqrn9sqj.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A60939%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fphotoslibrary.readonly&state=KCVP37DvJmxZEqfefrdyKQwSp3XioA&access_type=offline
🔍 Поиск альбомов в Google Photos...
✅ Найдены альбомы: ['PhotoMap 2022-2025', 'PhotoMap 2019-2021', 'PhotoMap 2017-2019', 'PhotoMap 2014-2016', 'PhotoMap 2010-2013', 'PhotoMap 08-09']

📂 Альбом: PhotoMap 2022-2025


Загрузка из PhotoMap 2022-2025: 653фото [00:06, 107.97фото/s]



📂 Альбом: PhotoMap 2019-2021


Загрузка из PhotoMap 2019-2021: 784фото [00:08, 96.14фото/s] 



📂 Альбом: PhotoMap 2017-2019


Загрузка из PhotoMap 2017-2019: 249фото [00:02, 101.87фото/s]



📂 Альбом: PhotoMap 2014-2016


Загрузка из PhotoMap 2014-2016: 381фото [00:03, 102.93фото/s]



📂 Альбом: PhotoMap 2010-2013


Загрузка из PhotoMap 2010-2013: 257фото [00:02, 111.37фото/s]



📂 Альбом: PhotoMap 08-09


Загрузка из PhotoMap 08-09: 109фото [00:01, 67.24фото/s]


✅ Описания успешно сохранены в csv/photos_descriptions.csv





In [39]:
# --- STEP 4. here add coordinates to missing_coords.csv
# --- and then run the script!

In [None]:
# --- STEP 5. Run the script to merge all csv into one containing file names, coords, dates and descriptions

In [42]:
import os
import pandas as pd

# === Пути ===
CSV_FOLDER = "csv"
MAIN_CSV = os.path.join(CSV_FOLDER, "photos.csv")
MISSING_CSV = os.path.join(CSV_FOLDER, "missing_coords.csv")
DESCRIPTIONS_CSV = os.path.join(CSV_FOLDER, "photos_descriptions.csv")
FINAL_CSV = os.path.join(CSV_FOLDER, "photos_combined.csv")

# === Загрузка файлов ===
print("📥 Загрузка CSV...")
df_main = pd.read_csv(MAIN_CSV)
df_missing = pd.read_csv(MISSING_CSV)
df_desc = pd.read_csv(DESCRIPTIONS_CSV)

# === Объединение main и missing ===
df_all = pd.concat([df_main, df_missing], ignore_index=True)

# === Подготовка для merge по ключу: album + source_filename ===
df_all["merge_key"] = df_all["album"].str.strip() + "/" + df_all["source_filename"].str.strip()
df_desc["merge_key"] = df_desc["album"].str.strip() + "/" + df_desc["source_filename"].str.strip()

# === Слияние описаний ===
df_all = df_all.merge(
    df_desc[["merge_key", "description"]],
    on="merge_key",
    how="left"
)

# === Очистка и финальная запись ===
df_all.drop(columns=["merge_key"], inplace=True)
df_all.to_csv(FINAL_CSV, index=False)

print("\n✅ Объединённый CSV сохранён:", FINAL_CSV)
print("📄 Строк: ", len(df_all))
print("📝 Колонок: ", list(df_all.columns))


📥 Загрузка CSV...

✅ Объединённый CSV сохранён: csv/photos_combined.csv
📄 Строк:  2431
📝 Колонок:  ['filename', 'folder', 'latitude', 'longitude', 'year', 'month', 'day', 'album', 'source_filename', 'source_type', 'description']


In [None]:
# --- STEP 6. Upload files from photos/ to Google Drive/photos

In [None]:
# --- Script deletes all files in Google Drive/Photos
# --- Script uploads all files from photos/ to Google/Photos/

In [47]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from tqdm import tqdm
import os

# === Авторизация в Google Drive
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# === ID целевой папки на Google Drive
FOLDER_ID = '1em81MElkxnaue5r92e9hPGezgOuw4nRL'
LOCAL_PHOTOS_FOLDER = 'photos/'

# === Очистка папки на Google Drive
print("🧹 Очистка папки на Google Drive...")
file_list = drive.ListFile({
    'q': f"'{FOLDER_ID}' in parents and trashed=false"
}).GetList()

for f in tqdm(file_list, desc="Удаление файлов"):
    f.Delete()

print("✅ Папка очищена.")

# === Загрузка новых файлов из локальной папки
print("⬆️  Загрузка новых файлов в Google Drive...")
local_files = [f for f in os.listdir(LOCAL_PHOTOS_FOLDER) if f.lower().endswith(('.jpg', '.jpeg'))]

for fname in tqdm(local_files, desc="Загрузка файлов"):
    file_path = os.path.join(LOCAL_PHOTOS_FOLDER, fname)
    gfile = drive.CreateFile({
        'title': fname,
        'parents': [{'id': FOLDER_ID}]
    })
    gfile.SetContentFile(file_path)
    gfile.Upload()

print("✅ Загрузка завершена.")


Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=849244129200-hdfqohk1rs46hjekajgu7pa4jqrn9sqj.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.
🧹 Очистка папки на Google Drive...


Удаление файлов: 100%|██████████| 2431/2431 [23:06<00:00,  1.75it/s]


✅ Папка очищена.
⬆️  Загрузка новых файлов в Google Drive...


Загрузка файлов: 100%|██████████| 2431/2431 [2:28:50<00:00,  3.67s/it]  

✅ Загрузка завершена.





In [None]:
# ---- STEP 7. Connect to Google Drive and get new filenames with their links

In [66]:
import os
import pandas as pd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from tqdm import tqdm

# === Настройки ===
FOLDER_ID = '1em81MElkxnaue5r92e9hPGezgOuw4nRL'
OUTPUT_CSV = 'csv/google_drive_links.csv'

# === Авторизация Google Drive ===
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# === Получение списка файлов ===
print("🔍 Получаем список файлов из Google Drive...")
file_list = drive.ListFile({'q': f"'{FOLDER_ID}' in parents and trashed=false"}).GetList()

# === Обработка с прогрессом ===
data = []
print(f"📁 Найдено файлов: {len(file_list)}")

for file in tqdm(file_list, desc="📦 Обработка файлов", unit="файл"):
    filename = file['title']
    file_id = file['id']
    view_url = f"https://drive.google.com/uc?export=view&id={file_id}"
    data.append({
        'filename': filename,
        'file_id': file_id,
        'link': view_url
    })

# === Сохраняем CSV ===
os.makedirs('csv', exist_ok=True)
df = pd.DataFrame(data)
df.to_csv(OUTPUT_CSV, index=False)

print(f"\n✅ Ссылки успешно сохранены в {OUTPUT_CSV}")

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?client_id=849244129200-hdfqohk1rs46hjekajgu7pa4jqrn9sqj.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8080%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive&access_type=offline&response_type=code

Authentication successful.
🔍 Получаем список файлов из Google Drive...
📁 Найдено файлов: 2431


📦 Обработка файлов: 100%|██████████| 2431/2431 [00:00<00:00, 545814.09файл/s]


✅ Ссылки успешно сохранены в csv/google_drive_links.csv





In [None]:
# --- STEP 8. Get all info from photos_combined.csv and google_drive_links.csv into photos_final.csv


In [69]:
import os
import pandas as pd

# === Пути ===
CSV_FOLDER = "csv"
COMBINED_CSV = os.path.join(CSV_FOLDER, "photos_combined.csv")
LINKS_CSV = os.path.join(CSV_FOLDER, "google_drive_links.csv")
FINAL_CSV = os.path.join(CSV_FOLDER, "photos_final.csv")

# === Загрузка данных ===
print("📥 Загрузка CSV...")
df_combined = pd.read_csv(COMBINED_CSV)
df_links = pd.read_csv(LINKS_CSV)

# === Объединение по filename ===
df_final = df_combined.merge(df_links[['filename', 'link']], on='filename', how='left')

# === Сохранение итогового файла ===
df_final.to_csv(FINAL_CSV, index=False)

# === Отчёт ===
print("\n✅ Итоговый CSV сохранён:", FINAL_CSV)
print("📄 Строк: ", len(df_final))
print("🧷 Колонки: ", list(df_final.columns))


📥 Загрузка CSV...

✅ Итоговый CSV сохранён: csv/photos_final.csv
📄 Строк:  2431
🧷 Колонки:  ['filename', 'folder', 'latitude', 'longitude', 'year', 'month', 'day', 'album', 'source_filename', 'source_type', 'description', 'link']


In [None]:
# --- Step 9. Put all info from photos_final.csv into photos.geojson

In [86]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# === Пути ===
CSV_FOLDER = "csv"
GEOJSON_FOLDER = "geojson"
FINAL_CSV = os.path.join(CSV_FOLDER, "photos_final.csv")
GEOJSON_OUTPUT = os.path.join(GEOJSON_FOLDER, "photos.geojson")

# === Загрузка данных ===
df = pd.read_csv(FINAL_CSV)

# === Фильтрация точек с координатами ===
df_geo = df[df['latitude'].notnull() & df['longitude'].notnull()].copy()

# === Создание геометрии ===
df_geo['geometry'] = df_geo.apply(lambda row: Point(float(row['longitude']), float(row['latitude'])), axis=1)

# === Добавление image и fullname вместо link ===
def make_drive_urls(link):
    if isinstance(link, str) and "id=" in link:
        file_id = link.split("id=")[-1]
        return pd.Series({
            "image": f"https://drive.google.com/thumbnail?id={file_id}",
            "fullname": f"https://drive.google.com/uc?export=view&id={file_id}"
        })
    return pd.Series({"image": None, "fullname": None})

url_cols = df_geo["link"].apply(make_drive_urls)
df_geo = pd.concat([df_geo.drop(columns=["link"]), url_cols], axis=1)

# === Преобразование в GeoDataFrame и сохранение ===
gdf = gpd.GeoDataFrame(df_geo, geometry='geometry', crs='EPSG:4326')
os.makedirs(GEOJSON_FOLDER, exist_ok=True)
gdf.to_file(GEOJSON_OUTPUT, driver='GeoJSON')

# === Отчёт ===
print("\n🌍 GeoJSON успешно обновлён:", GEOJSON_OUTPUT)
print("📍 Точек: ", len(gdf))
print("📄 Колонки: ", list(gdf.columns))



🌍 GeoJSON успешно обновлён: geojson/photos.geojson
📍 Точек:  2431
📄 Колонки:  ['filename', 'folder', 'latitude', 'longitude', 'year', 'month', 'day', 'album', 'source_filename', 'source_type', 'description', 'geometry', 'image', 'fullname']


In [None]:
# --- STEP 10. Get country code from nominate site into photos.geojson

In [92]:
import json
import requests
import time
from tqdm import tqdm

GEOJSON_FILE = "geojson/photos.geojson"
NOMINATIM_URL = "https://nominatim.openstreetmap.org/reverse"
DELAY = 2  # секунды между запросами

HEADERS = {
    'User-Agent': 'PhotoMapsApp (bizbur08@gmail.com)'  # 🔧 Укажи свой email!
}

# — Кэш координат, чтобы не дублировать запросы
country_cache = {}

def get_country_code(lat, lon):
    """
    Получение кода страны по координатам через Nominatim API
    """
    key = f"{lat:.4f},{lon:.4f}"
    if key in country_cache:
        return country_cache[key]

    try:
        response = requests.get(NOMINATIM_URL, params={
            'format': 'json',
            'lat': lat,
            'lon': lon
        }, headers=HEADERS)

        if response.status_code == 200:
            data = response.json()
            code = data.get('address', {}).get('country_code', '').upper()
            country_cache[key] = code if code else '??'
            return country_cache[key]
        else:
            print(f"❌ Ошибка {response.status_code} для {lat},{lon}")
            return '??'

    except Exception as e:
        print(f"❌ Ошибка соединения для {lat},{lon}: {e}")
        return '??'

def enrich_geojson():
    """
    Добавление кода страны в каждую точку GeoJSON
    """
    with open(GEOJSON_FILE, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)

    for feature in tqdm(geojson_data["features"], desc="🌍 Обработка точек"):
        lat, lon = feature["geometry"]["coordinates"][1], feature["geometry"]["coordinates"][0]
        country_code = get_country_code(lat, lon)
        feature["properties"]["country_code"] = country_code
        time.sleep(DELAY)

    with open(GEOJSON_FILE, 'w', encoding='utf-8') as f:
        json.dump(geojson_data, f, indent=2, ensure_ascii=False)

    print(f"\n✅ Файл {GEOJSON_FILE} обновлён с country_code")

# === Запуск ===
if __name__ == "__main__":
    enrich_geojson()

🌍 Обработка точек: 100%|██████████| 2431/2431 [1:31:56<00:00,  2.27s/it]


✅ Файл geojson/photos.geojson обновлён с country_code



