In [None]:
# Prerequisite: every album folder (together with its files) must be located inside the images/ folder.

In [79]:
import os
import re
from collections import defaultdict

# === Root folder that holds one sub-folder per album ===
ROOT_DIR = "images"
album_dirs = [
    entry
    for entry in os.listdir(ROOT_DIR)
    if os.path.isdir(os.path.join(ROOT_DIR, entry))
]

# === Counters: per-album snapshot and running grand total ===
stats = {}
total = defaultdict(int)

# === Patterns ===
# Matches names that look like duplicates: " (1)", " (2)", ... or " copy".
pattern_copy = re.compile(r"\s\(\d+\)| copy", re.IGNORECASE)

# Extensions that are counted as photos.
photo_exts = ['.jpg', '.jpeg', '.heic']

# === Per-album analysis ===
for album in album_dirs:
    album_path = os.path.join(ROOT_DIR, album)
    tally = defaultdict(int)

    for name in os.listdir(album_path):
        # Sub-directories and other non-regular entries are ignored.
        if not os.path.isfile(os.path.join(album_path, name)):
            continue

        ext = os.path.splitext(name)[1].lower()

        if ext == '.json':
            tally['json'] += 1
            continue
        if ext not in photo_exts:
            tally['other'] += 1
            continue
        if '-edited' in name.lower():
            # Edited variants are counted separately, not under their extension.
            tally['edited'] += 1
            continue

        tally[ext] += 1
        if pattern_copy.search(name):
            tally['renamed'] += 1

    stats[album] = dict(tally)
    for key, value in tally.items():
        total[key] += value

# === Per-album report ===
print("\n📊 Статистика по альбомам:")
for album, album_counts in stats.items():
    print(f"\n📁 {album}:")
    for key, value in album_counts.items():
        print(f"  {key}: {value}")

# === Overall number of usable photos ===
# NOTE: these lookups run before the totals are printed on purpose of order
# preservation: reading a missing key from the defaultdict inserts it with 0,
# so it then shows up in the totals listing below.
total_photos = sum([total[photo_ext] for photo_ext in photo_exts])
total_edited = total['edited']
usable_photos = total_photos  # '-edited' files were already excluded while counting

print("\n📈 Итого по всем альбомам:")
for key, value in total.items():
    print(f"  {key}: {value}")

print(f"\n🧮 Всего фото-файлов (без -edited): {total_photos}")
print(f"✂️  Файлов с '-edited' в имени:        {total_edited}")
print(f"✅ Годных для обработки файлов:        {usable_photos}")


📊 Статистика по альбомам:

📁 PhotoMap 2010-2013:
  json: 258
  .jpg: 257
  edited: 14

📁 PhotoMap 2014-2016:
  json: 382
  edited: 76
  .jpg: 381

📁 PhotoMap 2019-2021:
  .jpg: 782
  json: 785
  other: 2
  edited: 58

📁 PhotoMap 2022-2025:
  json: 654
  .jpg: 627
  edited: 7
  .heic: 6
  .jpeg: 20

📁 PhotoMap 08-09:
  .jpg: 109
  json: 110
  edited: 83

📁 PhotoMap 2017-2019:
  json: 250
  .jpg: 249
  edited: 5

📈 Итого по всем альбомам:
  json: 2439
  .jpg: 2405
  edited: 243
  other: 2
  .heic: 6
  .jpeg: 20

🧮 Всего фото-файлов (без -edited): 2431
✂️  Файлов с '-edited' в имени:        243
✅ Годных для обработки файлов:        2431


In [98]:
import os
import csv
import re
import json
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from datetime import datetime

# === Settings ===
IMAGES_FOLDER = "images"   # root folder passed to process_images()
OUT_FOLDER = "csv"         # folder that receives photos.csv
MISSING_FOLDER = "csv"     # folder that receives missing_coords.csv

# Full paths of the two CSV reports.
photos_output_path = os.path.join(OUT_FOLDER, "photos.csv")
missing_output_path = os.path.join(MISSING_FOLDER, "missing_coords.csv")

# Make sure both output folders exist before anything is written to them.
os.makedirs(OUT_FOLDER, exist_ok=True)
os.makedirs(MISSING_FOLDER, exist_ok=True)

def extract_gps_from_exif(image_path):
    """Read GPS coordinates and the capture date from a JPEG's EXIF data.

    Only paths ending in ``.jpg``/``.jpeg`` (case-insensitive) are opened;
    every other extension, HEIC included, is skipped without touching the file.

    Args:
        image_path: Path to the image file.

    Returns:
        A ``(coords, date_str)`` pair:

        * ``coords`` is ``(lat, lon)`` in decimal degrees, negated for the
          ``"S"`` / ``"W"`` reference tags, or ``None`` when the file has no
          usable GPS tags.
        * ``date_str`` is the raw ``DateTimeOriginal`` value, or ``None``.

        The function is best-effort and does not raise for unreadable files:
        those yield ``(None, None)``. When the EXIF block is readable but its
        GPS part is missing or malformed, the date is still returned.
    """
    if not image_path.lower().endswith(('.jpg', '.jpeg')):
        return None, None

    try:
        # The context manager releases the file handle even if EXIF parsing fails.
        with Image.open(image_path) as image:
            exif_data = image._getexif()
    except Exception:
        # Deliberate best effort: a missing, corrupt or non-JPEG file simply
        # has no EXIF information as far as the caller is concerned.
        return None, None

    if not exif_data:
        return None, None

    # Pick out the two tags of interest first, so that a broken GPS block
    # cannot cost us the capture date.
    raw_gps = None
    date_str = None
    for tag, value in exif_data.items():
        decoded = TAGS.get(tag)
        if decoded == "GPSInfo":
            raw_gps = value
        elif decoded == "DateTimeOriginal":
            date_str = value

    if not raw_gps:
        return None, date_str

    def convert_to_degrees(value):
        # value is a (degrees, minutes, seconds) triple.
        d, m, s = value
        return float(d) + float(m) / 60 + float(s) / 3600

    try:
        gps_info = {GPSTAGS.get(t): raw_gps[t] for t in raw_gps}

        lat = convert_to_degrees(gps_info["GPSLatitude"])
        if gps_info.get("GPSLatitudeRef") == "S":
            lat = -lat

        lon = convert_to_degrees(gps_info["GPSLongitude"])
        if gps_info.get("GPSLongitudeRef") == "W":
            lon = -lon
    except (KeyError, TypeError, ValueError, ZeroDivisionError):
        # Partial or malformed GPS block: report "no coordinates" but keep the date.
        return None, date_str

    return (lat, lon), date_str

def extract_gps_from_json(json_path):
    """Read coordinates and the capture timestamp from a JSON sidecar file.

    The file is expected to be a JSON object with an optional ``geoData``
    object (``latitude`` / ``longitude``) and an optional ``photoTakenTime``
    object (``timestamp``).

    Args:
        json_path: Path to the JSON file.

    Returns:
        A ``(coords, timestamp)`` pair:

        * ``coords`` is ``(latitude, longitude)`` exactly as stored in
          ``geoData`` (either value may be ``None`` if the key is absent), or
          ``None`` when there is no ``geoData`` object.
        * ``timestamp`` is ``photoTakenTime.timestamp`` or ``None``. It is
          returned even when ``geoData`` is missing.

        Unreadable or invalid files yield ``(None, None)``; nothing is raised.
    """
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None, None

    if not isinstance(data, dict):
        return None, None

    taken = data.get("photoTakenTime")
    photo_taken_time = taken.get("timestamp") if isinstance(taken, dict) else None

    geo = data.get("geoData")
    if not geo or not isinstance(geo, dict):
        return None, photo_taken_time

    return (geo.get("latitude"), geo.get("longitude")), photo_taken_time

def find_json_path_for_image(image_path):
    """Locate the JSON sidecar file that belongs to an image.

    Two naming schemes are tried, in this order:

    1. ``<image_path><suffix>`` for each known suffix, in list order.
    2. For names of the form ``<stem>(<n>).<ext>`` with ext jpg/jpeg/heic
       (case-insensitive): ``<stem>.<ext><suffix with "(<n>)" inserted before
       ".json">`` in the same folder, e.g.
       ``IMG(1).jpg`` -> ``IMG.jpg.supplemental-metadata(1).json``.

    The suffix list looks like ".supplemental-metadata.json" plus several
    truncated spellings of it - presumably produced by the exporting tool
    shortening long file names; verify against the actual export.

    Args:
        image_path: Path to the image file.

    Returns:
        The path of the first existing candidate, or ``None`` if none exists.
    """
    sidecar_suffixes = (
        ".supplemental-metadata.json",
        ".supplemental-meta.json",
        ".supplemental-metada.json",
        ".supplemental-metadat.json",
        ".supplemental-me.json",
    )
    folder, base = os.path.split(image_path)

    # Scheme 1: the suffix is appended directly to the full image path.
    direct_hit = next(
        (image_path + sfx for sfx in sidecar_suffixes
         if os.path.exists(image_path + sfx)),
        None,
    )
    if direct_hit is not None:
        return direct_hit

    # Scheme 2: numbered duplicates carry their "(n)" on the sidecar name instead.
    numbered = re.match(r'^(.*)\((\d+)\)\.(jpg|jpeg|heic)$', base, re.IGNORECASE)
    if numbered is None:
        return None

    stem, counter, ext = numbered.groups()
    original_name = f"{stem.strip()}.{ext}"
    for sfx in sidecar_suffixes:
        indexed_suffix = sfx.replace(".json", f"({counter}).json")
        candidate = os.path.join(folder, original_name + indexed_suffix)
        if os.path.exists(candidate):
            return candidate

    return None

def parse_date_components(date_str):
    """Split a capture date into ``(year, month, day)``.

    Two input forms are understood:

    * an all-digit value (string or int) - treated as a Unix timestamp in
      seconds and converted with ``datetime.fromtimestamp``, i.e. in the local
      time zone of the machine running the code;
    * a string in the ``"%Y:%m:%d %H:%M:%S"`` format, e.g.
      ``"2015:06:07 12:30:00"``.

    Timestamps of any length are accepted, so dates before 2001-09-09
    (nine-digit timestamps) are handled as well.

    Args:
        date_str: The raw date value; may be ``None``.

    Returns:
        ``(year, month, day)`` as ints, or ``(None, None, None)`` when the
        value cannot be interpreted. Never raises.
    """
    try:
        text = str(date_str).strip()
        if text.isdigit():
            dt = datetime.fromtimestamp(int(text))
        else:
            dt = datetime.strptime(text, "%Y:%m:%d %H:%M:%S")
        return dt.year, dt.month, dt.day
    except (ValueError, TypeError, OverflowError, OSError):
        # ValueError: bad format / out-of-range date; OverflowError and OSError:
        # timestamp outside what the platform's time functions support.
        return None, None, None

def process_images(image_folder, output_file, missing_file):
    """Scan a folder tree for photos and write two CSV reports.

    Every ``.jpg`` / ``.jpeg`` / ``.heic`` file under ``image_folder`` is
    visited, except files whose name contains ``-edited`` or ``_edited``.
    Coordinates are taken from EXIF first; if EXIF yields none, the JSON
    sidecar (when one is found) is used instead. The date comes from EXIF and
    falls back to the sidecar timestamp.

    Args:
        image_folder: Root folder, walked recursively.
        output_file: CSV path for photos that have coordinates. Columns:
            filename, folder, latitude, longitude, year, month, day,
            source_path, source_type ("exif" or "json").
        missing_file: CSV path for photos without usable coordinates. Same
            columns (all data columns empty) plus ``error``, which is
            "zero-coordinates", "no-coordinates" or the text of an
            unexpected exception.

    The ``folder`` column holds the name of the directory that contains the
    photo (its album).

    Returns:
        None. Both files are overwritten and a summary is printed.
    """
    photo_records = []
    missing_records = []
    total_processed = 0

    for dirpath, _, filenames in os.walk(image_folder):
        # Album name = the directory that actually holds the file.
        # normpath guards against a trailing separator on the root folder.
        album_name = os.path.basename(os.path.normpath(dirpath))

        for file in filenames:
            lowered = file.lower()
            if not lowered.endswith(('.jpg', '.jpeg', '.heic')):
                continue
            if '-edited' in lowered or '_edited' in lowered:
                continue

            fname = os.path.join(dirpath, file)
            total_processed += 1

            try:
                gps, date_str = extract_gps_from_exif(fname)
                source_type = "exif"

                if not gps:
                    # No EXIF coordinates: fall back to the JSON sidecar.
                    json_path = find_json_path_for_image(fname)
                    if json_path:
                        gps, json_date = extract_gps_from_json(json_path)
                        source_type = "json"
                        if not date_str and json_date:
                            date_str = json_date

                lat, lon = gps if gps else (None, None)
                year, month, day = parse_date_components(date_str) if date_str else (None, None, None)

                # 0.0/0.0 is treated as "no real location" rather than a point on the map.
                if lat == 0.0 and lon == 0.0:
                    print(f"[!] Координаты 0.0/0.0 → {fname}")
                    raise ValueError("zero-coordinates")

                if lat is None or lon is None:
                    raise ValueError("no-coordinates")

                photo_records.append({
                    'filename': file,
                    'folder': album_name,
                    'latitude': lat,
                    'longitude': lon,
                    'year': year,
                    'month': month,
                    'day': day,
                    'source_path': fname,
                    'source_type': source_type
                })

            except Exception as e:
                # Any failure (expected or not) lands the file in the "missing"
                # report, so every visited file is accounted for exactly once.
                missing_records.append({
                    'filename': file,
                    'folder': album_name,
                    'latitude': None,
                    'longitude': None,
                    'year': None,
                    'month': None,
                    'day': None,
                    'error': str(e),
                    'source_path': fname,
                    'source_type': None
                })

    # Write the CSV reports.
    photo_fields = ['filename', 'folder', 'latitude', 'longitude', 'year', 'month', 'day', 'source_path', 'source_type']
    missing_fields = photo_fields + ['error']

    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=photo_fields)
        writer.writeheader()
        writer.writerows(photo_records)

    with open(missing_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=missing_fields)
        writer.writeheader()
        writer.writerows(missing_records)

    # Final summary.
    accounted = len(photo_records) + len(missing_records)
    print("\n📊 Сводка:")
    print(f"🧮 Всего просмотрено файлов (без edited): {total_processed}")
    print(f"✅ С координатами: {len(photo_records)}")
    print(f"🚫 Без координат:  {len(missing_records)}")
    print(f"📦 Учтено всего:   {accounted}")
    print(f"❗ Пропущено:      {total_processed - accounted}")

# === Run ===
# Scan IMAGES_FOLDER and write the two CSV reports to the paths configured above.
process_images(IMAGES_FOLDER, photos_output_path, missing_output_path)

[!] Координаты 0.0/0.0 → images/PhotoMap 2010-2013/IMG_4498.JPG
[!] Координаты 0.0/0.0 → images/PhotoMap 2010-2013/IMG_0513.JPG
[!] Координаты 0.0/0.0 → images/PhotoMap 2010-2013/IMG_0924.JPG
[!] Координаты 0.0/0.0 → images/PhotoMap 08-09/IMG_1192.jpg
[!] Координаты 0.0/0.0 → images/PhotoMap 08-09/IMG_0529.JPG
[!] Координаты 0.0/0.0 → images/PhotoMap 08-09/IMG_0532.JPG
[!] Координаты 0.0/0.0 → images/PhotoMap 08-09/IMG_0533.JPG

📊 Сводка:
🧮 Всего просмотрено файлов (без edited): 2431
✅ С координатами: 2424
🚫 Без координат:  7
📦 Учтено всего:   2431
❗ Пропущено:      0
