In [None]:
!pip install flickrapi
!pip list | grep 'flickr'

In [None]:
import logging
import os
from pathlib import Path
from urllib.parse import urlsplit

import flickrapi
import pandas as pd
import requests
from tqdm import tqdm


def get_photo_url_etree(photo_id):
    """Return the attributes of the largest available size of a Flickr photo.

    Calls ``flickr.photos.getSizes`` on the notebook-level ``flickr`` client
    and picks the child of the ``sizes`` element with the greatest width,
    breaking ties on height.

    Parameters
    ----------
    photo_id : str
        Flickr photo id passed straight to ``getSizes``.

    Returns
    -------
    dict
        Attributes of the chosen size element, unchanged (values stay the
        strings found in the response; callers read ``source``, ``width`` and
        ``height``).

    Raises
    ------
    ValueError
        If the response contains no size entries, or a width/height value is
        not an integer.
    KeyError
        If a size entry lacks ``width`` or ``height``.
    """
    sizes = flickr.photos.getSizes(photo_id=photo_id)
    container = sizes.find("sizes")
    candidates = [] if container is None else [dict(e.items()) for e in container]
    if not candidates:
        raise ValueError(f"No sizes returned for photo: {photo_id}")

    # Element attributes are strings, so they must be compared as integers:
    # ordered as text, "500" would outrank "1024" and the wrong size would win.
    return max(
        candidates,
        key=lambda size: (int(size["width"]), int(size["height"])),
    )


def retrieve_photo_meta_data(album_id, max_photos=None):
    """Collect download metadata for the photos of one Flickr album.

    Walks the album with the notebook-level ``flickr`` client, looks up the
    largest size of every photo via ``get_photo_url_etree``, drops images
    smaller than 256 px in either dimension and, if more than ``max_photos``
    remain, keeps a reproducible random sample (``random_state=42``).

    Parameters
    ----------
    album_id : str
        Flickr photoset id.
    max_photos : int, optional
        Upper bound on the number of rows returned. Defaults to the
        notebook-level ``n_photos_per_album`` setting.

    Returns
    -------
    pandas.DataFrame
        One row per retained photo: the photo's own attributes plus
        ``photo_meta``, ``album_id``, ``download_url``, ``width`` and
        ``height``. An empty frame is returned when the album cannot be
        walked or no photo yields size information.
    """
    try:
        photos_raw = list(flickr.walk_set(album_id))
    except Exception as exc:  # not a bare except: Ctrl-C must still interrupt
        logging.error(f"Unable to walk photos for album: {album_id} ({exc!r})")
        return pd.DataFrame()

    photo_records = []
    for photo in tqdm(
        photos_raw, desc=f"Retrieving photo meta data for album: {album_id}"
    ):
        photo = dict(photo.items())  # flatten the element's attributes to a dict
        try:
            photo["photo_meta"] = get_photo_url_etree(photo["id"])
        except Exception as exc:
            # Best effort: one bad photo must not abort the whole album.
            logging.error(
                f"Unable to retrieve photo size for: {photo.get('id')} ({exc!r})"
            )
            continue
        photo_records.append(photo)

    if not photo_records:
        # No records means no photo_meta column to derive the other fields from.
        return pd.DataFrame()

    if max_photos is None:
        max_photos = n_photos_per_album

    photos = (
        pd.DataFrame(photo_records)
        .assign(
            album_id=album_id,
            download_url=lambda df: df["photo_meta"].map(lambda meta: meta["source"]),
            width=lambda df: df["photo_meta"].map(lambda meta: int(meta["width"])),
            height=lambda df: df["photo_meta"].map(lambda meta: int(meta["height"])),
        )
        # filter out small images
        .query("height >= 256 & width >= 256")
    )
    if len(photos) > max_photos:
        photos = photos.sample(n=max_photos, random_state=42)
    return photos


def download_flickr_image(url, save_path, timeout=30):
    """Download ``url`` and write the response body to ``save_path``.

    Parameters
    ----------
    url : str
        Address of the image.
    save_path : str or path-like
        Destination file; it is overwritten if it already exists.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Raises
    ------
    Exception
        Whatever ``requests`` raises for connection problems, timeouts or a
        non-success HTTP status; nothing is written in those cases.
    """
    # A single request is enough; the body is read fully into memory.
    response = requests.get(url, timeout=timeout)
    # Fail before touching the disk so an error page is never saved as an image.
    response.raise_for_status()
    with open(save_path, "wb") as file:
        file.write(response.content)


def download_photo_record(record, download_dir):
    """Download the image described by ``record`` into its album folder.

    The file is stored as ``<download_dir>/<album_id>/<id><ext>``, where the
    extension is taken from the path part of ``record.download_url``. A file
    that already exists is skipped, and a failed download is logged rather
    than raised so one bad image does not stop a batch.

    Parameters
    ----------
    record : object
        Anything exposing ``album_id``, ``id`` and ``download_url``
        attributes, e.g. a row of the metadata frame.
    download_dir : pathlib.Path
        Root directory that holds one sub-directory per album.
    """
    album_dir = download_dir / record.album_id
    # exist_ok already covers the "directory is there" case.
    album_dir.mkdir(parents=True, exist_ok=True)

    # Use only the URL path for the extension so that a query string or
    # fragment can never end up in the file name.
    suffix = Path(urlsplit(record.download_url).path).suffix
    save_path = f"{(album_dir / record.id).as_posix()}{suffix}"
    logging.debug(f"Target path: {save_path}")

    if Path(save_path).exists():
        logging.info(f"Previously saved: {save_path}; skipping")
        return

    try:
        download_flickr_image(record.download_url, save_path)
    except Exception as exc:
        logging.error(
            f"Unable to download image at: {record.download_url} ({exc!r})"
        )


In [None]:
# Flickr API credentials are read from the environment so that no key or
# secret is stored in the notebook. Export FLICKR_API_KEY and
# FLICKR_API_SECRET before starting the kernel; a missing variable raises
# KeyError here rather than failing later inside an API call.
api_key = os.environ["FLICKR_API_KEY"]
api_secret = os.environ["FLICKR_API_SECRET"]

# etree format for user retrievals, json for favourite retrievals
flickr = flickrapi.FlickrAPI(api_key, api_secret, format="etree")
# NOTE(review): the original author saw a 401 from this call although later
# requests still worked - confirm whether console authentication is needed.
flickr.authenticate_console()
# Alternative flow: flickr.authenticate_via_browser(perms='write')


In [None]:
# Sampling settings and the Flickr account whose albums are listed.
n_albums = 1
n_photos_per_album = 10
user_id = "61021753@N02"

# Fetch page 1 of the account's photosets (the listing is paginated), turn
# each photoset element into a plain dict and draw a reproducible sample.
bdhl = flickr.photosets.getList(user_id=user_id, page=1)
bdhl_df = pd.DataFrame(
    [dict(photoset.items()) for photoset in bdhl.find("photosets")]
).sample(n=n_albums, random_state=42)


In [None]:
# Hand-picked album ids: the last path segment of each album URL.
curated_albums = [
    Path(album_url).name
    for album_url in (
        "https://www.flickr.com/photos/biodivlibrary/albums/72157719480387299",
        "https://www.flickr.com/photos/biodivlibrary/albums/72157719491069662",
        "https://www.flickr.com/photos/biodivlibrary/albums/72157719533382815",
        "https://www.flickr.com/photos/biodivlibrary/albums/72157719532261290",
        "https://www.flickr.com/photos/biodivlibrary/albums/72157719532226635",
    )
]


In [None]:
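# Disabled alternative: collect metadata for the hand-picked `curated_albums`
# instead of the random sample (`all_photos` must be an empty list beforehand).
# for album in curated_albums:
#     all_photos.append(retrieve_photo_meta_data(album))
# all_photos = pd.concat(all_photos)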


In [None]:
# Walk the sampled albums and gather per-photo metadata, one frame per album.
album_frames = [retrieve_photo_meta_data(album_id) for album_id in bdhl_df["id"]]

# The frames have to be combined before rows can be iterated with .apply;
# pd.concat rejects an empty list, hence the fallback to an empty frame.
all_photos = (
    pd.concat(album_frames, ignore_index=True) if album_frames else pd.DataFrame()
)

# Download each photo; mkdir with exist_ok needs no prior existence check.
download_dir = Path("./biodiversity_library")
download_dir.mkdir(parents=True, exist_ok=True)

bio_diversity_all = all_photos.apply(
    lambda y: download_photo_record(y, download_dir), axis=1
)
