# 01 : lister les photos

Travail en batch. Un dossier arrive dans `totreat` ; il est traité de la manière suivante :
- création d'un numéro de batch
- séparation en dossier par date
- un csv par dossier avec la liste des photos
- un json avec les informations sur le batch dans le dossier

In [1]:
import os
from importlib.resources import files

import pandas as pd
from exif import Image
from datetime import datetime
import uuid
import shutil

In [28]:
def list_folders(path) -> list:
	"""Return the names of the visible sub-directories of ``path``.

	Entries whose name starts with a dot are skipped, as are plain files:
	callers treat every returned name as a folder to descend into, so a
	stray file in ``path`` must not be reported.

	Args:
		path: Directory to inspect.

	Returns:
		Directory names (not full paths), in the order given by ``os.listdir``.

	Raises:
		OSError: If ``path`` cannot be listed.
	"""
	return [
		name
		for name in os.listdir(path)
		if not name.startswith('.') and os.path.isdir(os.path.join(path, name))
	]

In [33]:
def list_files(folder:str, filetype = '') -> list:
	"""Return the visible entry names found directly inside ``folder``.

	Args:
		folder: Path of the directory to list.
		filetype: Suffix the names must end with. The default ``''`` matches
			every name; ``None`` disables the suffix filter altogether.

	Returns:
		Entry names (not full paths) that do not start with a dot and that
		satisfy the suffix filter.
	"""
	visible = (name for name in os.listdir(f'{folder}') if not name.startswith('.'))
	if filetype is None:
		return list(visible)
	return [name for name in visible if name.endswith(filetype)]

In [30]:
def extract_infos(folder:str, file:str) -> "Image | None":
	"""Load the EXIF metadata of one photo stored under ``artefacts/totreat``.

	Args:
		folder: Name of the sub-folder of ``artefacts/totreat`` holding the photo.
		file: File name of the photo inside that folder.

	Returns:
		The ``Image`` object built from the file when it reports EXIF data,
		otherwise ``None``.

	Raises:
		OSError: If the file cannot be opened.
	"""
	base = 'artefacts/totreat'
	path = f'{base}/{folder}/{file}'

	with open(path, "rb") as img_file:
		img = Image(img_file)
		# Callers must be ready for None: photos without EXIF carry no usable metadata.
		return img if img.has_exif else None

In [35]:
def extract_datetime_from_infos(infos) -> "datetime | None":
	"""Parse the EXIF ``datetime`` attribute of ``infos`` into a ``datetime``.

	Args:
		infos: Object exposing ``has_exif`` and, optionally, ``datetime``
			(formatted ``YYYY:MM:DD HH:MM:SS``), or ``None`` when the photo
			has no EXIF data.

	Returns:
		The parsed timestamp, or ``None`` when ``infos`` is ``None``, has no
		EXIF data, has no ``datetime`` attribute, or holds a value that does
		not match the expected format.
	"""
	# extract_infos yields None for photos without EXIF; do not dereference it.
	if infos is None or not getattr(infos, "has_exif", False):
		return None

	raw = getattr(infos, "datetime", None)
	if raw is None:
		return None

	try:
		return datetime.strptime(raw, "%Y:%m:%d %H:%M:%S")
	except (TypeError, ValueError):
		# Blank or malformed timestamps are treated like a missing one so a
		# single bad photo does not abort the whole batch.
		return None

In [34]:
def is_image(file:str) -> bool:
	"""Tell whether ``file`` names a JPEG photo.

	Only the exact suffixes ``.JPG`` and ``.jpg`` are recognised; mixed-case
	variants such as ``.Jpg`` are not.

	Args:
		file: File name or path to test.

	Returns:
		``True`` when the name ends with one of the recognised suffixes.
	"""
	return file.endswith(('.JPG', '.jpg'))

In [47]:
# Scratch cell: draw one random (version 4) UUID and display it.
generated_uuid = uuid.uuid4()
print(str(generated_uuid))

5f1b80ef-f0fc-4eba-8a90-7d897734749d


In [91]:
def generate_csv():
	"""Write a ``listing.csv`` in every folder found under ``artefacts/totreat``.

	Each listing has one row per JPEG of the folder with the columns
	``folder``, ``file`` and ``datetime`` (the EXIF timestamp, left empty
	when the photo has none).
	"""
	base = 'artefacts/totreat'
	for folder in list_folders(base):
		path_folder = f'{base}/{folder}'
		listing = []
		# list_files hands its argument straight to os.listdir, so it needs
		# the full folder path, not just the folder name.
		for file in list_files(path_folder):
			if not is_image(file):
				continue
			infos = extract_infos(folder, file)
			# extract_infos returns None for photos without EXIF data.
			taken_at = extract_datetime_from_infos(infos) if infos is not None else None
			listing.append([folder, file, taken_at])
		df = pd.DataFrame(listing, columns=['folder', 'file', 'datetime'])
		df.to_csv(path_folder + '/listing.csv', index=False)

In [95]:
def generate_filename():
	"""Enrich every ``listing.csv`` under ``artefacts/totreat`` in place.

	Two columns are added to each listing: ``date`` (the calendar date of
	the ``datetime`` column) and ``uuid`` (a fresh random UUID followed by
	the lower-cased extension of the original file name).
	"""
	def _random_name(original):
		# Keep the original extension, lower-cased, behind a new random stem.
		extension = os.path.splitext(original)[1].lower()
		return f"{uuid.uuid4()}{extension}"

	base = 'artefacts/totreat'
	for folder in list_folders(base):
		csv_path = f'{base}/{folder}/listing.csv'
		table = pd.read_csv(csv_path)
		table["datetime"] = pd.to_datetime(table["datetime"])
		table["date"] = table["datetime"].dt.date
		table["uuid"] = [_random_name(original) for original in table["file"]]
		table.to_csv(csv_path, index=False)

In [84]:
def list_move_files(batch):
	"""Build the copy plan for every photo listed under ``artefacts/totreat``.

	Args:
		batch: Batch identifier used as the destination sub-folder of
			``artefacts/photos``.

	Returns:
		A list of ``[old_path, filename, new_path, new_filename]`` entries,
		one per row of each ``listing.csv``. ``new_path`` is
		``artefacts/photos/{batch}/{date}`` and ``new_filename`` is the
		row's ``uuid`` value.
	"""
	base = 'artefacts/totreat'
	plan = []
	for folder in list_folders(base):
		source_dir = f'{base}/{folder}'
		table = pd.read_csv(f'{source_dir}/listing.csv')
		plan.extend(
			[source_dir, row.file, f'artefacts/photos/{batch}/{row.date}', row.uuid]
			for row in table.itertuples(index=False)
		)
	return plan
        

In [89]:
def move_files(listing):
	"""Copy every file described in ``listing`` to its destination.

	Despite the name, the source files are copied (``shutil.copy``), not
	removed.

	Args:
		listing: Iterable of ``(old_path, filename, new_path, new_filename)``
			entries; ``old_path`` and ``new_path`` are directories.

	Raises:
		OSError: If a destination directory cannot be created or a file
			cannot be copied.
	"""
	for old_path, filename, new_path, new_filename in listing:
		# exist_ok avoids the check-then-create race of a separate exists() test.
		os.makedirs(new_path, exist_ok=True)
		shutil.copy(f'{old_path}/{filename}', f'{new_path}/{new_filename}')

In [18]:
def split_csv(batch):
    """Split each source ``listing.csv`` into one CSV per date.

    For every folder under ``artefacts/totreat`` whose listing has a
    ``date`` column, the rows of each distinct date are written to
    ``artefacts/photos/{batch}/{date}/{random uuid}.csv``.

    Args:
        batch: Batch identifier naming the destination sub-folder of
            ``artefacts/photos``.
    """
    base = 'artefacts'
    # The listings live under totreat; listing `base` itself would yield
    # 'totreat'/'photos' and point at non-existent listing files.
    source_root = f'{base}/totreat'
    for folder in list_folders(source_root):
        df = pd.read_csv(f'{source_root}/{folder}/listing.csv')
        if 'date' not in df.columns:
            continue

        for date in df['date'].unique():
            # NaN never compares equal to itself, so undated rows need isna().
            mask = df['date'].isna() if pd.isna(date) else df['date'] == date
            output_dir = f'{base}/photos/{batch}/{date}'
            os.makedirs(output_dir, exist_ok=True)
            output_file = f'{output_dir}/{uuid.uuid4()}.csv'
            df[mask].to_csv(output_file, index=False)

In [37]:
def merge_csv(batch):
	"""Concatenate the per-date CSV files of a batch into ``merged.csv``.

	For every folder of ``artefacts/photos/{batch}``, all CSV files of the
	folder are concatenated and written to ``merged.csv`` in that folder.

	Args:
		batch: Batch identifier naming the sub-folder of ``artefacts/photos``.
	"""
	base = f'artefacts/photos/{batch}'
	for folder in list_folders(base):
		path = f'{base}/{folder}'
		# Leave out a merged.csv from an earlier run, otherwise re-running
		# would feed the output back in and duplicate every row.
		files = [file for file in list_files(path, '.csv') if file != 'merged.csv']
		if not files:
			# pd.concat raises ValueError on an empty list; nothing to merge here.
			continue
		df = pd.concat([pd.read_csv(f'{path}/{file}') for file in files])
		df.to_csv(f'{path}/merged.csv', index=False)

In [97]:
# Run the whole batch pipeline; each step relies on the files written by the previous one.
# 1. Write a listing.csv in every source folder.
generate_csv()
# 2. Add the `date` and `uuid` columns to every listing.csv.
generate_filename()
# 3. Draw a random identifier for this batch.
batch = uuid.uuid4()
# 4. Build the copy plan, then copy the photos under artefacts/photos/{batch}/{date}.
listing = list_move_files(batch)
move_files(listing)
# 5. Write per-date CSV files next to the copied photos, then merge them per folder.
split_csv(batch)
merge_csv(batch)