In [122]:
import pandas as pd
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.http import MediaFileUpload
import io
from googleapiclient.errors import HttpError
from dotenv import load_dotenv
import os
import re

In [123]:
# Load environment variables from the .env file
load_dotenv()

key_file_name = os.getenv('KEY_FILE_NAME')
folder_id = os.getenv('MY_FOLDER_ID')

# os.getenv returns None for an unset variable; stop here with a clear message
# rather than letting None flow into the key-file path or the Drive query.
missing_settings = [
    name
    for name, value in (('KEY_FILE_NAME', key_file_name), ('MY_FOLDER_ID', folder_id))
    if not value
]
if missing_settings:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(missing_settings)
    )

In [124]:
# Resolve the service-account key file relative to the notebook's working directory.
current_directory = os.getcwd()
service_account_json_key = os.path.join(current_directory, key_file_name)

# OAuth scope requested for the service account: the full Drive scope,
# which grants both read and write access.
scope = ['https://www.googleapis.com/auth/drive']

# Build credentials from the key file, then the Drive v3 client used by later cells.
credentials = service_account.Credentials.from_service_account_file(
    service_account_json_key,
    scopes=scope,
)
service = build('drive', 'v3', credentials=credentials)

In [125]:
def custom_sort_key(s):
    """Build a sort key that orders names by their first embedded number.

    Parameters
    ----------
    s : str
        Name to derive the key from, e.g. ``'12b.jpg'``.

    Returns
    -------
    tuple
        ``(number, rest)`` where ``number`` is the first run of digits in ``s``
        converted to ``int`` and ``rest`` is everything that follows that run.
        A name without any digit yields ``(float('inf'), s)`` so it sorts after
        every numbered name. Text before the first digit run is not part of
        the key.
    """
    # The capturing group keeps the digit runs in the result:
    # ['prefix', 'digits', 'text', 'digits', 'text', ...]; a name with no
    # digits comes back as a single-element list.
    parts = re.split(r'(\d+)', s)
    if len(parts) == 1:
        return (float('inf'), parts[0])
    # Use the whole tail after the first number, not just the next segment,
    # so names like '1a2.jpg' and '1a10.jpg' do not collapse to the same key.
    return (int(parts[1]), ''.join(parts[2:]))

In [126]:
page_token = None
Image_Name_To_Image_Id_List = []
# The request asks for up to 1000 files per page, so keep following
# nextPageToken until the listing of the folder is exhausted.
# NOTE(review): the query does not filter on `trashed`; confirm whether
# trashed files are meant to appear in the mapping.
while True:
    results = service.files().list(
        pageSize=1000,
        fields="nextPageToken, files(id, name)",
        q=f"'{folder_id}' in parents",
        pageToken=page_token
    ).execute()

    files = results.get('files', [])
    page_token = results.get('nextPageToken')

    # Collect one [name, id] pair per file on this page.
    for file in files:
        Image_Name_To_Image_Id_List.append([file['name'], file['id']])

    # Only a missing nextPageToken marks the end of the listing. An empty page
    # that still carries a token must not stop pagination, otherwise files on
    # later pages would be dropped.
    if not page_token:
        if not files:
            print(f'No more files found in folder with ID: {folder_id}')
        break

# Order the pairs by file name using the numeric-aware key.
Image_Name_To_Image_Id_List = sorted(Image_Name_To_Image_Id_List, key=lambda x: custom_sort_key(x[0]))


In [127]:
# Tabulate the sorted [name, id] pairs: one row per file.
mapper_columns = ['image_name', 'image_id']
df = pd.DataFrame(data=Image_Name_To_Image_Id_List, columns=mapper_columns)
df

Unnamed: 0,image_name,image_id
0,1a.jpg,1NL3oIljLGFbkGZa7BqTvPznTuUqGjEbj
1,1b.jpg,1D4nzD-_A1yqyebaVySyqKhsB9eFj53I_
2,3a.jpg,11FOlxo-C1NdDt0kATWu_S7dept-v4HwZ
3,3b.jpg,19EbzQqUfNvG5akf3Kyyan_6L8whoJOzu
4,3c.jpg,1QH3G-nbxHYKzvTAHoLoMHMeTw3QXS5cP
...,...,...
1968,200g.jpg,1gXMdCZXncnRSxZLLLb2yChmZZiq9ExoS
1969,200h.jpg,17ioZm_OxjtHU_WxSjQ61CFoMG79nMQrN
1970,200i.jpg,1c49anE4NWX5TSy0_CsgAiKXOCsv_NqQV
1971,200j.jpg,1QiucOrcrNr8vH1lrleCicvKLPxfHluYF


In [128]:
# Destination of the name-to-id mapping, relative to the working directory.
mapper_relative_path = '../Data/file_name_to_file_id_mapper.csv'
file_path = os.path.join(current_directory, mapper_relative_path)

# Write the DataFrame as CSV without the positional index column.
df.to_csv(path_or_buf=file_path, index=False)