# **Installations**


In [9]:
pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client pandas




# **Drive Link Extractor**


In [25]:
import os
import json
import pickle
import pandas as pd
from google.colab import auth
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
import google.auth
from google.auth import credentials

In [26]:
# OAuth scope requested from Google: read-only access to Drive.
SCOPES = [
    'https://www.googleapis.com/auth/drive.readonly',
]

# JSON checkpoint file recording how far the CSV update has progressed.
PROGRESS_FILE = 'progress.json'


In [27]:
def initialize_progress():
    """Create the progress checkpoint with zeroed counters if it is missing.

    An already existing PROGRESS_FILE is left untouched.
    """
    if os.path.exists(PROGRESS_FILE):
        return

    fresh_state = {
        'processed_rows': 0,
        'total_rows': 0,
        'last_successful_row': 0,
    }
    with open(PROGRESS_FILE, 'w') as handle:
        json.dump(fresh_state, handle)

def load_progress():
    """Read PROGRESS_FILE and return its decoded JSON content."""
    with open(PROGRESS_FILE, 'r') as handle:
        raw_state = handle.read()
    return json.loads(raw_state)

def save_progress(processed, total, last_successful):
    """Overwrite PROGRESS_FILE with the given counters.

    Args:
        processed: Value stored under 'processed_rows'.
        total: Value stored under 'total_rows'.
        last_successful: Value stored under 'last_successful_row'.
    """
    state = {
        'processed_rows': processed,
        'total_rows': total,
        'last_successful_row': last_successful,
    }
    with open(PROGRESS_FILE, 'w') as handle:
        json.dump(state, handle)

def authenticate_google_drive():
    """Build a Drive v3 client, reusing cached credentials when possible.

    Credentials are first looked up in ``token.pickle``. If none are cached, or
    the cached ones are not valid, they are refreshed when they are expired and
    carry a refresh token; otherwise the Colab sign-in is run and default
    credentials are requested with SCOPES. Credentials obtained either way are
    written back to ``token.pickle``.

    Returns:
        The object returned by ``build('drive', 'v3', credentials=...)``.
    """
    token_path = 'token.pickle'
    creds = None

    # NOTE(review): pickle.load can execute code embedded in the file, so this
    # is only safe while token.pickle is written exclusively by this function.
    if os.path.exists(token_path):
        with open(token_path, 'rb') as cached:
            creds = pickle.load(cached)

    if not (creds and creds.valid):
        refreshable = creds and creds.expired and creds.refresh_token
        if refreshable:
            creds.refresh(Request())
        else:
            auth.authenticate_user()
            creds, _ = google.auth.default(scopes=SCOPES)

        # Cache whatever was just obtained for the next run.
        with open(token_path, 'wb') as cached:
            pickle.dump(creds, cached)

    return build('drive', 'v3', credentials=creds)

def list_files_in_folder(service, folder_id):
    all_files = []
    page_token = None
    query = f"'{folder_id}' in parents"

    while True:
        results = service.files().list(
            q=query,
            fields="files(id, name, mimeType, parents, webViewLink), nextPageToken",
            pageToken=page_token
        ).execute()

        files = results.get('files', [])
        for file in files:
            if file['mimeType'] != 'application/vnd.google-apps.folder':
                all_files.append(file)
            else:
                all_files.extend(list_files_in_folder(service, file['id']))

        page_token = results.get('nextPageToken')
        if not page_token:
            break

    return all_files

def extract_image_name(hyperlink_formula):
    """Return the file name referenced by a spreadsheet HYPERLINK formula.

    The target is the first double-quoted argument, so a trailing label
    argument (``=HYPERLINK("path","label")``) is ignored. A value without any
    quotes is treated as the path itself. Both ``\\`` and ``/`` are accepted
    as path separators.

    Args:
        hyperlink_formula: Cell value, e.g. ``=HYPERLINK("C:\\shots\\a.png")``.

    Returns:
        The last path component of the target, or "" when the input is not a
        string or no name can be extracted.
    """
    if not isinstance(hyperlink_formula, str):
        return ""

    opening = hyperlink_formula.find('"')
    if opening == -1:
        # No quotes at all: slicing with find()/rfind() results of -1 would
        # silently drop the last character, so use the whole value instead.
        target = hyperlink_formula
    else:
        closing = hyperlink_formula.find('"', opening + 1)
        if closing == -1:
            target = hyperlink_formula[opening + 1:]
        else:
            target = hyperlink_formula[opening + 1:closing]

    # Normalise separators so Windows paths, POSIX paths and URLs all reduce
    # to their final component.
    return target.strip().replace('/', '\\').split('\\')[-1]

def find_image_url(service, folder_ids, image_name):
    """Find the Drive link of the file whose name matches ``image_name``.

    Folders are searched in the order given and the first folder containing a
    match wins. Within a folder, a case-insensitive exact name match is
    preferred; otherwise the first file whose name contains ``image_name``
    (case-insensitively) is used. This keeps ``1.png`` from resolving to
    ``11.png`` when both exist in the same folder.

    Args:
        service: Drive v3 client passed through to list_files_in_folder.
        folder_ids: Iterable of Drive folder IDs to search.
        image_name: File name to look for.

    Returns:
        The matching file's ``webViewLink`` ('' if the file has none), or None
        when nothing matches or ``image_name`` is empty.
    """
    # An empty needle is a substring of every name and would match any file.
    if not image_name:
        return None

    needle = image_name.lower()
    for folder_id in folder_ids:
        partial_match = None
        for file in list_files_in_folder(service, folder_id):
            if file['mimeType'] == 'application/vnd.google-apps.folder':
                continue
            candidate = file['name'].lower()
            if candidate == needle:
                return file.get('webViewLink', '')
            if partial_match is None and needle in candidate:
                partial_match = file
        if partial_match is not None:
            return partial_match.get('webViewLink', '')
    return None

def _load_previous_drive_links(output_file, expected_rows):
    """Return the 'Drive Link' column saved by an earlier run, or None if unusable."""
    if not os.path.exists(output_file):
        return None
    try:
        previous = pd.read_csv(output_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        # A run interrupted mid-write can leave a truncated file behind.
        return None
    if 'Drive Link' not in previous.columns or len(previous) != expected_rows:
        return None
    return previous['Drive Link'].astype(object)


def update_csv_with_screenshot(csv_file, folder_ids, service):
    """Fill a 'Drive Link' column for every row of ``csv_file``, resumably.

    Each row's 'ScreenShot' cell is parsed with extract_image_name and looked
    up with find_image_url. The frame is written to ``updated_<name>`` next to
    the input after every row and the checkpoint is advanced, so an
    interrupted run can continue from the recorded row.

    On resume, links produced by the earlier run are restored from the
    ``updated_`` file. Re-reading only the original CSV would blank them out
    on the next save. If they cannot be restored (file missing, unreadable or
    of a different length) processing restarts from the first row.

    Args:
        csv_file: Path of the input CSV; must contain a 'ScreenShot' column.
        folder_ids: Drive folder IDs passed to find_image_url.
        service: Drive v3 client passed to find_image_url.

    Raises:
        KeyError: If the CSV has no 'ScreenShot' column.
    """
    initialize_progress()
    progress = load_progress()

    df = pd.read_csv(csv_file)
    total_rows = len(df)
    if 'ScreenShot' not in df.columns:
        raise KeyError(f"Column 'ScreenShot' not found in {csv_file}")

    # Prefix only the file name so inputs given with a directory still work.
    directory, base_name = os.path.split(csv_file)
    output_file = os.path.join(directory, "updated_" + base_name)

    # Initialize new column if not exists
    if 'Drive Link' not in df.columns:
        df['Drive Link'] = None

    # Resume from last successful row, carrying over what was already found.
    start_row = progress['last_successful_row']
    if start_row > 0:
        previous_links = _load_previous_drive_links(output_file, total_rows)
        if previous_links is None:
            start_row = 0
        else:
            df['Drive Link'] = previous_links
    # A stale or out-of-range checkpoint must not index outside the frame.
    start_row = max(0, min(start_row, total_rows))

    # An all-empty column is read back as float; make it accept strings.
    df['Drive Link'] = df['Drive Link'].astype(object)

    # NOTE(review): find_image_url lists the Drive folders again on every call,
    # so each row costs a full recursive listing.
    for index in range(start_row, total_rows):
        row = df.iloc[index]
        print(f"\nProcessing row {index + 1}/{total_rows}")

        screenshot_formula = row['ScreenShot']
        result = "No screenshot provided"

        if pd.notna(screenshot_formula):
            image_name = extract_image_name(screenshot_formula)
            if image_name:
                screenshot_url = find_image_url(service, folder_ids, image_name)
                result = screenshot_url if screenshot_url else "Image not found"
            else:
                result = "Invalid hyperlink"

        # Update dataframe
        df.at[index, 'Drive Link'] = result

        # Save incremental progress
        df.to_csv(output_file, index=False)
        save_progress(index + 1, total_rows, index)
        print(f"Saved results up to row {index + 1}")

    # Final completion update; also guarantees the output exists for an empty CSV.
    df.to_csv(output_file, index=False)
    save_progress(total_rows, total_rows, max(total_rows - 1, 0))
    print("\nProcessing complete! Final results saved to " + output_file)


In [29]:
def main():
    """Authenticate against Drive, then run the CSV update on the configured inputs."""
    # NOTE(review): these look like placeholders to be replaced with the real
    # CSV path and Drive folder IDs before running.
    csv_file = 'Your_csv.csv'
    folder_ids = ['Folder_1', 'Folder_2']

    drive_service = authenticate_google_drive()
    update_csv_with_screenshot(csv_file, folder_ids, drive_service)

# Entry point: run main() only when this code executes as the top-level
# module (which includes a notebook cell), not when it is imported.
if __name__ == '__main__':
    main()


Processing row 746/746
Saved results up to row 746

Processing complete! Final results saved to updated_leads_combined.csv
