In [3]:
import os
import pdfplumber
import pandas as pd
import gspread
from oauth2client.service_account import ServiceAccountCredentials

In [None]:
def extract_data_from_pdf(pdf_path):
    """Extract the text of a PDF and return it as a DataFrame of tokens.

    Every non-blank line of text found on any page becomes one row: the
    line is split on whitespace and each token becomes one cell. Rows with
    fewer tokens than the widest row are padded by pandas with missing
    values. Adjust the parsing here depending on the structure of your PDF.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A pandas DataFrame with one row per non-blank text line and the
        default integer column labels.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            # extract_text() can come back as None (or "") for a page that has
            # no text layer, e.g. a scanned image; treat it as an empty page
            # instead of failing on `None + "\n"`.
            page_texts.append(page.extract_text() or "")

    # Join once instead of growing a string with += inside the loop.
    lines = "\n".join(page_texts).splitlines()
    rows = [line.split() for line in lines if line.strip()]
    return pd.DataFrame(rows)

def upload_pdfs_to_google_sheets(folder_path,sheet_id,creds_path):
    """Upload the contents of every PDF in a folder to a Google spreadsheet.

    Each PDF is parsed with ``extract_data_from_pdf`` and written to its own
    worksheet, named after the file without its extension. An existing
    worksheet with that name is cleared and overwritten; otherwise a new one
    is created.

    Args:
        folder_path: Directory that is scanned (non-recursively) for PDFs.
        sheet_id: Key of the target Google spreadsheet.
        creds_path: Path to the service-account JSON key file.

    Raises:
        gspread.exceptions.SpreadsheetNotFound: Propagated from
            ``open_by_key``. NOTE(review): this usually means the key is
            wrong or the spreadsheet is not shared with the service
            account's e-mail address -- confirm for your setup.
    """
    # Authenticate with the Google Sheets API.
    scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
    creds = ServiceAccountCredentials.from_json_keyfile_name(creds_path, scope)
    client = gspread.authorize(creds)

    # Open the target spreadsheet.
    sheet = client.open_by_key(sheet_id)

    # Process every PDF file in the folder.
    for file_name in os.listdir(folder_path):
        # Compare case-insensitively so "REPORT.PDF" is picked up as well.
        if not file_name.lower().endswith(".pdf"):
            continue

        pdf_path = os.path.join(folder_path, file_name)
        sheet_name = os.path.splitext(file_name)[0]

        # Rows of different widths leave missing cells in the DataFrame;
        # replace them with empty strings so the upload payload contains
        # only plain values.
        df = extract_data_from_pdf(pdf_path).fillna("")

        # Reuse the worksheet when it exists and create it only when it is
        # really missing, so unrelated API failures (quota, permissions, ...)
        # are not mistaken for "worksheet already exists".
        try:
            worksheet = sheet.worksheet(sheet_name)
        except gspread.exceptions.WorksheetNotFound:
            worksheet = sheet.add_worksheet(title=sheet_name, rows=1000, cols=20)

        # Replace the worksheet contents: header row first, then the data.
        worksheet.clear()
        worksheet.update([df.columns.tolist()] + df.values.tolist())
        print(f"Uploaded {file_name} to sheet {sheet_name}")
        
if __name__ == "__main__":
    # Script entry point: the three settings below are hard-coded for one
    # machine -- the service-account key file, the target spreadsheet key and
    # the folder holding the PDFs to upload.
    creds_path = r"C:\Users\Nishant shah\OneDrive\Desktop\Numpy\subtle-reserve-441716-r6-6e49d5b4ced4.json"
    sheet_id = "1mUE0g_JYRWRhRE6GKtqRo_chW5d4_0UOM1Fg7yYrSzA"
    folder_path = r"C:\NISHANT\Skill Academy\Statistics\Probability Distributions & Central Limit Theorem\Probability Distributions & Central Limit Theorem"

    upload_pdfs_to_google_sheets(
        folder_path=folder_path,
        sheet_id=sheet_id,
        creds_path=creds_path,
    )
    

SpreadsheetNotFound: <Response [404]>