In [None]:
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
import pandas as pd

# Scrape a Google Scholar profile: for every listed paper collect its title,
# the year shown in the listing, and the abstract from the paper's detail
# page, then export everything to an Excel workbook.

# Set up fake user agent
ua = UserAgent()
headers = {"User-Agent": ua.random}
data1 = {"Date Cited": [], "Paper Title": [], "Abstract": []}

# URL
url = "https://scholar.google.com/citations?hl=en&user=trMsrB4AAAAJ&view_op=list_works&sortby=pubdate"

# Upper bound (seconds) for any single HTTP request, so a stalled connection
# cannot hang the script indefinitely.
REQUEST_TIMEOUT = 30

ABSTRACT_PLACEHOLDER = "Abstract not available"


def _get_page(session, page_url):
    """Return the response for *page_url*, or None on a network error or non-200 status."""
    try:
        page = session.get(page_url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    return page if page.status_code == 200 else None


def _fetch_abstract(session, paper_url):
    """Return the abstract text from a paper's detail page, or the placeholder."""
    page = _get_page(session, paper_url)
    if page is None:
        return ABSTRACT_PLACEHOLDER
    paper_soup = BeautifulSoup(page.content, "html.parser")
    abstract_container = paper_soup.find(
        "div", {"id": "gsc_oci_descr", "class": "gsc_oci_value"}
    )
    if abstract_container is None:
        return ABSTRACT_PLACEHOLDER
    return abstract_container.text.strip()


def _collect_papers(session, page):
    """Walk the paginated paper list starting at *page*, filling ``data1``.

    Stops when the "Show more" button is missing or disabled, when a page
    yields no new papers, or when a follow-up page cannot be fetched. Whatever
    was collected up to that point stays in ``data1``.
    """
    offset = 0  # number of listing rows consumed so far; used as ``cstart``
    while page is not None:
        # Parse the HTML content of the page using BeautifulSoup
        soup = BeautifulSoup(page.content, "html.parser")

        # Find the container that holds the papers
        papers_container = soup.find("tbody", {"id": "gsc_a_b"})
        if not papers_container:
            print("No papers found on the page.")
            return

        papers = papers_container.find_all("tr", {"class": "gsc_a_tr"})
        added = 0
        for paper in papers:
            title_link = paper.find("a", {"class": "gsc_a_at"})
            if title_link is None:
                # Row without a title link (e.g. a placeholder row): skip it
                # instead of crashing on ``None.text``.
                continue
            date_cell = paper.find("td", {"class": "gsc_a_y"})
            date_cited = date_cell.text.strip() if date_cell is not None else ""

            href_link = title_link.get("href")
            if href_link:
                abstract = _fetch_abstract(
                    session, f"https://scholar.google.com{href_link}"
                )
            else:
                abstract = ABSTRACT_PLACEHOLDER

            # Append all three fields together so the columns can never end
            # up with different lengths.
            data1["Paper Title"].append(title_link.text.strip())
            data1["Date Cited"].append(date_cited)
            data1["Abstract"].append(abstract)
            added += 1

        offset += len(papers)
        if added == 0:
            # No progress on this page; requesting again would loop forever.
            return

        # Continue only if there is a "Show more" button that is not disabled.
        show_more_button = soup.find("button", {"id": "gsc_bpf_more"})
        if show_more_button is None or show_more_button.has_attr("disabled"):
            return

        # Equivalent of clicking "Show more": request the next slice.
        page = _get_page(session, f"{url}&cstart={offset}")
        if page is None:
            print(
                "Failed to fetch more papers; keeping the "
                f"{len(data1['Paper Title'])} collected so far."
            )


# One session for all requests: reuses the connection and sends the same
# headers on every call.
with requests.Session() as session:
    session.headers.update(headers)

    # Send an HTTP GET request to the profile page
    response = _get_page(session, url)

    if response is not None:
        _collect_papers(session, response)

        # Create a DataFrame
        df = pd.DataFrame(data1)

        # Export the DataFrame to Excel
        df.to_excel("papers_data.xlsx", index=False)

        print("Data extraction and export to Excel completed.")
    else:
        print("Failed to fetch the Google Scholar profile page.")