In [30]:
# Install the notebook's dependencies. `%pip` (unlike `!pip`) installs into the
# environment of the running kernel, so the imports in the next cell resolve.
# openpyxl is needed by pandas to write .xlsx files.
%pip install -q requests beautifulsoup4 pandas openpyxl



In [31]:
import re
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [43]:
def _parse_citation_count(result):
    """Return N from the first "Cited by N" link in a result block, else 0."""
    links_div = result.find('div', class_='gs_fl')
    if links_div is None:
        return 0
    # Search every link rather than assuming a fixed position for "Cited by".
    for anchor in links_div.find_all('a'):
        match = re.search(r'Cited by\s+(\d+)', anchor.text)
        if match:
            return int(match.group(1))
    return 0


def _parse_publication_year(result):
    """Return the first four-digit 19xx/20xx number in the `gs_a` div, else None."""
    author_div = result.find('div', class_='gs_a')
    if author_div is None:
        return None
    # A bare "first numeric token" would latch onto volume/issue numbers;
    # restrict the match to something that looks like a year.
    match = re.search(r'\b(?:19|20)\d{2}\b', author_div.text)
    return int(match.group()) if match else None


def get_top_cited_papers(query, max_age_years=20, top_n=20,
                         output_path='top_cited_papers.xlsx', timeout=10):
    """Scrape one page of Google Scholar results and rank them by citations.

    Parameters
    ----------
    query : str
        Search phrase. It is URL-encoded by ``requests``, so characters such
        as ``&`` or ``#`` are safe.
    max_age_years : int, default 20
        Keep only papers whose publication year is at most this many years
        before the current year.
    top_n : int, default 20
        Maximum number of rows to return.
    output_path : str or None, default 'top_cited_papers.xlsx'
        Excel file the ranked table is written to. Pass ``None`` to skip
        writing.
    timeout : float, default 10
        Seconds to wait for the HTTP response.

    Returns
    -------
    pandas.DataFrame
        Columns ``Title``, ``Abstract``, ``Link``, ``Cited by`` and
        ``Publication Year``, sorted by ``Cited by`` descending. The frame is
        empty (but still has these columns) when nothing could be parsed.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException
        On connection problems or timeout.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    response = requests.get(
        'https://scholar.google.com/scholar',
        params={'hl': 'en', 'q': query},
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly on an error status instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    current_year = datetime.now().year
    papers = []

    for result in soup.find_all('div', class_='gs_ri'):
        title_element = result.find('h3', class_='gs_rt')
        if title_element is None:
            continue

        anchor = title_element.a
        link = anchor.get('href', 'No link available') if anchor else 'No link available'

        abstract = result.find('div', class_='gs_rs')
        abstract_text = abstract.text if abstract else "No abstract available"

        year = _parse_publication_year(result)
        # Entries without a recognisable year are skipped, as are old ones.
        if year is None or current_year - year > max_age_years:
            continue

        papers.append({
            'Title': title_element.text,
            'Abstract': abstract_text,
            'Link': link,
            'Cited by': _parse_citation_count(result),
            'Publication Year': year,
        })

    # Explicit columns keep sort_values from raising KeyError on an empty list.
    columns = ['Title', 'Abstract', 'Link', 'Cited by', 'Publication Year']
    papers_df = pd.DataFrame(papers, columns=columns)
    sorted_papers_df = papers_df.sort_values(by='Cited by', ascending=False).head(top_n)

    # Write DataFrame to an Excel file
    if output_path is not None:
        sorted_papers_df.to_excel(output_path, index=False)

    return sorted_papers_df

In [44]:
# Search phrase passed to get_top_cited_papers.
query = "GenAI and African Entrepreneurship"
# Fetch, filter and rank the results. As a side effect the function also
# writes the ranked table to 'top_cited_papers.xlsx'.
result_df = get_top_cited_papers(query)
# Plain-text dump of the ranked DataFrame; wide columns are truncated and
# wrapped by pandas' text repr.
print(result_df)

                                               Title  \
2  [HTML][HTML] Drivers of generative AI adoption...   
4  Why and how is the power of Big Tech increasin...   
0  Preparing the next generation: Integrating gen...   
1  Exploring the Transformative Journey of Academ...   
3  [BOOK][B] Innovation and Entrepreneurship from...   
5  Attracting Investment and Reducing Poverty in ...   
6  The Role of Generative Artificial Intelligence...   
7  Robots vs. Predators: Can Generative Artificia...   
8  Making Sense of the Information Needs for Acqu...   

                                            Abstract  \
2  … intention to use GenAI tools, and how the la...   
4  … of its GenAI tools) and Anthropic (investmen...   
0  … following positions concerning GenAI’s place...   
1  … Using a comparative approach, this study loo...   
3  … We will then turn to entrepreneurship and en...   
5  Africa’s greatest assets are its young and dyn...   
6  … An advanced form of artificial intelligenc