In [None]:
import requests
import pandas as pd

def find_papers_by_country_and_year(country, year, keywords, timeout=30):
    """Count Crossref works from ``year`` with an author affiliated to ``country``.

    A single request is sent to the Crossref ``/works`` endpoint, restricted
    to publication dates within ``year`` and queried with all ``keywords``
    joined by spaces. Each returned work is counted at most once, when any
    affiliation name of any of its authors contains ``country``
    (case-insensitive substring match).

    Only the one page requested here (``rows=1000``) is inspected; there is
    no pagination, so the result can never exceed the number of items in
    that page.

    Args:
        country: Country name to look for inside affiliation names.
        year: Publication year used to build the date filter.
        keywords: Iterable of strings joined with spaces into the query.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the caller indefinitely.

    Returns:
        The number of returned works with at least one matching affiliation.

    Raises:
        requests.HTTPError: If the API responds with an error status.
        requests.Timeout: If the request does not complete within ``timeout``.
    """
    base_url = "https://api.crossref.org/works"
    params = {
        "filter": f"from-pub-date:{year}-01-01,until-pub-date:{year}-12-31",
        "query": " ".join(keywords),
        "rows": 1000,
    }

    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()

    items = response.json().get("message", {}).get("items", [])

    # Lower-case the search term once instead of once per affiliation.
    needle = country.lower()

    def has_matching_affiliation(item):
        # NOTE(review): plain substring matching means e.g. "India" also
        # matches "Indiana"; confirm whether that is acceptable.
        return any(
            # ``or ""`` guards against an affiliation whose name is null.
            needle in (affiliation.get("name") or "").lower()
            for author in item.get("author", [])
            for affiliation in author.get("affiliation", [])
        )

    # Count papers, not authors: a work with several matching authors
    # contributes exactly one to the total.
    return sum(1 for item in items if has_matching_affiliation(item))

# Countries whose names are searched for in author affiliations
countries = [
    "Bangladesh",
    "India",
    "Pakistan",
    "Sri Lanka",
    "Nepal",
    "Bhutan",
    "Maldives",
    "Afghanistan",
    "USA",
    "China",
]

# Publication years covered: 2015 through 2024 inclusive
years = [*range(2015, 2025)]

# Search terms for general AI / machine learning / NLP publications.
# The order is preserved because the terms are joined into one query string.
general_ai_ml_nlp = [
    "Artificial Intelligence",
    "AI",
    "Machine Learning",
    "ML",
    "Neural Networks",
    "Deep Learning",
    "Deep Neural Networks",
    "Natural Language Processing",
    "NLP",
    "Text Mining",
    "Computational Linguistics",
    "Speech Recognition",
    "Voice Recognition",
    "Image Recognition",
    "Computer Vision",
    "Pattern Recognition",
    "Data Mining",
    "Predictive Analytics",
    "Supervised Learning",
    "Unsupervised Learning",
    "Reinforcement Learning",
    "Self-supervised Learning",
    "Zero-shot Learning",
    "Few-shot Learning",
    "Transfer Learning",
    "Bayesian Networks",
    "Probabilistic Models",
    "Graph Neural Networks",
    "Cognitive Computing",
    "Knowledge Representation",
    "Semantic Analysis",
    "Sentiment Analysis",
    "Recommendation Systems",
    "Clustering Algorithms",
    "Classification Algorithms",
    "Support Vector Machines",
    "Decision Trees",
    "Random Forests",
    "Gradient Boosting",
    "Ensemble Methods",
]
# Search terms for generative-AI publications.
# The order is preserved because the terms are joined into one query string.
generative_ai = [
    "Generative AI",
    "Large Language Models",
    "LLM",
    "ChatGPT",
    "GPT-3",
    "GPT-4",
    "GPT-2",
    "OpenAI GPT",
    "Transformer models",
    "Attention Mechanism",
    "Self-attention",
    "DALL-E",
    "BERT",
    "RoBERTa",
    "DistilBERT",
    "ALBERT",
    "T5",
    "XLNet",
    "ELECTRA",
    "Generative Pretrained Transformer",
    "GPT",
    "Autoregressive Models",
    "Masked Language Models",
    "Variational Autoencoders",
    "VAEs",
    "Generative Adversarial Networks",
    "GANs",
    "Conditional GANs",
    "CycleGAN",
    "StyleGAN",
    "DeepFake",
    "Text Generation",
    "Language Model",
    "Sequence-to-Sequence Models",
    "Seq2Seq",
    "Prompt Engineering",
    "Prompt Tuning",
    "Text-to-Image Generation",
    "Image Synthesis",
    "Neural Machine Translation",
    "NMT",
    "Dialogue Systems",
    "Conversational AI",
    "Natural Language Generation",
    "NLG",
    "Text Summarization",
    "Story Generation",
]

# Column layout of the results table and the keyword sets queried per row
column_names = ["Year", "Country", "AI Publications", "GenAI Publications"]
keyword_sets = (general_ai_ml_nlp, generative_ai)

# Collect one [year, country, AI count, GenAI count] row per country/year pair
data = []
for country in countries:
    for year in years:
        # The generator runs the general-AI query first, then the GenAI query
        ai_publications, genai_publications = (
            find_papers_by_country_and_year(country, year, keyword_set)
            for keyword_set in keyword_sets
        )
        data += [[year, country, ai_publications, genai_publications]]

# Assemble the collected rows into a dataframe and show it
df = pd.DataFrame(data, columns=column_names)
print(df)

    Year     Country  AI Publications  GenAI Publications
0   2015  Bangladesh                0                   0
1   2016  Bangladesh                2                   0
2   2017  Bangladesh                0                   0
3   2018  Bangladesh                0                   0
4   2019  Bangladesh                0                   0
..   ...         ...              ...                 ...
95  2020       China               41                   0
96  2021       China               98                   0
97  2022       China              403                   0
98  2023       China              475                  54
99  2024       China             1356                  45

[100 rows x 4 columns]


In [None]:
# Preview the first 20 rows of the results table
df.iloc[:20]

Unnamed: 0,Year,Country,AI Publications,GenAI Publications
0,2015,Bangladesh,0,0
1,2016,Bangladesh,2,0
2,2017,Bangladesh,0,0
3,2018,Bangladesh,0,0
4,2019,Bangladesh,0,0
5,2020,Bangladesh,0,0
6,2021,Bangladesh,4,0
7,2022,Bangladesh,8,1
8,2023,Bangladesh,8,1
9,2024,Bangladesh,7,6


In [None]:
# Save the results table to CSV without the row index column.
# ``index`` is documented as a bool, so pass False explicitly rather than
# relying on None being falsy.
df.to_csv('gen_ai_publication_data.csv', index=False)