In [26]:
import os
import ast
import pandas as pd

# Input spreadsheets live under ./datasets relative to the current working directory.
datasets_dir = os.path.join(os.getcwd(), "datasets")

researchers_path = os.path.join(datasets_dir, "users", "researchers.xlsx")
articles_path = os.path.join(datasets_dir, "articles", "excels_CS", "articles.xlsx")

In [27]:
# Load both workbooks; each is read from a single named sheet.
researchers_df = pd.read_excel(io=researchers_path, sheet_name="Sheet")
articles_df = pd.read_excel(io=articles_path, sheet_name="Sheet1")

In [28]:
def assign_weights(interests, initial_weight=0.9, decay=0.8):
    """Pair each interest with a geometrically decaying weight.

    The first interest receives ``initial_weight``; each following interest
    receives the previous (unrounded) weight multiplied by ``decay``. With the
    default ``decay`` below 1, earlier entries therefore get larger weights.

    Args:
        interests: Iterable of interests, in the order weights should be
            handed out. Any iterable works (list, tuple, generator, ...).
        initial_weight: Weight given to the first interest. Defaults to 0.9.
        decay: Multiplicative factor applied after each interest.
            Defaults to 0.8.

    Returns:
        list[tuple]: ``(interest, weight)`` pairs in input order, with each
        weight rounded to three decimal places. Empty input yields ``[]``.
    """
    weights = []
    weight = initial_weight

    for interest in interests:
        # Round only the stored value; the running weight keeps full precision
        # so rounding error does not compound across positions.
        weights.append((interest, round(weight, 3)))
        weight *= decay

    return weights

In [29]:
def compute_relevance(article_subjects, researcher_interests):
    """Score an article against a researcher's weighted interests.

    Every (subject, interest) pair is compared case-insensitively. A pair
    counts as a match when the two strings are equal ignoring case, or when
    the subject is "security" and the interest is "it security".

    Args:
        article_subjects: Iterable of subject strings attached to the article.
        researcher_interests: Iterable of ``(interest, weight)`` pairs; it is
            converted with ``dict()``, so a repeated interest keeps only its
            last weight.

    Returns:
        tuple: ``(relevance_score, matched_subjects)`` where the score is the
        sum of the weights of all matches and ``matched_subjects`` lists the
        matching interest names (original casing) in the order encountered.
    """
    weight_by_interest = dict(researcher_interests)
    total = 0
    hits = []

    for topic in article_subjects:
        for name, w in weight_by_interest.items():
            same_label = name.lower() == topic.lower()
            # Special case: a generic "security" subject also counts for "IT security".
            security_alias = topic.lower() == "security" and name.lower() == "it security"
            if not (same_label or security_alias):
                continue
            total += w
            hits.append(name)

    return total, hits


In [30]:
# Number of recommendations printed per researcher.
TOP_N = 10

# Parse every article's subject list once up front. The parsed subjects do not
# depend on the researcher, so re-running ast.literal_eval inside the
# researcher loop would repeat identical work for each researcher.
parsed_articles = [
    (article, ast.literal_eval(article['subject_split']))
    for _, article in articles_df.iterrows()
]

for _, researcher in researchers_df.iterrows():
    # 'Expertise' holds a Python-literal list stored as text in the spreadsheet.
    interests = ast.literal_eval(researcher['Expertise'])
    weighted_interests = assign_weights(interests)

    # Score every article against this researcher's weighted interests.
    articles_relevance = []
    for article, article_subjects in parsed_articles:
        relevance_score, matched_subjects = compute_relevance(article_subjects, weighted_interests)
        articles_relevance.append((article, relevance_score, matched_subjects))

    # Highest score first; the sort is stable, so ties keep spreadsheet order.
    articles_relevance.sort(key=lambda x: x[1], reverse=True)
    top_articles = articles_relevance[:TOP_N]

    print(f"\nTop {TOP_N} recommendations according to interests for {researcher['Full Name']}:")
    for article, relevance_score, matched_subjects in top_articles:
        print(f"ID: {article['id']} | URL: {article['url']} | Title: {article['title']} | Relevance Score: {relevance_score} | Matched Subjects: {matched_subjects}")
    


Top 10 recommendations according to interests for APOSTOL, Elena Simona:
ID: arXiv:2310.02113 | URL: https://arxiv.org/pdf/2310.02113.pdf | Title: FLEDGE: Ledger-based Federated Learning Resilient to Inference and  Backdoor Attacks | Relevance Score: 2.081 | Matched Subjects: ['Machine learning', 'Distributed systems', 'IT security']
ID: arXiv:2310.05269 | URL: https://arxiv.org/pdf/2310.05269.pdf | Title: Federated Learning: A Cutting-Edge Survey of the Latest Advancements and  Applications | Relevance Score: 2.081 | Matched Subjects: ['Machine learning', 'Distributed systems', 'IT security']
ID: arXiv:2310.11730 | URL: https://arxiv.org/pdf/2310.11730.pdf | Title: Federated Heterogeneous Graph Neural Network for Privacy-preserving  Recommendation | Relevance Score: 2.081 | Matched Subjects: ['Machine learning', 'Distributed systems', 'IT security']
ID: arXiv:2310.09665 | URL: https://arxiv.org/pdf/2310.09665.pdf | Title: A Blockchain-empowered Multi-Aggregator Federated Learning Arc