In [58]:
import os
import ast
import pandas as pd
import psycopg2
from collections import defaultdict
from sqlalchemy import create_engine
from urllib.parse import quote_plus

# Shared configuration constant. find_most_matches reads it as the exclusive
# upper bound on the number of shared articles a match is allowed to have.
# Presumably the number of articles recommended to each researcher - TODO confirm.
nr_of_recommendations = 30

In [59]:
# Connection details.
# Each value can be overridden through the standard PostgreSQL (libpq)
# environment variables so real credentials never have to be stored in the
# notebook; the literals below are only local-development fallbacks.
db_user = os.environ.get('PGUSER', 'postgres')
db_password = os.environ.get('PGPASSWORD', 'admin')
db_host = os.environ.get('PGHOST', 'localhost')
db_port = os.environ.get('PGPORT', '5432')
db_name = os.environ.get('PGDATABASE', 'AOI')

# Percent-encode the password so that special characters (e.g. '@', '/')
# cannot break the connection URL handed to SQLAlchemy below.
password = quote_plus(db_password)

# Raw psycopg2 connection (takes the unencoded password).
# NOTE(review): the queries in this cell go through `engine`, not `conn`;
# if no later cell needs `conn`, drop it or close it to avoid an idle session.
conn = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)

# SQLAlchemy engine used by pandas.read_sql
engine = create_engine(f'postgresql://{db_user}:{password}@{db_host}:{db_port}/{db_name}')

# PostgreSQL tables -> pandas DataFrames
researchers_query = 'SELECT "Researcher ID", "Full Name", "Expertise", "Appreciated", "Random Recommendation" FROM researchers_table'
articles_query = 'SELECT id, title, authors, url, subject_split FROM articles_table'

researchers_df = pd.read_sql(researchers_query, engine)
articles_df = pd.read_sql(articles_query, engine)

# Preview up to the first 33 researcher rows as the cell output.
researchers_df.head(33)


Unnamed: 0,Researcher ID,Full Name,Expertise,Appreciated,Random Recommendation
0,AAG-9392-2021,"APOSTOL, Elena Simona","['Distributed systems', 'IT security', 'Parall...","arXiv:2310.02113, arXiv:2310.05269, arXiv:2310...",
1,JCE-1061-2023,"CARABAS, Costin","['Computer Science', 'Software Engineering', '...","arXiv:2310.00562, arXiv:2310.03736, arXiv:2310...",
2,AAY-5210-2020,"CHIRU, Costin Gabriel","['NLP', 'Machine learning', 'Artificial intell...","arXiv:2310.02357, arXiv:2310.14261, arXiv:2310...",
3,C-5751-2009,"CIRTOAJE, Cristina","['Liquid crystal', 'Liquid crystals, polarised...","arXiv:2310.04022, arXiv:2310.10524, arXiv:2310...",
4,O-4984-2014,"DASCALU, Mihai","['NLP', 'Discourse analysis', 'Learning analyt...","arXiv:2310.02357, arXiv:2310.00603, arXiv:2310...",
5,L-6699-2016,"DEACONESCU, Razvan","['Systems', 'Security']","arXiv:2310.03598, arXiv:2310.03994, arXiv:2310...",
6,B-5699-2011,"DOBRE, Ciprian Mihai","['Communication protocols', 'Distributed syste...","arXiv:2310.00254, arXiv:2310.04975, arXiv:2310...",
7,G-5326-2016,"FLOREA, Adina Magda","['Artificial intelligence', 'Ambient intellige...","arXiv:2310.01557, arXiv:2310.03086, arXiv:2310...",
8,D-7296-2012,"LEORDEANU, Catalin","['Distributed Systems', 'Big Data', 'Security'...","arXiv:2311.00724, arXiv:2311.04517, arXiv:2311...",
9,AAT-3386-2021,"MOCANU, Bogdan Costel","['Distributed systems', 'Peer-To-Peer', 'Netwo...","arXiv:2310.08373, arXiv:2310.14283, arXiv:2310...",


In [60]:
def find_most_matches(target_researcher_id, researchers_df, max_common=None):
    """Find the researcher who shares the most appreciated articles with the target.

    Parameters
    ----------
    target_researcher_id : str
        Value of the 'Researcher ID' column identifying the target researcher.
    researchers_df : pandas.DataFrame
        Frame with at least the columns 'Researcher ID' and 'Appreciated'.
        'Appreciated' holds a comma-separated string of article ids; missing
        cells (None / NaN) and empty strings are treated as "no articles".
    max_common : int, optional
        Exclusive upper bound on the overlap size: candidates sharing
        ``max_common`` or more articles are ignored. Defaults to the
        module-level ``nr_of_recommendations``.

    Returns
    -------
    tuple
        ``(researcher_id, common_count)`` of the best match. When several
        candidates tie, the one appearing first in the frame wins. Returns
        ``('Not Found', 0)`` when the target id is not in the frame, the
        target has no appreciated articles, or no candidate qualifies.
    """

    def _article_ids(raw):
        # NULL / NaN cells are not strings and carry no article ids.
        if not isinstance(raw, str):
            return set()
        # Drop empty tokens so that '' never counts as a shared "article".
        return {token.strip() for token in raw.split(',') if token.strip()}

    if max_common is None:
        # Resolved at call time so later changes to the notebook constant apply.
        max_common = nr_of_recommendations

    max_common_count = 0
    most_common_person = 'Not Found'

    target_cells = researchers_df.loc[
        researchers_df['Researcher ID'] == target_researcher_id, 'Appreciated'
    ]
    if target_cells.empty:
        # Unknown researcher id: report "no match" instead of raising IndexError.
        return most_common_person, max_common_count

    # Parsed once here rather than on every loop iteration.
    target_articles = _article_ids(target_cells.iloc[0])
    if not target_articles:
        return most_common_person, max_common_count

    candidates = zip(researchers_df['Researcher ID'], researchers_df['Appreciated'])
    for researcher_id, appreciated in candidates:
        if researcher_id == target_researcher_id:
            continue

        common_count = len(target_articles & _article_ids(appreciated))

        # Strict '>' keeps the first candidate on ties; the upper bound is exclusive.
        if max_common_count < common_count < max_common:
            max_common_count = common_count
            most_common_person = researcher_id

    return most_common_person, max_common_count

In [61]:
# Print, for every researcher, the best match reported by find_most_matches.
for index, researcher in researchers_df.iterrows():
    most_common_person, max_common_count = find_most_matches(
        target_researcher_id=researcher['Researcher ID'],
        researchers_df=researchers_df,
    )

    # A zero count means find_most_matches found no qualifying match.
    if max_common_count == 0:
        print(f"No match found for {researcher['Full Name']}")
        continue

    # Translate the matched researcher ID back into a display name.
    is_match = researchers_df['Researcher ID'] == most_common_person
    found_person = researchers_df.loc[is_match, 'Full Name'].values[0]
    print(f"For researcher {researcher['Full Name']}, best match is {found_person} with a match of {max_common_count} common articles")

For researcher APOSTOL, Elena Simona, best match is LEORDEANU, Catalin with a match of 26 common articles
For researcher CARABAS, Costin, best match is MOCANU, Mariana Ionela with a match of 23 common articles
For researcher CHIRU, Costin Gabriel, best match is DASCALU, Mihai with a match of 14 common articles
For researcher CIRTOAJE, Cristina, best match is FLOREA, Adina Magda with a match of 1 common articles
For researcher DASCALU, Mihai, best match is CHIRU, Costin Gabriel with a match of 14 common articles
No match found for DEACONESCU, Razvan
For researcher DOBRE, Ciprian Mihai, best match is APOSTOL, Elena Simona with a match of 11 common articles
For researcher FLOREA, Adina Magda, best match is CIRTOAJE, Cristina with a match of 1 common articles
For researcher LEORDEANU, Catalin, best match is APOSTOL, Elena Simona with a match of 26 common articles
For researcher MOCANU, Bogdan Costel, best match is NEGRU, Catalin with a match of 19 common articles
For researcher MOCANU, Mar