In [1]:
import os
from flask import Flask, request, jsonify
from flask_cors import CORS 
# import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
import nltk
import pymysql
import numpy as np
import datetime

from dotenv import load_dotenv

In [2]:

app = Flask(__name__)
app.json.sort_keys = False
CORS(app) 

db = pymysql.connect(
    host=os.getenv('DATABASE_HOST'),
    user=os.getenv('DATABASE_USER'),
    password=os.getenv('DATABASE_PASSWORD'),
    db=os.getenv('DATABASE_DB'),
    connect_timeout=8800,
    cursorclass=pymysql.cursors.DictCursor
)

## Authors

In [11]:
# sql_query= """
#             SELECT * FROM contributors 
#            """
sql_query= """
            SELECT * FROM author 
           """

In [9]:
db.ping(reconnect=True)
cursor = db.cursor()
cursor.execute(sql_query)
data = cursor.fetchall()

In [10]:
data

[{'author_id': 1,
  'journal_id': 1,
  'first_name': 'QCU',
  'last_name': 'Journal',
  'middle_name': '',
  'public_name': None,
  'email': 'qcujournalexample@gmail.com',
  'email_verified': 'qcujournalexample@gmail.com',
  'password': '5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8',
  'gender': '',
  'birth_date': '0000-00-00',
  'phone_number': '',
  'afiliations': None,
  'position': 'FACULTY',
  'country': None,
  'school_name': '',
  'field_of_expertise': '',
  'bio': None,
  'orc_id': '',
  'url_orc_id': '',
  'date_added': datetime.datetime(2024, 1, 8, 0, 0),
  'role': 'Admin',
  'marital_status': None,
  'status': 1,
  'privacyAgreement': 0,
  'affix': None,
  'public_private_profile': 0,
  'profile_pic': None},
 {'author_id': 159,
  'journal_id': None,
  'first_name': 'Eloisa Marie',
  'last_name': 'Baylon',
  'middle_name': 'Maglana',
  'public_name': None,
  'email': 'eloisamariebaylon@gmail.com',
  'email_verified': 'eloisamariebaylon@gmail.com',
  'pass

In [14]:
ids = [row['author_id'] for row in data]
field_of_expertises = [row['field_of_expertise'] for row in data]
bios = [row['bio'] for row in data] 



In [15]:
field_of_expertises

['',
 'Web Development',
 'Web Development',
 'Web Development',
 'Web development, Software Engineering',
 'sdfsdf',
 'Web Development, IT',
 'IT DEV',
 None,
 None,
 None,
 None,
 '',
 'Student, IT Student',
 None,
 'computer hardware',
 'Computer Hardware',
 None,
 '',
 None,
 None,
 None,
 None,
 'Math',
 'Web Developer, UI Designer, IT, Computer',
 'Developer HAAAAAAAAAAAAAAAAAAAAAAAAAAA',
 None,
 'it',
 'Umasa',
 'IT, Computer, Research',
 ', IT, COMPUTER',
 'Information Technology',
 None,
 None,
 None,
 None,
 None,
 None,
 'Math',
 'Human Settlement, Ecology, Transportation',
 None]

In [32]:

# Preprocessing
nltk.download("stopwords")
stop_words = set(stopwords.words("english"))

modified_bios = []

for n, bio in enumerate(bios):
    if bio is None:
        modified_bios.append("")
        continue
    temp = bio.lower().split(" ")
    temp = [''.join([letter for letter in word if letter.isalnum()]) for word in temp]
    temp = [word for word in temp if word not in stop_words]
    temp = ' '.join(temp)
    modified_bios.append(temp)
    
# for n, field_of_expertise in enumerate(field_of_expertises):
#     temp = field_of_expertise.lower().split(" ")
#     temp = [''.join([letter for letter in word if letter.isalnum()]) for word in temp]
#     temp = [word for word in temp if word not in stop_words]
#     temp = ' '.join(temp)
#     field_of_expertise[n] = temp

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\kimbe\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [33]:
modified_bios

['',
 '',
 '',
 'move anim na letrang salita madaling sabihin mahirap gawin lalo na kung pipilitin mong kalimutan yung taong hanggang ngayon ay mahal mo pa rin',
 'passionate bs information technology qcu student loves learning developing programs reading documentations',
 '',
 'bsit ',
 '',
 'maginoo pero medyo bastos',
 '',
 '',
 '',
 'student ',
 '4th year student quezon city university',
 '',
 '',
 '',
 '',
 '',
 '',
 'hajafdd',
 '',
 'researcher document',
 '',
 'hahaahahhaahha',
 '',
 '',
 '',
 '',
 'hello world',
 'haha',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 'researcher',
 '']

In [34]:
modified_field_of_expertises = []

for n, field_of_expertise in enumerate(field_of_expertises):
    if field_of_expertise is None:
        modified_field_of_expertises.append("")
        continue
    temp = field_of_expertise.lower().split(" ")
    temp = [''.join([letter for letter in word if letter.isalnum()]) for word in temp]
    temp = [word for word in temp if word not in stop_words]
    temp = ' '.join(temp)
    modified_field_of_expertises.append(temp)

In [35]:
modified_field_of_expertises

['',
 'web development',
 'web development',
 'web development',
 'web development software engineering',
 'sdfsdf',
 'web development',
 'dev',
 '',
 '',
 '',
 '',
 '',
 'student student',
 '',
 'computer hardware',
 'computer hardware',
 '',
 '',
 '',
 '',
 '',
 '',
 'math',
 'web developer ui designer computer',
 'developer haaaaaaaaaaaaaaaaaaaaaaaaaaa',
 '',
 '',
 'umasa',
 'computer research',
 ' computer',
 'information technology',
 '',
 '',
 '',
 '',
 '',
 '',
 'math',
 'human settlement ecology transportation',
 '']

In [38]:
joined_data = [field_of_expertise.replace(" ", "") + " " + bio if bio is not None else None for field_of_expertise, bio in zip(modified_field_of_expertises, modified_bios)]

print(joined_data)


[' ', 'webdevelopment ', 'webdevelopment ', 'webdevelopment move anim na letrang salita madaling sabihin mahirap gawin lalo na kung pipilitin mong kalimutan yung taong hanggang ngayon ay mahal mo pa rin', 'webdevelopmentsoftwareengineering passionate bs information technology qcu student loves learning developing programs reading documentations', 'sdfsdf ', 'webdevelopment bsit ', 'dev ', ' maginoo pero medyo bastos', ' ', ' ', ' ', ' student ', 'studentstudent 4th year student quezon city university', ' ', 'computerhardware ', 'computerhardware ', ' ', ' ', ' ', ' hajafdd', ' ', ' researcher document', 'math ', 'webdeveloperuidesignercomputer hahaahahhaahha', 'developerhaaaaaaaaaaaaaaaaaaaaaaaaaaa ', ' ', ' ', 'umasa ', 'computerresearch hello world', 'computer haha', 'informationtechnology ', ' ', ' ', ' ', ' ', ' ', ' ', 'math ', 'humansettlementecologytransportation researcher', ' ']


## Articles

In [42]:
sql_query= """
            SELECT * FROM article WHERE status = 1 
           """

In [43]:
db.ping(reconnect=True)
cursor = db.cursor()
cursor.execute(sql_query)
data = cursor.fetchall()


In [44]:
data

[{'article_id': 3,
  'author_id': 1,
  'journal_id': 1,
  'issues_id': 1,
  'title': 'Addressing The Trade Offs Between Labor Productivity And Employment Among Call Center Agents In Metro Manila',
  'author': 'Caroline Romero, Luz Banes, Faustino Tolentino, Gertrudes Ochoa',
  'volume': 'Volume 1',
  'privacy': 0,
  'date': 'March 2023',
  'abstract': "Call center agents work in a telephone call center where they receive calls from customers inquiring about a product or service and make outgoing calls to current and potential customers. They must assist customers in a variety of areas, including billing, service issues, and orders, log calls made or received, and enter customer data into a specialized database. Additional responsibilities, such as sales and customer retention, are performed on a computer at a workstation or cubicle by agents in a call center. The majority of the work utilizes computers, software applications, and the internet. They are essential to the existence of all

In [45]:


id = [row['article_id'] for row in data]
overviews = [row['abstract'] for row in data]
titles = [row['title'] for row in data] 



In [46]:
for n, name in enumerate(overviews):
    temp = name.lower().split(" ")
    temp = [''.join([letter for letter in word if letter.isalnum()]) for word in temp]
    temp = [word for word in temp if word not in stop_words]
    temp = ' '.join(temp)
    overviews[n] = temp
    
for n, title in enumerate(titles):
    temp = title.lower().split(" ")
    temp = [''.join([letter for letter in word if letter.isalnum()]) for word in temp]
    temp = [word for word in temp if word not in stop_words]
    temp = ' '.join(temp)
    titles[n] = temp

## Cosine Similarity

In [48]:
# Calculate cosine similarity
from sklearn.feature_extraction.text import CountVectorizer

In [180]:
def get_reviewer_recommendation(input_article):

    sql_query = f'''
        SELECT * FROM author
    '''
    
    db.ping(reconnect=True)
    with db.cursor() as cursor:
        cursor.execute(sql_query)
        data = cursor.fetchall()
        
    # newIds = [row['article_id'] for row in datas]
    # newOverviews = [row['abstract'] for row in datas]
    # newTitles = [row['title'] for row in datas]
    ids = [row['author_id'] for row in data]
    field_of_expertises = [row['field_of_expertise'] for row in data]
    bios = [row['bio'] for row in data] 

    # print(len(newIds))
    # for i in newIds:
    #     print(i)

    
    modified_bios = []
    
    for n, bio in enumerate(bios):
        if bio is None:
            modified_bios.append("")
            continue
        temp = bio.lower().split(" ")
        temp = [''.join([letter for letter in word if letter.isalnum()]) for word in temp]
        temp = [word for word in temp if word not in stop_words]
        temp = ' '.join(temp)
        modified_bios.append(temp)
        
    modified_field_of_expertises = []

    for n, field_of_expertise in enumerate(field_of_expertises):
        if field_of_expertise is None:
            modified_field_of_expertises.append("")
            continue
        parts = field_of_expertise.strip().split(",")
        processed_parts = []
        for part in parts:
            words = part.lower().strip().split() 
            processed_part = " ".join(word for word in words if word not in stop_words) 
            processed_parts.append(processed_part)
        modified_field_of_expertises.append(" ".join(processed_parts)) 
        
    
    # Joining the data
    joined_data = [field_of_expertise + " " + bio if bio is not None else None for field_of_expertise, bio in zip(modified_field_of_expertises, modified_bios)]

    # Preprocess input_article
    input_article = input_article.lower().strip().split(" ")
    input_article = [''.join([letter for letter in word if letter.isalnum()]) for word in input_article]
    input_article = [word for word in input_article if word not in stop_words]
    input_article = ' '.join(input_article)
    joined_data.append(input_article)

    # Vectorization
    vectorizer = CountVectorizer().fit(joined_data)
    vectorized_data = vectorizer.transform(joined_data).toarray()

    # Compute cosine similarity
    cosine_sim_words = cosine_similarity(vectorized_data, vectorized_data)

    # Sort similar words
    similar_words = sorted(enumerate(cosine_sim_words[-1]), key=lambda x: x[1], reverse=True)

    recommended_articles = []

    # Iterate over similar words
    for i, similarity_score in similar_words:
        if i < len(joined_data) - 1:  
            recommended_article = {key: data[i][key] for key in data[i]}
            recommended_article['score'] = similarity_score
            recommended_articles.append(recommended_article)

    return recommended_articles

In [181]:
get_reviewer_recommendation("An information-analytical software has been developed for creating digital models of structures of porous materials. The information-analytical software allows you to select a model that accurately reproduces structures of porous materials—aerogels—creating a digital model by which you can predict their properties. In add")

[{'author_id': 174,
  'journal_id': None,
  'first_name': 'Claire',
  'last_name': 'Jacob',
  'middle_name': 'Samson',
  'public_name': None,
  'email': 'clairekaye.jacob18@gmail.com',
  'email_verified': 'clairekaye.jacob18@gmail.com',
  'password': '7b1c3024f98bff03edb36591f7f70ed3dae852ac8826b8ffbe9b54b006175210',
  'gender': 'Female',
  'birth_date': datetime.date(2001, 11, 18),
  'phone_number': '',
  'afiliations': 'COLLEGE OF COMPUTER STUDIES',
  'position': 'STUDENT',
  'country': 'Philippines',
  'school_name': None,
  'field_of_expertise': 'Networking, Programming, Software Tools',
  'bio': "I'm Claire, a tech-driven student skilled in programming, networking, and digital design. With experience in diff. coding languages & tools like CPT, I'm eager to tackle real-world challenges and become a leader in technology.",
  'orc_id': '0009-0007-0892-2033',
  'url_orc_id': None,
  'date_added': datetime.datetime(2024, 2, 24, 0, 0),
  'role': 'Author',
  'marital_status': 'Single',
 

In [176]:
get_reviewer_recommendation("Education is universally recognized as the answer to socio-economic problems of the world. Hence, course selection is one of many important choices that students will make in determining future plans. It becomes one of the biggest dilemma and challenge in any student's life. Hence, the study sought to investigate the course preference among students. The study used quantitative methods of data collection. The quantitative method was done through a survey questionnaire. The respondents are BSED major in English 2B and Science 2 majority are females, their parents' highest educational attainment finished Secondary Graduates have less than P11,690, monthly tuition and consist of 4 to 6 members. Findings revealed that employability, financial status, and availability of job are three of the most considered factors in choosing a college course among BSED English and Science students. It is also concluded that the factors affecting the course preference among students are less extent and that there are no significant differences on the course preference among respondents along all its demographic profile of the students. It is hereby recommended that students must have a clear understanding of what they really wanted in the future before choosing a course in college.")

[{'author_id': 177,
  'expertise': 'Computer Hardware',
  'bio': '“Education is one thing no one can take away from you.” ',
  'similarity': 0.10380684981717495},
 {'author_id': 162,
  'expertise': 'Education, Professor, Higher Education',
  'bio': 'Professor in quezon city university major in BS in EducatiaAs a dedicated educator and scholar, Dr. [Your Name] brings a wealth of experience and expertise to the field of education. With a passion for cultivating effective learning environments and foste',
  'similarity': 0.043768810953240846},
 {'author_id': 284,
  'expertise': 'IT, Computer, Research',
  'bio': 'Hello World!',
  'similarity': 0.032826608214930636},
 {'author_id': 1, 'expertise': '', 'bio': None, 'similarity': 0.0},
 {'author_id': 159,
  'expertise': 'Web Development',
  'bio': '',
  'similarity': 0.0},
 {'author_id': 160,
  'expertise': 'Web Development',
  'bio': '',
  'similarity': 0.0},
 {'author_id': 161,
  'expertise': 'Web Development',
  'bio': '“‘Move on.’ Anim n