Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Markus Borg committed Jul 24, 2022
2 parents 11a51ba + 3f6816a commit 8f0f352
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 60 deletions.
4 changes: 0 additions & 4 deletions test/test_1_nonsense.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,8 @@

import pytest
import os.path
import csv
from datetime import date
from swesesci.scholar import SSSScholar
from swesesci.affiliation import SSSAffiliation
from swesesci.scholar_reader import ScholarReader
from swesesci.scholar_miner import ScholarMiner

class TestClass_NonSense:

Expand Down
3 changes: 0 additions & 3 deletions test/test_3_twoscholars.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
import pytest
import os.path
from datetime import date
from swesesci.scholar import SSSScholar
from swesesci.affiliation import SSSAffiliation
from swesesci.scholar_reader import ScholarReader
from swesesci.scholar_miner import ScholarMiner
from swesesci.scholar_analyzer import ScholarAnalyzer
Expand All @@ -32,7 +30,6 @@ def setup_method(self):
self.miner = ScholarMiner(self.filename_prefix, self.sss_scholars, self.sss_affiliations)
self.miner.parse_scholars()
self.sss_scholars = self.miner.get_scholars()
print(self.sss_scholars)

def test_two_results(self):
# TC1: Test that DBLP returns a result
Expand Down
1 change: 1 addition & 0 deletions test/test_4_nonasciititles.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Affiliation,Mauro Caporuscio,-1,https://dblp.org/pid/c/MauroCaporuscio.xml
66 changes: 13 additions & 53 deletions test/test_4_nonasciititles.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,80 +5,40 @@
@author: Markus Borg
"""

import pytest
import os.path
from datetime import date
from swesesci.scholar import SSSScholar
from swesesci.affiliation import SSSAffiliation
from swesesci.scholar_reader import ScholarReader
from swesesci.scholar_miner import ScholarMiner
from swesesci.scholar_analyzer import ScholarAnalyzer
from swesesci.scholar_tabulator import ScholarTabulator

def string_splitter(scholar_string):
    """Split one candidate-scholar row into its fields and extract the DBLP pid.

    Args:
        scholar_string: Indexable row holding, in order, the affiliation,
            the scholar's name, the running number and the DBLP XML URL.

    Returns:
        The tuple ``(affiliation, name, running_number, pid, url)``, where
        ``pid`` is the part of the URL that follows ``"pid/"``, cut off at
        ``".xml"``.

    Raises:
        IndexError: If the row has fewer than four fields, or if the URL
            does not contain the ``"pid/"`` marker.
    """
    affiliation = scholar_string[0]
    name = scholar_string[1]
    running_number = scholar_string[2]
    url = scholar_string[3]

    try:
        # Everything after "pid/" and before ".xml" is the DBLP person id.
        pid = url.split("pid/")[1].split(".xml")[0]
    except IndexError as err:
        # An f-string (rather than "+" concatenation) keeps a non-string name
        # from raising TypeError here and hiding the real problem.
        message = f"Invalid format of input XML URL. ({name})"
        print(message)
        # Carry the diagnostic in the exception itself so it shows up in
        # tracebacks and test reports, not only on stdout.
        raise IndexError(message) from err

    return affiliation, name, running_number, pid, url

class TestClass_NonASCIITitles:

def setup_method(self):
self.scholars = []
self.affiliations = []
self.sss_scholars = []
self.sss_affiliations = []
subdirectory = "output"
try:
os.mkdir(subdirectory)
except Exception:
pass
self.filename_prefix = os.path.join(subdirectory, str(date.today()) + "_sss_")
self.test_nonascii_scholar = [("Mauro Caporuscio", "-1", "https://dblp.org/pid/c/MauroCaporuscio.xml")]

def add_sss_scholars(self, process_list, affiliation):
for person in process_list:
name = person[0]
running_number = person[1]
url = person[2]
# extract the pid from the url by substringing
try:
split1 = url.split("pid/")
split2 = split1[1].split(".xml")
pid = split2[0]
except IndexError:
print("Invalid format of input XML URL.")
return

self.scholars.append(SSSScholar(name, running_number, pid, url, affiliation, -1))
tmp_aff = SSSAffiliation(affiliation)
if tmp_aff not in self.affiliations:
tmp_aff.nbr_scholars += 1
self.affiliations.append(tmp_aff)
else:
curr = next((x for x in self.affiliations if affiliation == x.name), None)
curr.nbr_scholars += 1
reader = ScholarReader("test/test_4_nonasciititles.csv")
self.sss_scholars, self.sss_affiliations = reader.read_candidate_scholars()
self.miner = ScholarMiner(self.filename_prefix, self.sss_scholars, self.sss_affiliations)
self.miner.parse_scholars()
self.sss_scholars = self.miner.get_scholars()

def test_mauro_caporuscio(self):
self.add_sss_scholars(self.test_nonascii_scholar, "N/A")
self.miner = ScholarMiner(self.filename_prefix, self.scholars, self.affiliations)
self.miner.parse_scholars() # This involves dealing with non-ASCII characters
self.scholars = self.miner.get_scholars()
mauro = None
for scholar in self.scholars:
for scholar in self.sss_scholars:
if scholar.name == "Mauro Caporuscio":
mauro = scholar

# TC1: Test that DBLP returns a result
assert self.scholars != None
assert len(self.scholars) == 1
assert self.sss_scholars != None
assert len(self.sss_scholars) == 1

# TC2: Test that Mauro has at least 40 DBLP entries
assert mauro.dblp_entries >= 40
Expand Down Expand Up @@ -108,11 +68,11 @@ def test_mauro_caporuscio(self):
assert file_stats_csv.st_size > 0

# TC8: Test analyzer
analyzer = ScholarAnalyzer(self.filename_prefix, self.scholars, self.affiliations)
analyzer = ScholarAnalyzer(self.filename_prefix, self.sss_scholars, self.sss_affiliations)
analyzer.analyze_individual_research_interests()
assert mauro.sss_contrib >= 1.50
assert mauro.sss_rating >= 1.00

# TC10: Test tabulator
tabulator = ScholarTabulator(self.filename_prefix, self.scholars, self.affiliations)
tabulator = ScholarTabulator(self.filename_prefix, self.sss_scholars, self.sss_affiliations)
tabulator.write_tables()

0 comments on commit 8f0f352

Please sign in to comment.