Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hamlet/theses/fixtures/theses.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"model": "theses.thesis", "pk": 32600, "fields": {"title": "Induced polarization of metallic minerals : a study of its chemical basis.", "degree": "Ph.D. / Sc.D.", "url": "https://dspace.mit.edu/bitstream/1721.1/17134/2/02361749-MIT.pdf", "year": 1976, "identifier": 17134, "unextractable": true, "_vector": null, "department": [287]}}, {"model": "theses.thesis", "pk": 43703, "fields": {"title": "Clock division as a power saving strategy in a system constrained by high transmission frequency and low data rate", "degree": "Master's degree", "url": "https://dspace.mit.edu/bitstream/1721.1/33360/2/62413893-MIT.pdf", "year": 2005, "identifier": 33360, "unextractable": false, "_vector": null, "department": [275]}}, {"model": "theses.thesis", "pk": 60330, "fields": {"title": "Architecture for ultra-low power multi-channel transmitters for Body Area Networks using RF resonators", "degree": "Master's degree", "url": "https://dspace.mit.edu/bitstream/1721.1/66473/2/756456998-MIT.pdf", "year": 2011, "identifier": 66473, "unextractable": false, "_vector": null, "department": [275]}}, {"model": "theses.thesis", "pk": 76265, "fields": {"title": "Ultra low power, high sensitivity secure wake-up receiver for the Internet of Things", "degree": "Master's degree", "url": "https://dspace.mit.edu/bitstream/1721.1/111908/1/1005140973-MIT.pdf", "year": 2017, "identifier": 111908, "unextractable": false, "_vector": null, "department": [275]}}]
[{"model": "theses.thesis", "pk": 32600, "fields": {"title": "Induced polarization of metallic minerals : a study of its chemical basis.", "degree": "Ph.D. / Sc.D.", "url": "https://dspace.mit.edu/bitstream/1721.1/17134/2/02361749-MIT.pdf", "year": 1976, "identifier": 17134, "unextractable": true, "department": [287]}}, {"model": "theses.thesis", "pk": 43703, "fields": {"title": "Clock division as a power saving strategy in a system constrained by high transmission frequency and low data rate", "degree": "Master's degree", "url": "https://dspace.mit.edu/bitstream/1721.1/33360/2/62413893-MIT.pdf", "year": 2005, "identifier": 33360, "unextractable": false, "department": [275]}}, {"model": "theses.thesis", "pk": 60330, "fields": {"title": "Architecture for ultra-low power multi-channel transmitters for Body Area Networks using RF resonators", "degree": "Master's degree", "url": "https://dspace.mit.edu/bitstream/1721.1/66473/2/756456998-MIT.pdf", "year": 2011, "identifier": 66473, "unextractable": false, "department": [275]}}, {"model": "theses.thesis", "pk": 76265, "fields": {"title": "Ultra low power, high sensitivity secure wake-up receiver for the Internet of Things", "degree": "Master's degree", "url": "https://dspace.mit.edu/bitstream/1721.1/111908/1/1005140973-MIT.pdf", "year": 2017, "identifier": 111908, "unextractable": false, "department": [275]}}]
19 changes: 19 additions & 0 deletions hamlet/theses/migrations/0007_remove_thesis__vector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# Generated by Django 1.11.10 on 2018-02-06 21:14
from __future__ import unicode_literals

from django.db import migrations


class Migration(migrations.Migration):
    """Schema migration that removes the ``_vector`` field from ``thesis``."""

    # Applied after the preceding migration of the ``theses`` app.
    dependencies = [('theses', '0006_auto_20171120_1935')]

    operations = [
        migrations.RemoveField(model_name='thesis', name='_vector'),
    ]
17 changes: 1 addition & 16 deletions hamlet/theses/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def clean_metadata(namestring):
', Nav.E. Massachusetts Institute of Technology',
', Nav. E. Massachusetts Institute of Technology',
', M.B.A. Massachusetts Institute of Technology',
' Massachusetts Institute of Technology']
', Massachusetts Institute of Technology']

for deg in degrees:
names = [name.replace(deg, '') for name in names]
Expand Down Expand Up @@ -94,9 +94,7 @@ def clean_metadata(deptstring):

@staticmethod
def get_or_create_from_metadata(metadata):
    """Return the Department matching a raw metadata string.

    The string is first normalised with ``Department.clean_metadata``; the
    department with that cleaned name is then fetched, or created if no
    such department exists yet.

    :param metadata: raw department string to normalise and look up.
    :returns: the matching (possibly newly created) ``Department``.
    """
    clean = Department.clean_metadata(metadata)
    # get_or_create returns (object, created); the flag is not needed here.
    dept, _ = Department.objects.get_or_create(name=clean)
    return dept

Expand Down Expand Up @@ -136,15 +134,6 @@ class Thesis(models.Model):
help_text='Will be set to True if attempts to extract text from '
'the pdf failed; such theses are not part of the neural net, '
'and cannot be used in data visualization.')
# Contains the inferred vector for a document. Always needed for getting
# neural net data on documents outside of the training set. Documents
# inside the training set use their labels where possible, but cannot
# always do so (e.g. if being compared to documents outside the training
# set).
# The inferred vector is a numpy array; we use pickle to serialize it into
# bytes before storing it here. Access via the vector property, which
# unpickles.
_vector = models.BinaryField(blank=True, null=True)

def __str__(self):
return self.title
Expand All @@ -153,10 +142,6 @@ def __str__(self):
def label(self):
return '1721.1-{}.txt'.format(self.identifier)

@cached_property
def vector(self):
return pickle.loads(self._vector)

@cached_property
def authors(self):
contribs = Contribution.objects.filter(
Expand Down
Empty file added hamlet/theses/tests/__init__.py
Empty file.
176 changes: 176 additions & 0 deletions hamlet/theses/tests/test_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
from django.test import TestCase

from ..models import Person, Department, Thesis


class PersonTestCase(TestCase):
    """Tests for ``Person.clean_metadata``."""

    fixtures = ['theses.json', 'departments.json', 'authors.json',
                'contributions.json']

    def _check_cleaned(self, expected, raw):
        """Assert that cleaning ``raw`` yields exactly ``expected``."""
        self.assertEqual(expected, Person.clean_metadata(raw))

    def test_clean_metadata_remove_degrees_1(self):
        """An 'S.M.' degree suffix is removed from the name."""
        self._check_cleaned(
            ['Kofi Annan'],
            'Kofi Annan, S.M. Massachusetts Institute of Technology')

    def test_clean_metadata_remove_degrees_2(self):
        """An 'M. Eng.' degree suffix is removed from the name."""
        self._check_cleaned(
            ['Somebody'],
            'Somebody, M. Eng. Massachusetts Institute of Technology')

    def test_clean_metadata_remove_degrees_3(self):
        """A 'Ph. D.' degree suffix is removed from the name."""
        self._check_cleaned(
            ['Ronald McNair'],
            'Ronald McNair, Ph. D. Massachusetts Institute of Technology')

    def test_clean_metadata_remove_degrees_4(self):
        """A 'Nav. E.' (with space) degree suffix is removed."""
        self._check_cleaned(
            ['Boaty McBoatface'],
            'Boaty McBoatface, Nav. E. Massachusetts Institute of Technology')  # noqa

    def test_clean_metadata_remove_degrees_5(self):
        """A 'Nav.E.' (no space) degree suffix is removed."""
        self._check_cleaned(
            ['Boaty von Boatface'],
            'Boaty von Boatface, Nav.E. Massachusetts Institute of Technology')  # noqa

    def test_clean_metadata_remove_degrees_6(self):
        """An 'M.B.A.' degree suffix is removed from the name."""
        self._check_cleaned(
            ['The Man'],
            'The Man, M.B.A. Massachusetts Institute of Technology')

    def test_clean_metadata_remove_degrees_7(self):
        """A bare institute suffix (no degree) is removed from the name."""
        self._check_cleaned(
            ['Buzz Aldrin'],
            'Buzz Aldrin, Massachusetts Institute of Technology')

    def test_clean_metadata_multiple_people(self):
        """Names joined by ' and ' come back as separate list entries."""
        self._check_cleaned(
            ['Dr. Jekyll', 'Mr. Hyde'],
            'Dr. Jekyll and Mr. Hyde')


class DepartmentTestCase(TestCase):
    """Tests for ``Department`` metadata cleaning and get-or-create lookup."""

    fixtures = ['theses.json', 'departments.json', 'authors.json',
                'contributions.json']

    def test_clean_metadata_1(self):
        """The leading institute name is dropped from a department string."""
        raw = 'Massachusetts Institute of Technology. Department of Basketweaving'  # noqa
        cleaned = Department.clean_metadata(raw)
        self.assertEqual('Department of Basketweaving', cleaned)

    def test_clean_metadata_2(self):
        """The abbreviation 'Dept.' is expanded to 'Department'."""
        raw = 'Dept. of Basketweaving'
        cleaned = Department.clean_metadata(raw)
        self.assertEqual('Department of Basketweaving', cleaned)

    def test_get_or_create_from_metadata(self):
        """Equivalent metadata strings all resolve to one department row."""
        # First call: we get back a department carrying the cleaned name.
        first = Department.get_or_create_from_metadata(
            'Department of Basketweaving')
        self.assertEqual(first.name, 'Department of Basketweaving')

        # Feeding the same metadata again must fetch the existing
        # department rather than creating a second one.
        second = Department.get_or_create_from_metadata(
            'Department of Basketweaving')
        self.assertEqual(first.pk, second.pk)

        # The abbreviated spelling must resolve to that same department.
        third = Department.get_or_create_from_metadata(
            'Dept. of Basketweaving')
        self.assertEqual(first.pk, third.pk)


class ThesisTestCase(TestCase):
    """Tests for attaching people and departments to a ``Thesis``.

    Membership is always checked through ``.all()`` on the relevant
    attribute so that each check issues its own query instead of relying on
    a previously evaluated result.
    """

    fixtures = ['theses.json', 'departments.json', 'authors.json',
                'contributions.json']

    def test_add_single_new_person(self):
        """A previously unknown name passed to add_people becomes an author."""
        name = 'Whitfield Diffie'
        thesis = Thesis.objects.first()

        self.assertFalse(Person.objects.filter(name=name).exists())

        thesis.add_people([name])
        # thesis.authors is a cached property, so query through .all() to
        # be sure we see the state after add_people ran.
        author_names = [author.name for author in thesis.authors.all()]

        self.assertIn(name, author_names)

    def test_add_multiple_new_people(self):
        """Several unknown names can be added as authors in one call."""
        name1 = 'Limor Fried'
        name2 = 'Shirley Jackson'
        thesis = Thesis.objects.first()

        self.assertFalse(Person.objects.filter(name=name1).exists())
        self.assertFalse(Person.objects.filter(name=name2).exists())

        thesis.add_people([name1, name2])
        author_names = [author.name for author in thesis.authors.all()]

        self.assertIn(name1, author_names)
        self.assertIn(name2, author_names)

    def test_add_single_known_person(self):
        """An already existing person can be added as an author by name."""
        name = 'Irene Pepperberg'
        # get_or_create returns an (object, created) pair; unpack it so the
        # precondition below compares an actual Person, not a tuple (which
        # would make the check pass vacuously).
        person, _ = Person.objects.get_or_create(name=name)
        thesis = Thesis.objects.first()

        self.assertNotIn(person, thesis.authors.all())

        thesis.add_people([name])
        # Use a distinct loop variable so ``person`` above is not shadowed.
        author_names = [author.name for author in thesis.authors.all()]

        self.assertIn(name, author_names)

    def test_add_multiple_known_people(self):
        """Several already existing people can be added in one call."""
        name1 = 'Oliver R. Smoot'
        name2 = 'Tom Magliozzi'
        name3 = 'Ray Magliozzi'
        thesis = Thesis.objects.first()

        oliver, _ = Person.objects.get_or_create(name=name1)
        tom, _ = Person.objects.get_or_create(name=name2)
        ray, _ = Person.objects.get_or_create(name=name3)

        self.assertNotIn(oliver, thesis.authors.all())
        self.assertNotIn(tom, thesis.authors.all())
        self.assertNotIn(ray, thesis.authors.all())

        thesis.add_people([name1, name2, name3])

        self.assertIn(oliver, thesis.authors.all())
        self.assertIn(tom, thesis.authors.all())
        self.assertIn(ray, thesis.authors.all())

    def test_add_an_advisor(self):
        """With author=False the person shows up among the advisors."""
        name = 'Ellen Spertus'
        thesis = Thesis.objects.first()

        self.assertFalse(Person.objects.filter(name=name).exists())

        thesis.add_people([name], author=False)
        advisor_names = [advisor.name for advisor in thesis.advisors.all()]

        self.assertIn(name, advisor_names)

    def test_add_department(self):
        """A single new department name is attached to the thesis."""
        dept_name = 'Department of Baconology'
        thesis = Thesis.objects.first()
        thesis.add_departments([dept_name])

        # The loop variable is deliberately not called ``dept_name``: on
        # Python 2 a comprehension variable leaks into the enclosing scope
        # and would clobber the string being asserted on.
        attached = [dept.name for dept in thesis.department.all()]
        self.assertIn(dept_name, attached)

    def test_add_departments(self):
        """Several new department names are attached in one call."""
        dept_name1 = 'Department of Transwarp Technologies'
        dept_name2 = 'Department of Marshmallow Engineering'
        thesis = Thesis.objects.first()
        thesis.add_departments([dept_name1, dept_name2])

        attached = [dept.name for dept in thesis.department.all()]
        self.assertIn(dept_name1, attached)
        self.assertIn(dept_name2, attached)

    def test_add_known_department(self):
        """An already existing department is attached rather than ignored."""
        name = 'Course Eleventy-One'
        dept, _ = Department.objects.get_or_create(name=name)
        thesis = Thesis.objects.first()

        self.assertNotIn(dept, thesis.department.all())

        thesis.add_departments([name])

        self.assertIn(dept, thesis.department.all())
8 changes: 3 additions & 5 deletions hamlet/theses/tests.py → hamlet/theses/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from django.core.urlresolvers import reverse
from django.test import Client, RequestFactory, TestCase, override_settings

from .forms import AuthorAutocompleteForm, TitleAutocompleteForm
from .models import Thesis, Person, Contribution
from . import views
from ..forms import AuthorAutocompleteForm, TitleAutocompleteForm
from ..models import Thesis, Person, Contribution
from .. import views


# See http://tech.novapost.fr/django-unit-test-your-views-en.html .
Expand Down Expand Up @@ -54,12 +54,10 @@ def test_suggestion_context(self):
assert 'suggestions' in response.context

pks = [t.pk for t in response.context['suggestions']]
assert 43703 in pks
assert 60330 in pks

content = str(response.content)

assert 'Clock division as a power saving strategy in a system constrained by high transmission frequency and low data rate' in content # noqa
assert 'Architecture for ultra-low power multi-channel transmitters for Body Area Networks using RF resonators' in content # noqa

def test_get_correct_object(self):
Expand Down