Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: 9e58e1dfb5
Fetching contributors…

Cannot retrieve contributors at this time

file 53 lines (46 sloc) 1.764 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
import urllib2


# Module-wide URL opener used by WikipediaArticle.htmlBody for every request.
# The default header list is replaced so requests carry a browser-like
# User-agent instead of urllib2's own.
# NOTE(review): presumably needed because Wikipedia rejects the default
# Python User-agent -- confirm.
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]


class Tag:
    """
    Data type used for all of our interest tags.

    The tag text is stored lower-cased in ``self.tag``, so two tags that
    differ only in letter case compare as the same tag.
    """

    def __init__(self, tag):
        """
        Initialize the tag.

        Takes one argument of type String; it is stored lower-cased.
        """
        self.tag = tag.lower()

    @staticmethod
    def _foundInOwnArticle(tag):
        """
        Return True if `tag` occurs verbatim in some line of the Wikipedia
        page whose title is derived from `tag` (spaces become underscores,
        title lower-cased).

        The response is always closed, even when the scan stops early or
        reading fails. Errors from the request propagate to the caller.
        """
        body = WikipediaArticle(tag.replace(' ', '_').lower()).htmlBody
        try:
            return any(tag in line.strip() for line in body)
        finally:
            # Guarded so a body object without close() (e.g. a plain list
            # of lines) cannot turn a successful scan into an error.
            close = getattr(body, 'close', None)
            if close is not None:
                close()

    def similarityTo(self, otherTag):
        """
        Calculate whether or not this tag matches another tag.

        Identical tag text matches immediately. Otherwise each tag is
        looked up on Wikipedia and the tags match only when both pages
        could be fetched and each page contains its own tag text. We could
        potentially do a multiple depth search, but for now the search is
        kept at a depth of 1 to keep testing quick and simple.

        Takes one argument of type Tag (any object with a ``tag`` string
        attribute works). Returns a bool.

        Any failure while fetching or reading a page is deliberately
        swallowed (best effort) and the result falls back to the exact
        comparison of the two tag strings.
        """
        if self.tag == otherTag.tag:
            return True
        try:
            # NOTE(review): each tag is searched for in its OWN article,
            # not in the other tag's article. A cross-reference check may
            # have been intended -- confirm before changing.
            # `and` short-circuits, so the second page is only requested
            # when the first check succeeded.
            return (self._foundInOwnArticle(self.tag) and
                    self._foundInOwnArticle(otherTag.tag))
        except Exception:
            return self.tag == otherTag.tag


class WikipediaArticle:
    """
    Handle on the printable view of one English Wikipedia article.

    `keyword` is the raw (not percent-encoded) article title, with
    underscores in place of spaces, e.g. ``machine_learning``.
    """

    def __init__(self, keyword):
        """Remember the article title; no request is made here."""
        self.keyword = keyword

    @property
    def url(self):
        """
        Return the URL of the printable page for this article.

        The title is percent-encoded so characters with a meaning inside a
        query string (``+``, ``#``, ``&``, ``?``, ...) name the intended
        article instead of corrupting the request.
        """
        # Local import keeps the block self-contained; the fallback lets
        # the same code run where quote() lives in urllib.parse.
        try:
            from urllib import quote
        except ImportError:
            from urllib.parse import quote
        title = self.keyword
        if not isinstance(title, str):
            # A Python 2 unicode title must be UTF-8 bytes before quoting.
            title = title.encode('utf-8')
        return ('http://en.wikipedia.org/w/index.php?title=' +
                quote(title) + '&printable=yes')

    @property
    def htmlBody(self):
        """
        Open the printable page and return the response object.

        A new request is made through the module-level `opener` on every
        access. The caller iterates the returned file-like object line by
        line and is responsible for closing it. Errors raised by the
        request propagate to the caller.
        """
        return opener.open(self.url)
Something went wrong with that request. Please try again.