def isindexed(self, url):
    """Return True if this url is already indexed.

    A url counts as indexed only when it has a row in ``urllist`` AND at
    least one row in ``wordlocation`` refers to that row's id. A url that
    is listed in ``urllist`` but has no ``wordlocation`` rows is reported
    as not indexed.

    Args:
        url: The page address to look up. It is bound as a query
            parameter, so quotes or SQL fragments in it are matched
            literally instead of being interpreted as SQL.

    Returns:
        True if the url is known and has word locations, otherwise False.
    """
    # Bind the url with a placeholder rather than formatting it into the
    # statement: crawled urls may contain quotes, which would otherwise
    # break the query or change what it matches.
    url_row = self.con.execute(
        "select rowid from urllist where url=?", (url,)
    ).fetchone()
    if url_row is None:
        return False

    # Check if it has actually been crawled.
    word_row = self.con.execute(
        "select 1 from wordlocation where urlid=?", (url_row[0],)
    ).fetchone()
    return word_row is not None