Skip to content

Commit

Permalink
Remove cache db, add ftp:// to absolute prefixes, and add support for creating cache directory on the fly.
Browse files Browse the repository at this point in the history
  • Loading branch information
lionaneesh committed Apr 4, 2012
1 parent a9c5dee commit 2096efa
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions monitor.py
Expand Up @@ -58,7 +58,8 @@ def save_all():

web_list = read_config()

# connect to sqlite database
# connect to sqlite database
directory = os.getcwd() + '/cache'
data = []
crawled = []
tracked_pages = []
Expand All @@ -68,13 +69,16 @@ def save_all():
content = ''
pages = {}
web_diff = {}
prefixes = ('http://', 'https://') # prefixes to check whether the link is an absolute link
prefixes = ('http://', 'https://', 'ftp://') # prefixes to check whether the link is an absolute link
c, con = db_connect()
db_setup_everything(c, con)
# db_get_data(crawled, to_crawl, pages, c)
db_get_data(pages, c)
if len(sys.argv) == 2:
error("No directory Provided, Using %s as default" % os.getcwd(), 1)

try:
os.stat(directory)
except:
os.makedirs(directory)

#---- main -----
signal.signal(signal.SIGINT, handle_SIGINT)

Expand Down Expand Up @@ -104,7 +108,7 @@ def save_all():
soup = BeautifulSoup(source)
content = soup.prettify()
minus = 0
add = 0
add = 0
if current_url in tracked_pages:
# See if there is any difference in the page
len_diff = 0
Expand Down Expand Up @@ -144,7 +148,7 @@ def save_all():
diff += "+ %s\n" % (a,)
add = add + 1
web_diff[current_url] = '\nNew Page Added\n%d Additions, %d Deletions\n' % (add, minus) + diff
temp = os.getcwd() + "/cache/cache.%.7f.html" % time()
temp = directory + "/cache.%.7f.html" % time()
fp = open(temp, 'w')
fp.write(content)
fp.close()
Expand Down
Binary file removed web_monitor.db
Binary file not shown.

0 comments on commit 2096efa

Please sign in to comment.