Skip to content
Fetching contributors…
Cannot retrieve contributors at this time
231 lines (171 sloc) 4.88 KB
 # A small model of one of the many things Larry Page and Sergey Brin would
# have been coming up with in the early days of Google.


def compute_ranks(graph, d=0.8, numloops=10):
    """Compute a rank for every page in a link graph by fixed iteration.

    Every page starts with rank ``1 / npages``.  On each pass a page's new
    rank is ``(1 - d) / npages`` plus, for every page that links to it,
    ``d * rank(linker) / number_of_outlinks(linker)``.

    Args:
        graph: dict mapping each page to the list of pages it links to.
        d: damping factor (defaults to 0.8).
        numloops: number of update passes to run (defaults to 10).

    Returns:
        dict mapping each page in ``graph`` to its rank.  An empty graph
        yields an empty dict.
    """
    npages = len(graph)

    # The set of in-links depends only on the graph, never on the current
    # ranks, so build it once instead of rescanning the graph on every pass.
    # Iterating ``graph`` here keeps the same summation order as a rescan.
    inlinks = {
        page: [other for other in graph if page in graph[other]]
        for page in graph
    }

    ranks = {page: 1.0 / npages for page in graph}

    for _ in range(numloops):
        newranks = {}
        for page in graph:
            newrank = (1 - d) / npages
            for inlink in inlinks[page]:
                # An in-link has at least one out-link (the one pointing at
                # ``page``), so the divisor below is never zero.
                newrank = newrank + d * ranks[inlink] / len(graph[inlink])
            newranks[page] = newrank
        ranks = newranks
    return ranks


# Canned page bodies keyed by URL; get_page() serves pages from here.
# NOTE(review): the bodies below contain no markup -- presumably the HTML
# tags (including any links) were stripped when this text was extracted;
# confirm against the original before relying on link extraction.
cache = {
    'http://udacity.com/cs101x/urank/index.html': """

Dave's Cooking Algorithms

Here are my favorite recipies:

For more expert opinions, check out the Nickel Chef and Zinc Chef. """,
    'http://udacity.com/cs101x/urank/zinc.html': """

The Zinc Chef

I learned everything I know from the Nickel Chef.

For great hummus, try this recipe. """,
    'http://udacity.com/cs101x/urank/nickel.html': """

The Nickel Chef

This is the best Hummus recipe! """,
    'http://udacity.com/cs101x/urank/kathleen.html': """

Kathleen's Hummus Recipe

1. Open a can of garbonzo beans.
2. Crush them in a blender.
3. Add 3 tablesppons of tahini sauce.
4. Squeeze in one lemon.
5. Add salt, pepper, and buttercream frosting to taste.
""",
    'http://udacity.com/cs101x/urank/arsenic.html': """

The Arsenic Chef's World Famous Hummus Recipe

1. Kidnap the Nickel Chef.
2. Force her to make hummus for you.
""",
    'http://udacity.com/cs101x/urank/hummus.html': """

Hummus Recipe

1. Go to the store and buy a container of hummus.
2. Open it.
""",
}


def crawl_web(seed):
    """Crawl every page reachable from ``seed``.

    Args:
        seed: URL of the page to start from.

    Returns:
        A tuple ``(index, graph)``.  ``index`` is the dict filled in by
        add_page_to_index(); ``graph`` maps each crawled page to whatever
        get_all_links() returned for it (the pages it links to).
    """
    tocrawl = [seed]
    crawled = set()  # set rather than list: constant-time "seen" checks
    graph = {}  # url -> [list of pages it links to]
    index = {}
    while tocrawl:
        page = tocrawl.pop()
        if page in crawled:
            continue
        # NOTE(review): get_page() returns None for URLs missing from the
        # cache, and that None is handed to the helpers below -- confirm
        # they cope with it.
        content = get_page(page)
        add_page_to_index(index, page, content)
        outlinks = get_all_links(content)
        graph[page] = outlinks
        union(tocrawl, outlinks)
        crawled.add(page)
    return index, graph


def get_page(url):
    """Return the cached body for ``url``, or None if it is not cached."""
    return cache.get(url)


# NOTE(review): the source text is truncated from this point.  Only the
# fragment below survives of get_next_target; the argument to page.find()
# and everything after it are missing, and they have deliberately not been
# reconstructed here.
#
#   def get_next_target(page):
#       start_link = page.find('
#
# The text that follows the gap is reproduced below -- presumably the ranks
# printed for the pages in ``cache`` (TODO confirm against the original):
#
#>> {'http://udacity.com/cs101x/urank/kathleen.html': 0.11661866666666663,
#'http://udacity.com/cs101x/urank/zinc.html': 0.038666666666666655,
#'http://udacity.com/cs101x/urank/hummus.html': 0.038666666666666655,
#'http://udacity.com/cs101x/urank/arsenic.html': 0.054133333333333325,
#'http://udacity.com/cs101x/urank/index.html': 0.033333333333333326,
#'http://udacity.com/cs101x/urank/nickel.html': 0.09743999999999997}
You can’t perform that action at this time.