-
Notifications
You must be signed in to change notification settings - Fork 0
/
Search.py
executable file
·126 lines (96 loc) · 3.48 KB
/
Search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import pymongo
from pymongo import MongoClient
import json
def oneWordSearch(db, word, data):
    """Print the ten highest-rated documents indexed under a single term.

    The term is stripped and lower-cased, then used as a field name: the
    query matches documents whose field named after the term holds the
    string "term". Matches are sorted by their "rating" field in
    descending order and capped at ten.

    Args:
        db: Collection-like object whose ``find(query)`` returns a cursor
            supporting ``sort(field, direction)``, ``limit(n)`` and
            iteration over dict-like rows with "docID" and "rating" keys.
        word: The search term as typed by the user.
        data: Mapping indexed by each row's "docID"; the value is printed
            after the "URL:" label.

    Returns:
        True if at least one result was printed, False otherwise.
    """
    term = word.strip().lower()
    if not term:
        # Nothing to look up; an empty field name cannot match an indexed term.
        return False

    cursor = db.find({term: "term"}).sort("rating", pymongo.DESCENDING).limit(10)

    # A cursor is lazy, so the only reliable way to learn whether anything
    # matched is to iterate it and remember whether a row was seen.
    found = False
    for rank, post in enumerate(cursor, 1):
        found = True
        print(str(rank) + ".")
        print(" URL: " + data[post["docID"]])
        print(" RATING (tf-idf): " + str(post["rating"]))
        print("")
    return found
def multiWord(db, words, data):
    """Print the top ten documents for a multi-word query.

    Each distinct term (case-insensitive) is looked up separately. For every
    document the function accumulates the sum of the "rating" values of the
    matching rows and the number of query terms that matched it. Documents
    are ranked by number of matching terms first and by summed rating
    second, both descending.

    Args:
        db: Collection-like object whose ``find(query)`` returns an iterable
            of dict-like rows with "docID" and "rating" keys.
        words: The whitespace-separated query string.
        data: Mapping indexed by "docID"; the value is printed after the
            "URL:" label.

    Returns:
        True if at least one document matched any term, False otherwise.
    """
    # Lower-case BEFORE de-duplicating so that "Foo foo" is queried once and
    # does not double-count occurrences; a list keeps the user's word order.
    terms = []
    for raw_word in words.split():
        term = raw_word.lower()
        if term not in terms:
            terms.append(term)

    # docID -> (summed rating, number of query terms that matched)
    commonDocID = {}
    for term in terms:
        print("")
        # Two spaces after the colon reproduce the spacing of a
        # comma-separated Python 2 print statement.
        print("Querying word:  " + term)
        for item in db.find({term: "term"}):
            key = item["docID"]
            rating_sum, occurrences = commonDocID.get(key, (0, 0))
            commonDocID[key] = (rating_sum + item["rating"], occurrences + 1)

    if not commonDocID:
        return False

    ranked = sorted(
        commonDocID.items(),
        key=lambda entry: (entry[1][1], entry[1][0]),
        reverse=True,
    )
    for rank, (doc_id, (rating_sum, _occurrences)) in enumerate(ranked[:10], 1):
        print(str(rank) + ".")
        print(" URL: " + str(data[doc_id]))
        print(" RATING (tf-idf): " + str(rating_sum))
        print("")
    return True
def readingJson(path='./WEBPAGES_RAW/bookkeeping.json'):
    """Load a JSON file and return the parsed content.

    Args:
        path: Location of the JSON file. Defaults to the bookkeeping file
            under ``./WEBPAGES_RAW``.

    Returns:
        The decoded JSON value.

    Raises:
        IOError / OSError: If the file cannot be opened.
        ValueError: If the file does not contain valid JSON.
    """
    # The context manager closes the handle even when parsing fails.
    with open(path) as json_file:
        return json.load(json_file)
def SearchEngine():
    """Run the interactive search prompt until the user enters 999.

    Loads the bookkeeping JSON, connects once to the local MongoDB instance
    (database ``project3v2``, collection ``posts``) and then repeatedly reads
    a query. A query with one word is handled by ``oneWordSearch``; anything
    longer by ``multiWord``. "No results" is printed when the search
    function reports that nothing matched. Blank input is ignored and the
    prompt is shown again. The MongoDB client is always closed on exit.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    separator = " _______________________________________________________________"
    data = readingJson()

    print("")
    print(separator)
    print("")
    print(" Welcome to our Search Engine")
    print("")
    print(" ** enter 999 to quit **")
    print("")
    print(separator)
    print("")

    # Connect to the database only once for the whole session.
    client = MongoClient('mongodb://localhost:27017')
    try:
        dbConnector = client.project3v2.posts

        while True:
            query = read_line("SEARCH: ").strip()
            if query == "999":
                break
            if not query:
                # Nothing to search for; ask again.
                continue

            shown = query.lower()
            print("SEARCHING FOR \"" + shown + "\" ...")
            print("")
            print("RESULTS FOR \"" + shown + "\":")
            print("")

            # split() with no argument splits on any whitespace run, so tabs
            # or repeated spaces cannot end up inside a search term.
            if len(query.split()) == 1:
                hasResults = oneWordSearch(dbConnector, query, data)
            else:
                hasResults = multiWord(dbConnector, query, data)

            if not hasResults:
                print("No results")

            print(separator)
            print("")

        print("Quitting Search \nShut Down...")
        print("")
        print("")
    finally:
        # Release the connection even if the loop is interrupted or fails.
        client.close()
# Start the interactive prompt only when this file is executed as a script,
# so importing the module does not block on user input.
if __name__ == "__main__":
    SearchEngine()