This repository has been archived by the owner on Nov 30, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 4
/
cate_mongo_specific1.py
122 lines (90 loc) · 3.77 KB
/
cate_mongo_specific1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import youtube
import google_custom_search_books
import google_custom_search_people
import google_custom_search_places
import freebase,re
from pymongo import Connection
import re
#from imdb import IMDb
# Connect to MongoDB using pymongo's default Connection() arguments and
# select the "dataset_db_spec" database used by the category lookups below.
connection = Connection()
db = connection["dataset_db_spec"]
# Module-level per-category counters, all initialised to zero.
film_count = books_count = location_count = people_count = 0
#category = []
class categorization:
    """Categorise a keyword against the MongoDB dataset and run per-category searches.

    ``get_category`` counts case-insensitive matches of a keyword in the
    ``film_data``, ``book_data``, ``location_data`` and ``people_data``
    collections of the module-level ``db``.  The ``search_*`` methods delegate
    to the ``youtube`` and ``google_custom_search_*`` modules imported by this
    file.
    """

    # Fields searched in each collection when counting keyword matches.
    _FILM_FIELDS = (
        "name",
        "starring",
        "directed_by",
        "produced_by",
        "story_by",
        "written_by",
        "music",
        "film_series",
    )
    _BOOK_FIELDS = ("name", "characters")
    _LOCATION_FIELDS = ("name", "coterminous_with", "containedby", "contains")
    _PEOPLE_FIELDS = ("name",)

    def __init__(self):
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and also parses under Python 3.
        print("hello")

    def _count_matches(self, collection, fields, pattern):
        """Return the summed per-field match counts of *pattern* in *collection*.

        One case-insensitive ``$regex`` query is issued per field and the
        counts are added, so a document matching in several fields is counted
        once for each of them.
        """
        return sum(
            collection.find({field: {"$regex": pattern, "$options": "i"}}).count()
            for field in fields
        )

    def get_category(self, kw):
        """Return the categories in which the keyword *kw* occurs.

        *kw* is matched as literal text, case-insensitively and anywhere in the
        field value.  A summary of the per-category counts is printed.

        Returns a list containing any of ``"film"``, ``"books"``, ``"people"``
        and ``"location"`` (in that order) whose match count is at least one.
        An empty or ``None`` keyword yields an empty list.
        """
        if not kw:
            # An empty pattern matches every document, which would wrongly
            # report every category; there is nothing to categorise.
            return []

        # Escape regex metacharacters so keywords such as "C++" or "(500)"
        # are searched literally instead of being parsed as a pattern.
        pattern = re.escape(kw)

        count_film = self._count_matches(db.film_data, self._FILM_FIELDS, pattern)
        count_books = self._count_matches(db.book_data, self._BOOK_FIELDS, pattern)
        count_location = self._count_matches(
            db.location_data, self._LOCATION_FIELDS, pattern
        )
        count_people = self._count_matches(
            db.people_data, self._PEOPLE_FIELDS, pattern
        )

        summary = (
            "\n\n" + kw
            + "\nfilm:" + str(count_film)
            + "\nbook:" + str(count_books)
            + "\npeople:" + str(count_people)
            + "\nlocation:" + str(count_location)
        )
        print(summary)
        print("CATEGORY:")

        category = []
        ordered_counts = (
            ("film", count_film),
            ("books", count_books),
            ("people", count_people),
            ("location", count_location),
        )
        for label, count in ordered_counts:
            if count >= 1:
                print(" " + label + " ")
                category.append(label)
        return category

    def search_film(self, kwd):
        """Return the result of ``youtube.youtube().SearchAndPrint(kwd)``."""
        youtube_obj = youtube.youtube()
        return youtube_obj.SearchAndPrint(kwd)

    def search_books(self, search_query):
        """Return ``google_custom_search_books.search().search_books(search_query)``."""
        print("\n\nsearch books")
        bookobj = google_custom_search_books.search()
        return bookobj.search_books(search_query)

    def search_people(self, search_query):
        """Return ``google_custom_search_people.search().search_people(search_query)``."""
        print("\n\nsearch people")
        peopleobj = google_custom_search_people.search()
        return peopleobj.search_people(search_query)

    def search_location(self, search_query):
        """Return ``google_custom_search_places.search().search_places(search_query)``."""
        print("\n\nsearch location")
        placesobj = google_custom_search_places.search()
        return placesobj.search_places(search_query)
#obj = categorization()
#obj.get_category(["steven","spielberg"])