-
Notifications
You must be signed in to change notification settings - Fork 0
/
bing_main.py
104 lines (74 loc) · 3.1 KB
/
bing_main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from __future__ import (absolute_import, division,
print_function, unicode_literals)
import itertools
import logging
from settings import settings
from csvdef import bing_input_csvdef, bing_output_csvdef
from csvdef.bing_input_csvdef import read_bing_input_data
from csvdef.bing_output_csvdef import save_bing_output_data
from py_bing_search import PyBingWebSearch
import os
from tqdm import tqdm
from utils import SCRIPT_LOCATION
log = logging.getLogger(__name__)
def bing_search(search_term):
    """Query Bing for ``search_term`` using the options in ``settings['bing']``.

    The configured market is always sent as an extra query parameter; the
    latitude/longitude pair is added only when both values are configured.
    The API key and the result limit also come from ``settings['bing']``.

    :param search_term: query text handed to ``PyBingWebSearch``.
    :return: whatever ``PyBingWebSearch.search`` returns for the configured
        limit in JSON format (the caller iterates over it).
    """
    bing_settings = settings['bing']
    latitude = bing_settings.get('latitude')
    longitude = bing_settings.get('longitude')

    custom_params = {
        # 'Sources': "'" + settings['bing']['sources'] + "'",
        'Market': "'" + bing_settings['market'] + "'",
    }
    # A location hint is only meaningful with both coordinates present.
    if latitude is not None and longitude is not None:
        custom_params.update({
            'Latitude': latitude,
            'Longitude': longitude,
        })

    # format() instead of "+" concatenation: coordinates configured as
    # numbers (not strings) are rendered rather than raising TypeError.
    # String values produce exactly the same text as before.
    custom_params_str = "".join(
        "&{}={}".format(key, value) for key, value in custom_params.items()
    )

    bing_web = PyBingWebSearch(
        bing_settings['api_key'],
        search_term,
        # web_only is optional, but should be true to use your web only
        # quota instead of your all purpose quota
        web_only=False,
        custom_params=custom_params_str,
    )
    return bing_web.search(limit=int(bing_settings['results_limit']), format='json')
def convert_to_data_frame(query, bing_result):
    """Map a single Bing result onto the output CSV columns.

    Despite the name, the return value is a plain ``dict`` keyed by the
    column constants of ``bing_output_csvdef``, not a DataFrame.

    :param query: the search term that produced ``bing_result``.
    :param bing_result: object exposing ``id``, ``title``, ``description``
        and ``url`` attributes.
    :return: dict with one entry per output column.
    """
    row = {}
    row[bing_output_csvdef.C_QUERY] = query
    row[bing_output_csvdef.C_ID] = bing_result.id
    row[bing_output_csvdef.C_TITLE] = bing_result.title
    row[bing_output_csvdef.C_DESCRIPTION] = bing_result.description
    # Both URL columns are filled from the same ``url`` attribute.
    row[bing_output_csvdef.C_DISPLAY_URL] = bing_result.url
    row[bing_output_csvdef.C_URL] = bing_result.url
    return row
def process_bing_result(search_term, bing_result):
    """Turn one Bing result into an output row for ``search_term``.

    Currently a direct pass-through to ``convert_to_data_frame``.

    :param search_term: the query the result belongs to.
    :param bing_result: a single result object from the Bing search.
    :return: the value produced by ``convert_to_data_frame``.
    """
    return convert_to_data_frame(search_term, bing_result)
def process_bing_input(bing_input_row):
    """Search Bing for the query held in one input row.

    :param bing_input_row: row object supporting ``.loc[...]`` lookup; the
        search term is read from the ``bing_input_csvdef.C_QUERY`` column.
    :return: list with one processed output row per Bing result.
    """
    query = bing_input_row.loc[bing_input_csvdef.C_QUERY]
    output_rows = []
    for hit in bing_search(query):
        output_rows.append(process_bing_result(query, hit))
    return output_rows
import argparse

# Command-line interface: both the input and the output CSV paths are
# mandatory.
parser = argparse.ArgumentParser(description='Bing API to CSV')
parser.add_argument(
    '-i', '--input',
    required=True,
    help="CSV file that contains search terms.",
)
parser.add_argument(
    '-o', '--output',
    required=True,
    help="CSV file to write results to.",
)
def main():
    """Command-line entry point.

    Sets up DEBUG logging to ``<script file name>.log``, parses the command
    line, reads the search terms from the input CSV, runs a Bing search for
    each row (with a progress bar) and saves all results to the output CSV.
    """
    log_filename = '{}.log'.format(os.path.basename(__file__))
    logging.basicConfig(filename=log_filename, level=logging.DEBUG)

    args = parser.parse_args()

    bing_input_data = read_bing_input_data(args.input)
    log.info("Read {} search terms.".format(len(bing_input_data)))

    # The rows are materialised first so tqdm receives a sized list.
    input_rows = list(bing_input_data.iterrows())

    # Each input row yields a list of results; collect them into one flat list.
    bing_results = []
    for _, row in tqdm(input_rows):
        bing_results.extend(process_bing_input(row))

    save_bing_output_data(bing_results, args.output)


if __name__ == "__main__":
    main()