/
ls-geo.py
executable file
·183 lines (157 loc) · 5.88 KB
/
ls-geo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/env python
# coding: utf-8
import MySQLdb
import requests
from time import sleep
import json
from difflib import SequenceMatcher
import pickle
import os
HOST = "localhost"
USER = "********"
PASSWORD= "*********"
DB = "***********"
USER_TABLE = "prefix_user"
GEOTARGET_TABLE = "prefix_geo_target"
GEOCOUNTRY_TABLE = "prefix_geo_country"
GEOREGION_TABLE = "prefix_geo_region"
GEOCITY_TABLE = "prefix_geo_city"
def search_db(targettable, name_en, targetid=None):
sql = u'''SELECT * FROM %s \
WHERE''' % (targettable)
if targettable==GEOREGION_TABLE:
field = 'country_id'
if targettable==GEOCITY_TABLE:
field = 'region_id'
if targetid:
sql += u''' %s=%s AND name_en="%s"''' % (field, targetid, name_en)
else:
sql += u''' name_en="%s"''' % name_en
cursor.execute(sql)
return cursor.fetchone()
def searchsynonym(targettable, targetid, name_en):
if os.path.isfile("ls-geo.dat"):
f = open("ls-geo.dat", "rb")
synonyms = pickle.load(f)
f.close()
else:
synonyms = {u"""Moskovskaya Oblast'""": u"""Moscow & Moscow Region""", u"""Moskva""":u"""Moscow & Moscow Region"""}
f = open("ls-geo.dat", "wb")
pickle.dump(synonyms, f)
f.close()
if name_en in synonyms.keys():
sname_en = synonyms[name_en]
return search_db(targettable, targetid=targetid, name_en=sname_en)
else:
print "! Cannot find target for %s in DB." % name_en
if targettable==GEOREGION_TABLE:
field = 'country_id'
if targettable==GEOCITY_TABLE:
field = 'region_id'
sql = u"""SELECT name_en FROM %s \
WHERE %s=%s""" \
% (targettable, field, targetid )
cursor.execute(sql)
row = cursor.fetchone()
ratiolist = []
while row is not None:
s = SequenceMatcher(None, name_en, row[0])
ratiolist.append({'name':row[0],'ratio':s.ratio()})
row = cursor.fetchone()
sratiolist = sorted(ratiolist, key=lambda k: k['ratio'])
for w in sratiolist:
print "%d - %s" % (sratiolist.index(w), w['name'])
try:
x = int(raw_input('Choose synonym for %s (Any symbol for None) ' % name_en))
except ValueError:
return None
sname_en = sratiolist[x]['name']
synonyms[name_en] = sname_en
f = open("ls-geo.dat", "wb")
pickle.dump(synonyms, f)
f.close()
return search_db(targettable, targetid=targetid, name_en=sname_en)
db = MySQLdb.connect(HOST,USER,PASSWORD,DB, charset="utf8")
cursor = db.cursor()
cursor.execute("""SELECT user_id, user_ip_register FROM %s""" % (USER_TABLE))
users = cursor.fetchall()
print "There are %d users in DB" % len(users)
cursor.execute("""SELECT target_id FROM %s WHERE target_type='user'""" % (GEOTARGET_TABLE))
data = cursor.fetchall()
users_in_geo = [item for sublist in data for item in sublist]
users_not_in_geo = []
for user in users:
if not(user[0] in users_in_geo):
users_not_in_geo.append({"user_id":user[0],"ip":user[1]})
print "%d of them haven\'t location info" % len(users_not_in_geo)
for user in users_not_in_geo:
if user["ip"]:
print "Trying to find location for %s" % user["ip"]
#sleep(1)
text = "http://api.sypexgeo.net/json/%s" % user["ip"]
r = requests.get(text)
if r.status_code == 200:
j = json.loads(r.text)
try:
country_en = j['country']['name_en']
except TypeError:
print "! Exception trying fetch data for %s" % user["ip"]
continue
if not country_en:
continue
country = search_db(GEOCOUNTRY_TABLE, name_en=country_en)
if country:
country_id = country[0]
country_name = country[2]
print "-- %s" % country_name
else:
print "! Cannot find country %s in DB" % country_en
continue
try:
region_en = j['region']['name_en']
except TypeError:
print "! Exception while searching for data in %s" % user["ip"]
continue
region = search_db(GEOREGION_TABLE, targetid=country_id, name_en=region_en)
if not region:
region = searchsynonym(targettable=GEOREGION_TABLE,
targetid=country_id,
name_en =region_en)
if not region:
continue
region_id = region[0]
print "-- %s" % region[3]
try:
city_en = j['city']['name_en']
except TypeError:
print "! Exception while searching for city in %s geodata" % user["ip"]
city = search_db(GEOCITY_TABLE, targetid=region_id, name_en=city_en)
if not city:
city = searchsynonym(targettable=GEOCITY_TABLE, targetid=region_id, name_en=city_en)
if not city:
continue
city_id = city[0]
print "-- %s" % city[4]
### exporting data in table
geo_type = 'city'
geo_id = city_id
target_type = 'user'
target_id = user['user_id']
#country_id
#region_id
#city_id
sql = """INSERT INTO %s \
VALUES ("%s",%s,"%s",%s,%s,%s,%s)""" \
% (GEOTARGET_TABLE,
geo_type,
geo_id,
target_type,
target_id,
country_id,
region_id,
city_id)
cursor.execute(sql)
db.commit()
else:
print "Cannot fetch info. Error code %d" % r.status_code
db.close()