Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 35c4c00
Showing
8 changed files
with
1,318 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#!/usr/bin/python3.1 | ||
import cgi | ||
|
||
# Emit the CGI header block followed by the page title. The header literal
# already ends the header section; print() adds one more newline after each
# argument, exactly as two separate print() calls would.
print("Content-type: text/html\r\n\r\n", "<title>Reply Page</title>", sep="\n")

# Open the history test file for reading and close it again straight away;
# nothing is read from it.
with open("../htdocs/history/test", "r") as f:
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,298 @@ | ||
#!/usr/bin/env python3.1 | ||
# -*- coding: UTF-8 -*- | ||
|
||
# Wed Aug 31, 02:46 sunuslee | ||
# sunuslee (at) gmail(dot)com | ||
# This is my first python program , with my first cgi page and first apache server | ||
# You can do anything you want with those files under ONLY ONE condition: | ||
# Please Do keep those lines above | ||
import cgitb
cgitb.enable()  # installed before the remaining imports, as in the original

import cgi
import codecs
import fcntl
import json
import os
import queue
import re
import sys
import threading
import time
import traceback
import urllib.error
import urllib.parse
import urllib.request
# Please use your own api key instead. e.g. :
# APIKEY = "23eeeb4347bdd26bfc6b7ee9a3b755dd"

# Filesystem prefix under which main() writes "/htdocs/history/couple_*.html".
ROOTDIR = "/home/sunus/apache/"
# URL prefix main() uses to build the public link to a generated history page.
HOSTNAME = "http://localhost/"
# Prefix for links to a user's profile page; the uid is appended to it.
LINK_DB_PEOPLE = "http://www.douban.com/people/"
# API key appended to the request URLs built in this file.
# NOTE(review): a real-looking key is committed here -- confirm it is meant to be public.
APIKEY = "053caab0d0224c680fb600127066e538"
# Not referenced anywhere else in this file.
SECRET = ''

# Short aliases for the fcntl.flock() flags used by try_api().
LOCK_EX = fcntl.LOCK_EX
LOCK_UN = fcntl.LOCK_UN
LOCK_NB = fcntl.LOCK_NB

# Append-mode log file, opened relative to the current working directory at
# import time; try_api() writes to it and main() closes it.
flog = open("log", "a+")

# Declared global in main() but otherwise unused in this file.
people_list = []

# One list per category. get_user_collection() appends [uid, item_dict] to the
# matching list; get_match_rate() reads entries back by index.
table_movie = []
table_music = []
table_book = []

# Running total of decoded characters received from collection requests;
# updated by worker threads while holding tbr_lock.
total_bytes_recv = 0
tbr_lock = threading.Lock()
class Worker(threading.Thread):
    """Thread that processes ``[uid, cat]`` jobs from a shared queue forever.

    The thread never returns from run(); callers are expected to mark it as a
    daemon and wait for completion with ``work_queue.join()``.
    """

    def __init__(self, work_queue):
        """Remember the queue that jobs are pulled from.

        Args:
            work_queue: a queue.Queue whose items unpack into (uid, cat).
        """
        super().__init__()
        self.work_queue = work_queue

    def run(self):
        """Pull jobs until the process exits, acknowledging each one exactly once."""
        while True:
            # get() stays outside the try block so that task_done() is only
            # ever called for a job that was actually taken from the queue.
            job = self.work_queue.get()
            try:
                uid, cat = job
                self.process(uid, cat)
            except Exception:
                # A failed job must not kill the worker: if every worker died
                # while jobs were still queued, Queue.join() would block
                # forever. Report the failure and keep serving.
                traceback.print_exc()
            finally:
                self.work_queue.task_done()

    def process(self, uid, cat):
        """Download one user's collection for one category."""
        get_user_collection(uid, cat)
|
||
# Rate-limit policy enforced through the shared "api_limit" file: at most
# _API_MAX_CALLS requests per window of _API_WINDOW_SECONDS seconds.
_API_WINDOW_SECONDS = 65
_API_MAX_CALLS = 40


def _lock_api_limit_file():
    """Open "api_limit" and take an exclusive flock on it, polling once a second.

    Returns the open binary file object; the caller must unlock and close it.
    """
    while True:
        fh = open("api_limit", "a+b")
        try:
            fcntl.flock(fh.fileno(), LOCK_EX | LOCK_NB)
            return fh
        except IOError:
            fh.close()
            flog.write("FILE's LOCKED,WAIT....\n")
            time.sleep(1)


def _last_api_record(fh):
    """Return (window_start, count) from the last line of the file, or None.

    None means the file is empty or its last line cannot be parsed, in which
    case the caller starts a fresh window.
    """
    fh.seek(0, os.SEEK_END)
    size = fh.tell()
    if size == 0:
        return None
    # The two trailing fields (",<count>,<epoch>\n") are far shorter than 64
    # bytes, so a small tail read is enough however long the logged URL is.
    fh.seek(max(0, size - 64))
    tail = fh.read().decode("utf8", "replace")
    fields = tail.rstrip("\n").rsplit("\n", 1)[-1].split(",")
    try:
        return int(fields[-1]), int(fields[-2])
    except (IndexError, ValueError):
        return None


def try_api(api_url):
    """Open ``api_url`` once the shared rate limit allows another request.

    Every granted request appends one line to "api_limit":
    ``<url without its last 40 chars>\\t<ctime>,<count>,<window start epoch>``.
    When the current window is exhausted the call sleeps 60 seconds and
    checks again.

    Args:
        api_url: full request URL; its last 40 characters (the key parameter)
            are left out of the log line.

    Returns:
        The response object returned by urllib.request.urlopen().

    Raises:
        urllib.error.URLError: propagated from urlopen().
    """
    while True:
        fh = _lock_api_limit_file()
        try:
            now = int(time.time())
            last = _last_api_record(fh)
            if last is None or now - last[0] >= _API_WINDOW_SECONDS:
                window_start, count = now, 1
            elif last[1] >= _API_MAX_CALLS:
                count = None  # window exhausted
            else:
                window_start, count = last[0], last[1] + 1
            if count is not None:
                record = "{0}\t{1},{2},{3}\n".format(
                    api_url[0:-40], time.ctime(), count, window_start)
                fh.write(record.encode("utf8"))
                # The record has to reach the file before the lock is
                # released, otherwise other threads/processes read a stale
                # count and overshoot the limit.
                fh.flush()
        finally:
            fcntl.flock(fh.fileno(), LOCK_UN)
            fh.close()
        if count is not None:
            break
        flog.write("No More API at this time {0}".format(time.ctime()))
        time.sleep(60)
    return urllib.request.urlopen(api_url)
|
||
def get_nickname(uid):
    """Return the display name found in a user's profile feed.

    The name is the text of the first ``<title>...</title>`` element that sits
    on a single line of the API response.

    Args:
        uid: user id inserted into the profile request URL.

    Returns:
        The title text, or ``uid`` itself when the response has no title.
    """
    fh = try_api("http://api.douban.com/people/{0}?alt=atom&apikey={1}".format(uid, APIKEY))
    try:
        content = fh.read().decode("utf8")
    finally:
        # Close the response even if reading or decoding fails.
        fh.close()
    # Match on the tags instead of slicing fixed columns, so the result does
    # not depend on how the line happens to be indented.
    found = re.search(r"<title>(.*?)</title>", content)
    if found is None:
        return uid
    return found.group(1)
|
||
# Display text for a collection status. Keys are the raw status string
# concatenated with the category name ("wish" + "movie"), which is how
# get_user_collection() looks them up.
status_chs = {"wishmovie":"想看", "watchingmovie":"在看", "watchedmovie":"看过",
              "wishmusic":"想听", "listeningmusic":"在听", "listenedmusic":"听过",
              "wishbook":"想读", "readingbook":"在读", "readbook":"读过"}
|
||
def _tag_text(line, open_tag, close_tag):
    """Return the text between open_tag and the last close_tag on the line."""
    begin = line.index(open_tag) + len(open_tag)
    end = line.rfind(close_tag)
    # Without a closing tag on this line, take everything after the open tag.
    return line[begin:end] if end >= begin else line[begin:]


def get_user_collection(uid, cat):
    """Download a user's whole collection for one category and store it.

    Pages of 50 entries are requested until a response no longer contains
    "entry". Each complete entry is recorded as
    ``item_dict[title] = [status_text, subject_id, aka_or_None]`` and the
    result is appended to the category table as ``[uid, item_dict]``.

    Args:
        uid: user id inserted into the collection request URL.
        cat: "movie" or "music"; any other value selects the book table.
    """
    global total_bytes_recv
    if cat == "movie":
        table = table_movie
    elif cat == "music":
        table = table_music
    else:
        table = table_book

    item_dict = {}
    status_open = "<db:status>"
    title_open = "<title>"
    aka_open = '<db:attribute lang="zh_CN" name="aka">'
    item_link = 'http://{0}.douban.com/subject'.format(cat)
    item_link_re = re.compile(r'\d+')

    start = 1  # the API's start-index is 1-based
    while True:
        # The key parameter is spelt "apikey", matching the profile requests.
        uri = "http://api.douban.com/people/{0}/collection?cat={1}&tag=&status=&start-index={2}&max-results=50&alt=atom&apikey={3}".format(uid, cat, start, APIKEY)
        fh = try_api(uri)
        try:
            content = fh.read().decode("utf8")
        finally:
            fh.close()
        with tbr_lock:
            total_bytes_recv += len(content)
        if "entry" not in content:
            break

        # Per-entry state machine: status -> title -> link -> optional aka.
        step = 1
        status = title = link = aka = None
        for line in content.splitlines():
            if step == 1 and status_open in line:
                status = _tag_text(line, status_open, "</db:status>")
                step = 2
            elif step == 2 and title_open in line:
                title = _tag_text(line, title_open, "</title>")
                step = 3
            elif step == 3 and item_link in line:
                link = item_link_re.search(line).group()
                step = 4
                aka = None  # in case the item does not have an aka
            elif step == 4 and aka_open in line:
                aka = _tag_text(line, aka_open, "</db:attribute>")
                step = 1
            elif "</entry>" in line:
                # Only store entries for which every mandatory field was
                # seen; never reuse values left over from a previous entry.
                if status is not None and title is not None and link is not None:
                    item_dict[title] = [status_chs.get(status + cat, status), link, aka]
                step = 1
                status = title = link = aka = None
        start += 50
    table.append([uid, item_dict])
# print("<h4>User {0:4} : count = {1} finished</h4>".format(uid, len(item_dict))) | ||
|
||
def get_match_rate(uid1_idx, uid2_idx, cat):
    """Print an HTML table of the items two users share in one category.

    Both users are addressed by their index in the category table, whose
    entries are ``[uid, item_dict]`` and where ``item_dict`` maps a title to
    ``[status_text, subject_id, aka_or_None]``.

    Args:
        uid1_idx: table index of the first user.
        uid2_idx: table index of the second user.
        cat: "movie" or "music"; any other value selects the book table.

    Returns:
        ``[uid2_idx, rate]`` where rate is the number of common titles.

    Raises:
        IndexError: if either index has no entry in the table.
    """
    if cat == "movie":
        table = table_movie
        cat_local = "电影"
    elif cat == "music":
        table = table_music
        cat_local = "音乐"
    else:
        table = table_book
        cat_local = "书籍"

    uid1, items1 = table[uid1_idx]
    uid2, items2 = table[uid2_idx]
    common_set = set(items1) & set(items2)
    nikename1 = get_nickname(uid1)
    nikename2 = get_nickname(uid2)

    # Templates use implicit string concatenation so that source indentation
    # never ends up inside the generated HTML.
    print(('<h4><a href="{0}{1}">{3}</a>'
           '和'
           '<a href="{0}{2}">{4}</a>'
           '都喜欢的{5}有({6}):</h4>').format(
               LINK_DB_PEOPLE, uid1, uid2, nikename1, nikename2,
               cat_local, len(common_set)))
    print('<table border="1" width="600px">')
    print(('<tr>'
           '<th align="left" width="300px"><h4>{0}</h4></th>'
           '<th align="left" width="150px"><h4><a href="{3}{4}">{1}</a></h4></th>'
           '<th align="left" width="150px"><h4><a href="{3}{5}">{2}</a></h4></th></tr>').format(
               "match", nikename1, nikename2, LINK_DB_PEOPLE, uid1, uid2))

    link_db_item = "http://{0}.douban.com/subject/".format(cat)
    row = ('<tr>'
           '<td width="300px"><h4><a href="{3}" target="_blank">{0}</a></h4></td>'
           '<td width="150px"><h4>{1}</h4></td>'
           '<td width="150px"><h4>{2}</h4></td></tr>')
    # Sorted so that the same data always renders the same page.
    for item_name in sorted(common_set):
        status1, subject_id, aka = items1[item_name]
        status2 = items2[item_name][0]
        # Show the aka when the first user's entry has one, else the title.
        name = aka if aka is not None else item_name
        print(row.format(name, status1, status2, link_db_item + subject_id))
    print("</table>")

    rate = len(common_set)
    return [uid2_idx, rate]
|
||
def var_verify_2p(user1, user2):
    """Check that the profile of both users can be fetched from the API.

    Error messages are printed (as HTML fragments) to the current stdout.

    Args:
        user1: first user id.
        user2: second user id; not requested if the first lookup fails.

    Returns:
        True when both profile requests succeeded, False otherwise.
    """
    profile_url = "http://api.douban.com/people/{0}?alt=atom&apikey={1}"
    responses = []
    try:
        for user in (user1, user2):
            responses.append(try_api(profile_url.format(user, APIKEY)))
    except (urllib.error.URLError, ValueError) as e:
        if hasattr(e, 'reason'):
            print("<h4>Cannot connected to the server</h4>")
        if hasattr(e, 'code'):
            print("<h4>Return code:", e.code, "error</h4>")
        # Printed for every failure so that errors carrying neither attribute
        # (e.g. ValueError) still produce a visible message.
        print("<h4>Usernames may not exsit</h4>")
    finally:
        # The responses are only needed as proof the requests worked.
        for response in responses:
            response.close()
    return len(responses) == 2
def get_shortenurl(long_url):
    """Ask the URL shortener service for a short link to ``long_url``.

    This is best effort: any failure prints a notice to the current stdout
    instead of raising.

    Args:
        long_url: the URL to shorten.

    Returns:
        The first quoted string in the response containing "http://goo.gl",
        or None if the request failed or no such string was returned.
    """
    # json.dumps escapes quotes and backslashes in the URL, and the request
    # body must be bytes -- a str body makes urlopen() raise TypeError.
    data = json.dumps({"longUrl": long_url}).encode("utf8")
    try:
        req = urllib.request.Request("https://www.googleapis.com/urlshortener/v1/url", data, {'Content-Type': 'application/json'})
        response = urllib.request.urlopen(req)
        try:
            rec = response.read()
        finally:
            response.close()
        for s in rec.decode("utf8").split('"'):
            if "http://goo.gl" in s:
                return s
    except Exception:
        # Deliberately broad: shortening is optional and must never break the
        # page. SystemExit/KeyboardInterrupt still propagate.
        print("<h4>Short URL IS UNAVAILABLE NOW!</h4>")
    return None
def main():
    """Handle one CGI request comparing the collections of two users.

    The result page is first rendered into
    ``ROOTDIR/htdocs/history/couple_<user1>_<user2>.html`` by temporarily
    redirecting sys.stdout, then sent back to the client together with the
    CGI header. Form fields "user1" and "user2" select the users.

    Returns:
        0 when the users could not be verified, otherwise None. The page
        (including any error message) is sent to the client in both cases.
    """
    form = cgi.FieldStorage()
    # getfirst() always yields a single string, even for a repeated field.
    user1 = form.getfirst("user1", "3215295")
    user2 = form.getfirst("user2", "sunus")

    # The ids come straight from the request and end up in a file name, so
    # anything that is not a word character, "." or "-" (notably path
    # separators) is replaced before building the path.
    unsafe = re.compile(r"[^\w.-]")
    page_name = "couple_{0}_{1}.html".format(unsafe.sub("_", user1), unsafe.sub("_", user2))
    page_path = ROOTDIR + "/htdocs/history/" + page_name

    try:
        page = open(page_path, "w", encoding="utf8")
    except IOError:
        # An existing file that cannot be overwritten: delete it, retry once.
        os.remove(page_path)
        page = open(page_path, "w", encoding="utf8")

    verified = False
    old_stdout = sys.stdout
    sys.stdout = page
    try:
        print("<html>")
        print("<head>")
        print('<meta http-equiv="content-type" content="text/html; charset=utf8" />')
        print("<title>Result</title>")
        print("</head>")
        print("<body>")
        verified = var_verify_2p(user1, user2)
        if verified:
            # Download all six collections with three daemon workers.
            user_queue = queue.Queue()
            for _ in range(3):
                worker = Worker(user_queue)
                worker.daemon = True
                worker.start()
            for people in [user1, user2]:
                for cat in ["movie", "music", "book"]:
                    user_queue.put([people, cat])
            user_queue.join()

            # Workers append in completion order; a stable sort puts user1's
            # entry first so that index 0 / 1 below mean user1 / user2.
            for table in (table_movie, table_music, table_book):
                table.sort(key=lambda entry: entry[0] != user1)
            get_match_rate(0, 1, "movie")
            get_match_rate(0, 1, "music")
            get_match_rate(0, 1, "book")

            longurl = HOSTNAME.rstrip("/") + "/history/" + page_name
            # Fall back to the long URL when no short one is available.
            short_url = get_shortenurl(longurl) or longurl
            print("<h4>转发本页地址:{0}</h4>".format(short_url))
        print("</body>")
        print("</html>")
    finally:
        # Always undo the redirection, including on verification failure or
        # an exception, so the response below reaches the client.
        sys.stdout = old_stdout
        page.close()

    with open(page_path, "r", encoding="utf8") as saved:
        html = saved.read()
    sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer)  # comment this out if you want to debug
    print("Content-type:text/html; charset=UTF-8\r\n\r\n")
    print(html)
    flog.close()
    if not verified:
        return 0
|
||
# Script entry point: handle the CGI request as soon as the file is executed.
main()
Oops, something went wrong.