Skip to content

Commit

Permalink
It's now up and running :)
Browse files Browse the repository at this point in the history
  • Loading branch information
sunuslee committed Sep 21, 2011
0 parents commit 35c4c00
Show file tree
Hide file tree
Showing 8 changed files with 1,318 additions and 0 deletions.
572 changes: 572 additions & 0 deletions api_limit

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions cgi101.py
@@ -0,0 +1,7 @@
#!/usr/bin/python3.1
import cgi

# Minimal CGI smoke test: emit the response header and a page title, then
# check that the history test file can be opened for reading.
print("Content-type: text/html\r\n\r\n")
print("<title>Reply Page</title>")
with open("../htdocs/history/test", "r"):
    # Nothing is read; the file is only opened and closed again.
    pass
298 changes: 298 additions & 0 deletions do_2people
@@ -0,0 +1,298 @@
#!/usr/bin/env python3.1
# -*- coding: UTF-8 -*-

# Wed Aug 31, 02:46 sunuslee
# sunuslee (at) gmail(dot)com
# This is my first python program , with my first cgi page and first apache server
# You can do anything you want with those files under ONLY ONE condition:
# Please Do keep those lines above
import cgitb
cgitb.enable()
import sys, os, codecs
import cgi
import sys
import re
import urllib.request
import urllib.parse
import urllib.error
import threading
import queue
import time
import fcntl
# Please use your own api key instead. e.g. :
# APIKEY = "23eeeb4347bdd26bfc6b7ee9a3b755dd"
# Server root; main() writes the generated result pages below
# ROOTDIR + "/htdocs/history/".
ROOTDIR = "/home/sunus/apache/"
# Public URL prefix used by main() to build the link to a result page.
HOSTNAME = "http://localhost/"
# Prefix for links to a user's Douban profile page (uid is appended).
LINK_DB_PEOPLE = "http://www.douban.com/people/"
# Douban API key, sent as a query parameter on API requests.
APIKEY = "053caab0d0224c680fb600127066e538"
# Not referenced anywhere in the visible code.
SECRET = ''
# Short aliases for the fcntl.flock() flags used by try_api().
LOCK_EX = fcntl.LOCK_EX
LOCK_UN = fcntl.LOCK_UN
LOCK_NB = fcntl.LOCK_NB
# Append-mode log file, opened at import time relative to the current working
# directory; main() closes it when the request has been served.
flog = open("log", "a+")
# Only named in a `global` statement in main(); otherwise unused in the
# visible code.
people_list = []
# Per-category result tables. Each get_user_collection() call appends one
# [uid, item_dict] entry to the table matching its category.
table_movie = []
table_music = []
table_book = []
# Running total of downloaded collection data, updated by worker threads
# while holding tbr_lock.
total_bytes_recv = 0
tbr_lock = threading.Lock()
class Worker(threading.Thread):
    """Thread that processes ``(uid, cat)`` jobs taken from a shared queue.

    The thread loops forever, so callers are expected to mark it as a daemon
    and to wait for completion with ``work_queue.join()``.
    """

    def __init__(self, work_queue):
        """Remember the queue this worker pulls its jobs from."""
        super().__init__()
        self.work_queue = work_queue

    def run(self):
        """Process jobs until the interpreter exits.

        A job that raises is reported on stderr and then dropped; the worker
        stays alive. Letting the exception escape would end this thread, and
        once every worker has died the remaining jobs are never marked done,
        so ``work_queue.join()`` would block forever.
        """
        while True:
            # get() stays outside the try block so that task_done() is only
            # ever called for an item that was actually dequeued.
            job = self.work_queue.get()
            try:
                uid, cat = job
                self.process(uid, cat)
            except Exception:
                # Local import: traceback is not among this file's imports.
                import traceback
                traceback.print_exc()
            finally:
                self.work_queue.task_done()

    def process(self, uid, cat):
        """Download the collection of user ``uid`` for category ``cat``."""
        get_user_collection(uid, cat)

def try_api(api_url):
    """Open ``api_url`` once the shared API quota allows another call.

    Every call is recorded as one line appended to the file "api_limit" in the
    current working directory::

        <url without its last 40 characters>\t<ctime>,<count>,<window start>

    ``count`` is the number of calls made in the current window and
    ``window start`` is the epoch second at which that window began. A new
    window starts once 65 seconds have passed; while the current window already
    holds 40 calls, this function sleeps 60 seconds and checks again. Access to
    the file is serialised across processes with an exclusive ``flock``.

    Args:
        api_url: Full request URL. Its last 40 characters are cut off before
            logging.

    Returns:
        The response object returned by ``urllib.request.urlopen(api_url)``.

    Raises:
        IOError: If "api_limit" cannot be opened.
        urllib.error.URLError: Propagated from ``urlopen``.
    """
    while True:
        # Poll for the exclusive lock once per second. open() is deliberately
        # outside the try block: a failure to open the file is a real error,
        # not lock contention, and must not reach the handler with `fh`
        # unbound.
        while True:
            fh = open("api_limit", "a+")
            try:
                fcntl.flock(fh.fileno(), LOCK_EX | LOCK_NB)
                break
            except IOError:
                flog.write("FILE's LOCKED,WAIT....\n")
                fh.close()
                time.sleep(1)

        quota_exhausted = False
        try:
            now = int(time.time())
            if fh.tell() == 0:
                # Empty file: this is the very first recorded call.
                cnt, window_start = 1, now
            else:
                # The record tail ",<count>,<10-digit epoch>\n" fits into the
                # last 17 bytes; read everything except the final newline.
                os.lseek(fh.fileno(), -17, os.SEEK_END)
                lastline = fh.read(16)
                # Last two comma-separated fields, in reverse order.
                lasttime, cnt = lastline.split(',')[-1:-3:-1]
                if now - int(lasttime) >= 65:
                    # Previous window has expired: start a new one.
                    cnt, window_start = 1, now
                elif int(cnt) >= 40:
                    # ">=" rather than "==" so that a count which somehow
                    # overshot the limit still blocks further calls.
                    quota_exhausted = True
                else:
                    cnt, window_start = int(cnt) + 1, lasttime

            if quota_exhausted:
                flog.write("No More API at this time {0}".format(time.ctime()))
            else:
                fh.write("{0}\t{1},{2},{3}\n".format(
                    api_url[0:-40], time.ctime(), cnt, window_start))
        finally:
            # close() flushes the buffered record to disk and, by closing the
            # descriptor, releases the flock. Unlocking before the flush would
            # let another process read a stale count.
            fh.close()

        if not quota_exhausted:
            break
        time.sleep(60)
    return urllib.request.urlopen(api_url)

def get_nickname(uid):
    """Return the display name found in a Douban user's Atom profile.

    The name is the text of the ``<title>`` element on the first response line
    that contains ``</title>``.

    Args:
        uid: Douban user id, inserted into the API URL as-is.

    Returns:
        The title text, or None when no line contains ``</title>``.
    """
    fh = try_api("http://api.douban.com/people/{0}?alt=atom&apikey={1}".format(uid, APIKEY))
    try:
        content = fh.read().decode("utf8")
    finally:
        # Close the response even when reading or decoding fails.
        fh.close()
    for line in content.splitlines():
        if "</title>" in line:
            # Extract by tag instead of by fixed offsets so that the result
            # does not depend on how the line is indented.
            match = re.search(r"<title[^>]*>(.*?)</title>", line)
            if match:
                return match.group(1)
            # Opening tag is not on this line: fall back to the fixed offsets
            # (one leading character plus "<title>", and "</title>").
            return line[8:-8]
    return None

# Chinese display label for each collection state. Keys are the API status
# string followed by the category name (get_user_collection() looks them up as
# status + cat):
#   movie: want to watch / watching / watched
#   music: want to listen / listening / listened
#   book:  want to read / reading / read
status_chs = {"wishmovie":"想看", "watchingmovie":"在看", "watchedmovie":"看过",
"wishmusic":"想听", "listeningmusic":"在听", "listenedmusic":"听过",
"wishbook":"想读", "readingbook":"在读", "readbook":"读过"}

def get_user_collection(uid, cat):
    """Download one user's whole collection for one category.

    The collection feed is fetched 50 entries per request until a response no
    longer contains the text "entry". Each response is scanned line by line for
    an entry's status, title, subject link and optional Chinese alias, in that
    order. When done, ``[uid, item_dict]`` is appended to the table for ``cat``
    where ``item_dict`` maps::

        title -> [status label, subject id, alias or None]

    Entries lacking a status, title or link are skipped.

    Args:
        uid: Douban user id.
        cat: "movie" or "music"; any other value selects the book table.
    """
    global total_bytes_recv
    if cat == "movie":
        table = table_movie
    elif cat == "music":
        table = table_music
    else:
        table = table_book

    item_dict = {}
    status_tag = "<db:status>"
    title_tag = "<title>"
    link_prefix = 'http://{0}.douban.com/subject'.format(cat)
    digits_re = re.compile(r'\d+')
    aka_tag = '<db:attribute lang="zh_CN" name="aka">'

    start = 1  # start-index begins at 1
    while True:
        # "apikey" must be spelled as in the other API calls; a misspelled
        # parameter name means the key is never sent.
        uri = "http://api.douban.com/people/{0}/collection?cat={1}&tag=&status=&start-index={2}&max-results=50&alt=atom&apikey={3}".format(uid, cat, start, APIKEY)
        fh = try_api(uri)
        try:
            raw = fh.read()
        finally:
            fh.close()
        content = raw.decode("utf8")
        with tbr_lock:
            # Count the raw payload so the total really is in bytes.
            total_bytes_recv += len(raw)

        if "entry" not in content:
            break

        # step: 1 = expect status, 2 = expect title, 3 = expect link,
        #       4 = link seen, alias is optional.
        step = 1
        status = title = link = aka = None
        for line in content.splitlines():
            if step == 1 and status_tag in line:
                status = line[13:-12]
                step = 2
            elif step == 2 and title_tag in line:
                title = line[10:-8]
                step = 3
            elif step == 3 and link_prefix in line:
                found = digits_re.search(line)
                if found:
                    link = found.group()
                    aka = None  # the item may not have an alias at all
                    step = 4
            elif step == 4 and aka_tag in line:
                aka = line[41:-15]
                step = 1
            elif "</entry>" in line:
                # link is only set after status and title, so this check
                # covers all three; incomplete entries are dropped instead of
                # reusing values left over from the previous entry.
                if link is not None:
                    label = status_chs.get(status + cat, status)
                    item_dict[title] = [label, link, aka]
                step = 1
                status = title = link = aka = None
        start += 50
    table.append([uid, item_dict])

def get_match_rate(uid1_idx, uid2_idx, cat):
    """Print the items two users have in common for one category.

    Both users are addressed by their index in the category's table. Items
    are matched by title. The output is an HTML heading followed by a table
    with one row per shared title, showing each user's status for it.

    Args:
        uid1_idx: Table index of the first user.
        uid2_idx: Table index of the second user.
        cat: "movie" or "music"; any other value selects the book table.

    Returns:
        ``[uid2_idx, rate]`` where ``rate`` is the number of shared titles.
    """
    if cat == "movie":
        table, cat_local = table_movie, "电影"
    elif cat == "music":
        table, cat_local = table_music, "音乐"
    else:
        table, cat_local = table_book, "书籍"

    uid1, items1 = table[uid1_idx]
    uid2, items2 = table[uid2_idx]

    # Built one title at a time so the sets are filled exactly as a plain
    # add() loop would fill them.
    titles1 = {title for title in items1}
    titles2 = {title for title in items2}
    common_set = titles1 & titles2

    nikename1 = get_nickname(uid1)
    nikename2 = get_nickname(uid2)

    heading = ('<h4><a href="{0}{1}">{3}</a>'
               '<a href="{0}{2}">{4}</a>'
               '都喜欢的{5}有({6}):</h4>')
    print(heading.format(LINK_DB_PEOPLE, uid1, uid2, nikename1, nikename2,
                         cat_local, len(common_set)))

    print('<table border="1" width="600px">')
    header_row = ('<tr>'
                  '<th align="left" width="300px"><h4>{0}</h4></th>'
                  '<th align="left" width="150px"><h4><a href="{3}{4}">{1}</a></h4></th>'
                  '<th align="left" width="150px"><h4><a href="{3}{5}">{2}</a></h4></th></tr>')
    print(header_row.format("match", nikename1, nikename2,
                            LINK_DB_PEOPLE, uid1, uid2))

    link_db_item = "http://{0}.douban.com/subject/".format(cat)
    item_row = ('<tr>'
                '<td width="300px"><h4><a href="{3}" target="_blank">{0}</a></h4></td>'
                '<td width="150px"><h4>{1}</h4></td>'
                '<td width="150px"><h4>{2}</h4></td></tr>')
    for title in common_set:
        status1, subject_id, aka = items1[title]
        # Show the alias when the first user's record has one.
        name = title if aka is None else aka
        print(item_row.format(name, status1, items2[title][0],
                              link_db_item + subject_id))
    print("</table>")

    rate = len(common_set)
    return [uid2_idx, rate]

def var_verify_2p(user1, user2):
    """Check that the profiles of both users can be fetched from the API.

    The profiles are requested in order (``user1`` first). On a URL or value
    error an explanatory ``<h4>`` message is printed to standard output. Every
    response that was opened is closed again before returning.

    Returns:
        True when both requests succeeded, otherwise False.
    """
    profile_url = "http://api.douban.com/people/{0}?alt=atom&apikey={1}"
    opened = []
    try:
        for user in (user1, user2):
            opened.append(try_api(profile_url.format(user, APIKEY)))
    except (urllib.error.URLError, ValueError) as err:
        if hasattr(err, 'reason'):
            print("<h4>Cannot connected to the server</h4>")
        if hasattr(err, 'code'):
            print("<h4>Return code:", err.code, "error</h4>")
            print("<h4>Usernames may not exsit</h4>")
    finally:
        for response in opened:
            response.close()
    # Fewer than two open responses means at least one lookup failed.
    return len(opened) == 2
def get_shortenurl(long_url):
    """Ask the Google URL shortener for a short alias of ``long_url``.

    Args:
        long_url: The URL to shorten.

    Returns:
        The first double-quoted token of the response that contains
        "http://goo.gl", or None when the request fails or the response holds
        no such token. On failure a notice is printed to standard output.
    """
    # Local import: json is not among this file's top-level imports.
    import json

    # json.dumps() escapes quotes and backslashes in the URL, and the body is
    # encoded because urlopen() requires POST data to be bytes.
    payload = json.dumps({"longUrl": long_url}).encode("utf8")
    try:
        req = urllib.request.Request(
            "https://www.googleapis.com/urlshortener/v1/url",
            payload,
            {'Content-Type': 'application/json'})
        response = urllib.request.urlopen(req)
        try:
            rec = response.read()
        finally:
            response.close()
        for s in rec.decode().split('"'):
            if "http://goo.gl" in s:
                return s
    except Exception:
        # Best effort: the page is still useful without a short link. Unlike a
        # bare except, this lets SystemExit and KeyboardInterrupt through.
        print("<h4>Short URL IS UNAVAILABLE NOW!</h4>")
    return None
def main():
    """Handle one CGI request comparing the collections of two Douban users.

    The form fields "user1" and "user2" select the users. The result page is
    first written to ``<ROOTDIR>/htdocs/history/couple_<user1>_<user2>.html``
    by temporarily redirecting ``sys.stdout`` to that file, and is then sent
    to the client as the CGI response.

    When the users cannot be verified, the page contains only the messages
    printed by ``var_verify_2p``. It is still closed properly and served, so
    the client always receives a response.
    """
    form = cgi.FieldStorage()
    # getfirst() always yields a single string, even for repeated fields.
    user1 = form.getfirst("user1", "3215295")
    user2 = form.getfirst("user2", "sunus")

    # The ids come straight from the request. Anything that could change the
    # target path (such as "/" or "\\") is replaced before building the
    # file name.
    unsafe_chars = re.compile(r"[^A-Za-z0-9_.-]")
    page_name = "couple_{0}_{1}.html".format(
        unsafe_chars.sub("_", user1), unsafe_chars.sub("_", user2))
    page_path = os.path.join(ROOTDIR, "htdocs", "history", page_name)

    categories = ("movie", "music", "book")
    original_stdout = sys.stdout
    # Mode "w" truncates an existing page, so no prior delete is needed.
    sys.stdout = open(page_path, "w", encoding="utf8")
    try:
        print("<html>")
        print("<head>")
        print('<meta http-equiv="content-type" content="text/html; charset=utf8" />')
        print("<title>Result</title>")
        print("</head>")
        print("<body>")
        if var_verify_2p(user1, user2):
            # Three daemon workers download the six collections in parallel.
            user_queue = queue.Queue()
            for _ in range(3):
                worker = Worker(user_queue)
                worker.daemon = True
                worker.start()
            for people in (user1, user2):
                for cat in categories:
                    user_queue.put([people, cat])
            user_queue.join()

            # Every table now holds two entries; compare index 0 with index 1.
            for cat in categories:
                get_match_rate(0, 1, cat)

            # rstrip() avoids a double slash when HOSTNAME ends with "/".
            longurl = HOSTNAME.rstrip("/") + "/history/" + page_name
            short_url = get_shortenurl(longurl)
            print("<h4>转发本页地址:{0}</h4>".format(short_url))
        print("</body>")
        print("</html>")
    finally:
        # Always finish the page file and restore the real stdout, also on
        # errors, so that nothing written afterwards ends up in the file.
        sys.stdout.close()
        sys.stdout = original_stdout

    with open(page_path, "r", encoding="utf8") as page:
        html = page.read()
    sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer) # comment this out if you want to debug
    print("Content-type:text/html; charset=UTF-8\r\n\r\n")
    print(html)
    flog.close()

# Run the request handler only when executed as a script (as the CGI server
# does), not when the module is imported.
if __name__ == "__main__":
    main()

0 comments on commit 35c4c00

Please sign in to comment.