Permalink
Browse files

It's now up and running :)

  • Loading branch information...
0 parents commit 35c4c00d7a64282ce17ac9e4fb471ddcdf208453 @sunuslee committed Sep 21, 2011
Showing with 1,318 additions and 0 deletions.
  1. +572 −0 api_limit
  2. +7 −0 cgi101.py
  3. +298 −0 do_2people
  4. +371 −0 do_ci_html
  5. +23 −0 do_func.py
  6. +11 −0 hello.py
  7. +23 −0 log
  8. +13 −0 printenv
Oops, something went wrong.
@@ -0,0 +1,7 @@
+#!/usr/bin/python3.1
+import cgi
+
# Emit the CGI header (the blank line ends the header section), then a
# minimal HTML body.
print("Content-type: text/html\r\n\r\n")
print("<title>Reply Page</title>")

# Open the history test file read-only and release it straight away;
# nothing is read from it.
with open("../htdocs/history/test", "r"):
    pass
@@ -0,0 +1,298 @@
+#!/usr/bin/env python3.1
+# -*- coding: UTF-8 -*-
+
+# Wed Aug 31, 02:46 sunuslee
+# sunuslee (at) gmail(dot)com
+# This is my first python program , with my first cgi page and first apache server
+# You can do anything you want with those files under ONLY ONE condition:
+# Please Do keep those lines above
+import cgitb
+cgitb.enable()
+import sys, os, codecs
+import cgi
+import sys
+import re
+import urllib.request
+import urllib.parse
+import urllib.error
+import threading
+import queue
+import time
+import fcntl
# Please use your own api key instead. e.g. :
# APIKEY = "23eeeb4347bdd26bfc6b7ee9a3b755dd"
APIKEY = "053caab0d0224c680fb600127066e538"
SECRET = ''

# ROOTDIR is the directory that contains htdocs/; HOSTNAME is the URL prefix
# used when building links to the generated pages.
ROOTDIR = "/home/sunus/apache/"
HOSTNAME = "http://localhost/"
LINK_DB_PEOPLE = "http://www.douban.com/people/"

# Short aliases for the fcntl.flock() operation flags.
LOCK_EX, LOCK_UN, LOCK_NB = fcntl.LOCK_EX, fcntl.LOCK_UN, fcntl.LOCK_NB

# Log file, opened for appending relative to the current working directory.
flog = open("log", "a+")

people_list = []
# One table per category; each receives [uid, item_dict] entries.
table_movie, table_music, table_book = [], [], []

# Running total of decoded characters received, guarded by tbr_lock.
total_bytes_recv = 0
tbr_lock = threading.Lock()
class Worker(threading.Thread):
    """Thread that consumes ``(uid, cat)`` jobs from a shared queue forever.

    Each job is handed to :meth:`process`.  Every fetched job is marked done
    exactly once, whether it succeeded or not, so ``Queue.join()`` in the
    producer always returns.
    """

    def __init__(self, work_queue):
        """Remember the queue to consume; *work_queue* yields (uid, cat) pairs."""
        super().__init__()
        self.work_queue = work_queue

    def run(self):
        """Process jobs until the interpreter exits (the loop never ends)."""
        while True:
            # get() stays outside the try block so task_done() is only called
            # for a job that was actually taken from the queue.
            job = self.work_queue.get()
            try:
                uid, cat = job
                self.process(uid, cat)
            except Exception as exc:
                # A failing job must not kill the thread: the jobs still
                # queued would never be marked done and join() would hang.
                print("Worker: job {0!r} failed: {1!r}".format(job, exc),
                      file=sys.stderr)
            finally:
                self.work_queue.task_done()

    def process(self, uid, cat):
        """Download the *cat* collection of user *uid*."""
        get_user_collection(uid, cat)
+
def _lock_limit_file(path):
    """Open *path* in append/read mode and take an exclusive flock on it.

    Polls once a second while another holder owns the lock.  Errors from
    open() itself propagate instead of being retried.
    """
    while True:
        fh = open(path, "a+b")
        try:
            fcntl.flock(fh.fileno(), LOCK_EX | LOCK_NB)
        except IOError:
            fh.close()
            flog.write("FILE's LOCKED,WAIT....\n")
            time.sleep(1)
        else:
            return fh


def _last_quota_record(fh, tail_bytes=256):
    """Return ``(window_start, count)`` from the last line of the quota file.

    Each line ends with ``,<count>,<window start as epoch seconds>``.
    Returns ``(None, 0)`` when the file is empty or the last line cannot be
    parsed, which callers treat as "start a new window".
    """
    fh.seek(0, os.SEEK_END)
    size = fh.tell()
    if size == 0:
        return None, 0
    # Only the tail is needed; the file grows by one line per API call.
    fh.seek(max(0, size - tail_bytes))
    lines = fh.read().decode("utf-8", "replace").splitlines()
    try:
        count, started = lines[-1].rsplit(",", 2)[-2:]
        return int(started), int(count)
    except (IndexError, ValueError):
        return None, 0


def try_api(api_url, limit=40, window=65):
    """Open *api_url* once the shared request quota allows it.

    The quota is tracked in the file ``api_limit``, guarded by an exclusive
    flock: at most *limit* requests are recorded per *window* seconds.  When
    the quota is used up the call sleeps 60 seconds and tries again.

    Returns the object produced by ``urllib.request.urlopen(api_url)``;
    errors raised by urlopen propagate to the caller.
    """
    # The callers' URLs end in "&apikey=" plus the 32-character key
    # (40 characters); keep that suffix out of the quota file.
    logged_url = api_url[0:-40]
    while True:
        fh = _lock_limit_file("api_limit")
        try:
            now = int(time.time())
            window_start, used = _last_quota_record(fh)
            if window_start is None or now - window_start >= window:
                window_start, used = now, 0
            if used < limit:
                record = "{0}\t{1},{2},{3}\n".format(
                    logged_url, time.ctime(), used + 1, window_start)
                fh.write(record.encode("utf-8"))
                break
            flog.write("No More API at this time {0}\n".format(time.ctime()))
        finally:
            # Flush before unlocking so the next holder sees this record,
            # and always close so no descriptor is left open per call.
            try:
                fh.flush()
                fcntl.flock(fh.fileno(), LOCK_UN)
            finally:
                fh.close()
        time.sleep(60)
    return urllib.request.urlopen(api_url)
+
def get_nickname(uid):
    """Return the display name of user *uid* from the people Atom entry.

    The name is the text of the last ``<title>`` element found on a single
    line of the response.  If the response has no such element, *uid* itself
    is returned so callers always get a printable string.
    """
    fh = try_api("http://api.douban.com/people/{0}?alt=atom&apikey={1}".format(uid, APIKEY))
    try:
        content = fh.read().decode("utf8")
    finally:
        fh.close()
    # "." does not cross newlines, so a match never spans two lines.
    titles = re.findall(r"<title(?:\s[^>]*)?>(.*?)</title>", content)
    if not titles:
        return uid
    return titles[-1]
+
# Maps "<status><category>" as reported by the API to the label shown on the page.
status_chs = {"wishmovie":"想看", "watchingmovie":"在看", "watchedmovie":"看过",
              "wishmusic":"想听", "listeningmusic":"在听", "listenedmusic":"听过",
              "wishbook":"想读", "readingbook":"在读", "readbook":"读过"}

_STATUS_RE = re.compile(r"<db:status>(.*?)</db:status>")
_TITLE_RE = re.compile(r"<title>(.*?)</title>")
_AKA_RE = re.compile(r'<db:attribute lang="zh_CN" name="aka">(.*?)</db:attribute>')
_SUBJECT_ID_RE = re.compile(r'\d+')


def _parse_collection_page(content, cat, item_dict):
    """Add every complete entry of one Atom page to *item_dict*.

    Within an entry the fields are picked up in a fixed order: status, then
    title, then the subject link, then the optional zh_CN "aka" name.  An
    entry is stored at its ``</entry>`` line only if status, title and link
    were all found; otherwise it is skipped.  Returns the number stored.
    """
    link_marker = 'http://{0}.douban.com/subject'.format(cat)
    status = title = link = aka = None
    step = 1
    recorded = 0
    for line in content.splitlines():
        found = None
        if step == 1:
            found = _STATUS_RE.search(line)
            if found:
                status = found.group(1)
                step = 2
        elif step == 2:
            found = _TITLE_RE.search(line)
            if found:
                title = found.group(1)
                step = 3
        elif step == 3 and link_marker in line:
            # The subject id is the first run of digits on the link line.
            found = _SUBJECT_ID_RE.search(line)
            if found:
                link = found.group()
                step = 4
        elif step == 4:
            # Optional: an item may have no aka, in which case it stays None.
            found = _AKA_RE.search(line)
            if found:
                aka = found.group(1)
                step = 5
        if found is None and "</entry>" in line:
            if step >= 4:
                # Unknown status/category pairs fall back to the raw status.
                label = status_chs.get(status + cat, status)
                item_dict[title] = [label, link, aka]
                recorded += 1
            status = title = link = aka = None
            step = 1
    return recorded


def get_user_collection(uid, cat):
    """Download the whole *cat* collection of user *uid*.

    Pages of 50 entries are fetched through try_api() until a page contains
    no entries.  The result is appended to the table for the category
    (table_movie / table_music / table_book) as ``[uid, item_dict]`` where
    item_dict maps item title -> [status label, subject id, aka or None].
    Also adds the size of every decoded page to total_bytes_recv.
    """
    global total_bytes_recv
    if cat == "movie":
        table = table_movie
    elif cat == "music":
        table = table_music
    else:
        table = table_book

    item_dict = {}
    start = 1  # the API's start-index begins at 1
    while True:
        uri = "http://api.douban.com/people/{0}/collection?cat={1}&tag=&status=&start-index={2}&max-results=50&alt=atom&apikey={3}".format(uid, cat, start, APIKEY)
        fh = try_api(uri)
        try:
            content = fh.read().decode("utf8")
        finally:
            fh.close()
        with tbr_lock:
            total_bytes_recv += len(content)
        if "entry" not in content:
            break
        # A page that mentions "entry" but yields nothing usable would
        # otherwise keep the loop (and the API quota) running forever.
        if _parse_collection_page(content, cat, item_dict) == 0:
            break
        start += 50
    table.append([uid, item_dict])
+
def get_match_rate(uid1_idx, uid2_idx, cat):
    """Print the items two users share in category *cat* as an HTML table.

    *uid1_idx* and *uid2_idx* are the users' positions in the table for the
    category; each table entry is ``[uid, item_dict]``.  Output goes to
    sys.stdout.  Returns ``[uid2_idx, rate]`` where rate is the number of
    item titles present in both collections.
    """
    if cat == "movie":
        table = table_movie
        cat_local = "电影"
    elif cat == "music":
        table = table_music
        cat_local = "音乐"
    else:
        table = table_book
        cat_local = "书籍"

    uid1, items1 = table[uid1_idx]
    uid2, items2 = table[uid2_idx]
    # Sorted so the rows come out in the same order on every run.
    common = sorted(set(items1) & set(items2))

    nikename1 = get_nickname(uid1)
    nikename2 = get_nickname(uid2)
    # uids come from the request, so quote them before use inside href.
    href1 = LINK_DB_PEOPLE + urllib.parse.quote(uid1)
    href2 = LINK_DB_PEOPLE + urllib.parse.quote(uid2)

    print('<h4><a href="{0}">{1}</a> <a href="{2}">{3}</a> 都喜欢的{4}有({5}):</h4>'.format(
        href1, nikename1, href2, nikename2, cat_local, len(common)))
    print('<table border="1" width="600px">')
    print('<tr>'
          '<th align="left" width="300px"><h4>{0}</h4></th>'
          '<th align="left" width="150px"><h4><a href="{1}">{2}</a></h4></th>'
          '<th align="left" width="150px"><h4><a href="{3}">{4}</a></h4></th></tr>'.format(
              "match", href1, nikename1, href2, nikename2))

    link_db_item = "http://{0}.douban.com/subject/".format(cat)
    for item_name in common:
        status1, subject_id, aka = items1[item_name]
        status2 = items2[item_name][0]
        # Prefer the aka name when the first user's entry has one.
        name = aka if aka is not None else item_name
        print('<tr>'
              '<td width="300px"><h4><a href="{0}" target="_blank">{1}</a></h4></td>'
              '<td width="150px"><h4>{2}</h4></td>'
              '<td width="150px"><h4>{3}</h4></td></tr>'.format(
                  link_db_item + subject_id, name, status1, status2))
    print("</table>")
    rate = len(common)
    return [uid2_idx, rate]
+
def var_verify_2p(user1, user2):
    """Check that both users can be fetched from the people API.

    Requests user1 and then user2 through try_api().  On URLError or
    ValueError an explanation is printed and the remaining lookup is
    skipped.  Every response that was opened is closed.  Returns True only
    if both requests succeeded.
    """
    opened = []
    try:
        for uid in (user1, user2):
            opened.append(try_api("http://api.douban.com/people/{0}?alt=atom&apikey={1}".format(uid,APIKEY)))
    except (urllib.error.URLError, ValueError) as e:
        if hasattr(e, 'reason'):
            print("<h4>Cannot connected to the server</h4>")
        if hasattr(e, 'code'):
            print("<h4>Return code:",e.code,"error</h4>")
        print("<h4>Usernames may not exsit</h4>")
    finally:
        for response in opened:
            response.close()
    return len(opened) == 2
def get_shortenurl(long_url):
    """Ask the goo.gl shortener for a short form of *long_url*.

    Best effort: returns the first quoted string in the response that
    contains "http://goo.gl", or None when the request fails or no such
    string is present.  On failure a notice is printed to sys.stdout.
    """
    import json  # not among the file's top-level imports

    # json.dumps handles quoting; a POST body must be bytes on Python 3.
    payload = json.dumps({"longUrl": long_url}).encode("utf-8")
    try:
        req = urllib.request.Request("https://www.googleapis.com/urlshortener/v1/url", payload, {'Content-Type': 'application/json'})
        response = urllib.request.urlopen(req)
        try:
            rec = response.read()
        finally:
            response.close()
        for s in rec.decode().split('"'):
            if "http://goo.gl" in s:
                return s
    except Exception:
        # Deliberately broad: shortening is optional and must never break
        # the page, but KeyboardInterrupt/SystemExit are no longer swallowed.
        print("<h4>Short URL IS UNAVAILABLE NOW!</h4>")
    return None
# Request values end up in a file name, a URL and HTML, so accept only word
# characters, "." and "-".
_UID_RE = re.compile(r"[\w.-]+\Z")


def _collect_collections(users):
    """Download every (user, category) collection using three worker threads."""
    user_queue = queue.Queue()
    for _ in range(3):
        worker = Worker(user_queue)
        worker.daemon = True
        worker.start()
    for people in users:
        for cat in ["movie", "music", "book"]:
            user_queue.put([people, cat])
    user_queue.join()


def _write_report(user1, user2):
    """Print the complete HTML report for the pair to sys.stdout.

    Returns False when var_verify_2p() rejected the users; the document is
    still closed properly so it can be served.
    """
    print("<html>")
    print("<head>")
    print('<meta http-equiv="content-type" content="text/html; charset=utf8" />')
    print("<title>Result</title>")
    print("</head>")
    print("<body>")
    verified = var_verify_2p(user1, user2)
    if verified:
        _collect_collections([user1, user2])
        # Workers finish in arbitrary order; put user1's entry at index 0
        # (stable sort: False sorts before True).
        for table in (table_movie, table_music, table_book):
            table.sort(key=lambda entry: entry[0] != user1)
        for cat in ("movie", "music", "book"):
            get_match_rate(0, 1, cat)
        longurl = HOSTNAME.rstrip("/") + "/history/couple_{0}_{1}.html".format(user1, user2)
        short_url = get_shortenurl(longurl)
        # Fall back to the long URL rather than printing "None".
        print("<h4>转发本页地址:{0}</h4>".format(short_url or longurl))
    print("</body>")
    print("</html>")
    return verified


def main():
    """CGI entry point: build the comparison page for two users and serve it.

    Reads form fields "user1" and "user2", writes the report to
    htdocs/history/couple_<user1>_<user2>.html under ROOTDIR, then sends that
    file to the client.  Returns 0 when the user names are rejected or could
    not be verified, otherwise None.
    """
    form = cgi.FieldStorage()
    user1 = form.getfirst("user1", "3215295")
    user2 = form.getfirst("user2", "sunus")
    if not (_UID_RE.match(user1) and _UID_RE.match(user2)):
        print("Content-type:text/html; charset=UTF-8\r\n\r\n")
        print("<h4>Invalid user name</h4>")
        return 0

    report_path = ROOTDIR + "/htdocs/history/couple_{0}_{1}.html".format(user1, user2)
    old_stdout = sys.stdout
    # Mode "w" truncates an existing report, so no delete-and-retry is needed.
    sys.stdout = open(report_path, "w", encoding="utf8")
    try:
        verified = _write_report(user1, user2)
    finally:
        # Always restore stdout so the client gets a response even on failure.
        sys.stdout.close()
        sys.stdout = old_stdout

    sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer) # comment this out if you want to debug
    print("Content-type:text/html; charset=UTF-8\r\n\r\n")
    with open(report_path, "r", encoding="utf8") as f:
        print(f.read())
    flog.close()
    return None if verified else 0


if __name__ == "__main__":
    main()
Oops, something went wrong.

0 comments on commit 35c4c00

Please sign in to comment.