It's now up and running :)

sunuslee · Sep 21, 2011 · 35c4c00 · 35c4c00
commit 35c4c00
Show file tree

Hide file tree

Showing 8 changed files with 1,318 additions and 0 deletions.
diff --git a/api_limit b/api_limit
diff --git a/cgi101.py b/cgi101.py
@@ -0,0 +1,7 @@
+#!/usr/bin/python3.1
+import cgi
+
+# Minimal CGI smoke test: emit the response header and a page title, then
+# check that the history test file can be opened for reading.
+print("Content-type: text/html\r\n\r\n")
+print("<title>Reply Page</title>")
+with open("../htdocs/history/test", "r") as f:
+        # Nothing is read; the file is only opened and closed again.
+        pass
diff --git a/do_2people b/do_2people
@@ -0,0 +1,298 @@
+#!/usr/bin/env python3.1
+# -*- coding: UTF-8 -*-
+
+# Wed Aug 31, 02:46 sunuslee
+# sunuslee (at) gmail(dot)com
+# This is my first python program , with my first cgi page and first apache server
+# You can do anything you want with those files under ONLY ONE condition:
+# Please Do keep those lines above
+import cgitb
+cgitb.enable()
+import sys, os, codecs
+import cgi
+import sys
+import re
+import urllib.request
+import urllib.parse
+import urllib.error
+import threading
+import queue
+import time
+import fcntl
+# Please use your own api key instead. e.g. :
+# APIKEY = "23eeeb4347bdd26bfc6b7ee9a3b755dd"
+# Base directory of the web server tree; main() writes the generated result
+# page to ROOTDIR + "/htdocs/history/couple_<user1>_<user2>.html".
+ROOTDIR = "/home/sunus/apache/"
+# Public base URL; main() uses it to build the link to the generated page.
+HOSTNAME = "http://localhost/"
+# Prefix of a douban profile link; the user id is appended in get_match_rate().
+LINK_DB_PEOPLE = "http://www.douban.com/people/"
+# Key sent as the "apikey" query parameter of douban API requests.
+# NOTE(review): this looks like a real key committed to the repository --
+# confirm it is meant to be public.
+APIKEY = "053caab0d0224c680fb600127066e538"
+# Not referenced anywhere else in this file.
+SECRET = ''
+# Short aliases for the fcntl flock() flags used by try_api().
+LOCK_EX = fcntl.LOCK_EX
+LOCK_UN = fcntl.LOCK_UN
+LOCK_NB = fcntl.LOCK_NB
+# Append-mode log file in the current working directory. It is opened when the
+# module is loaded and closed at the end of main().
+flog = open("log", "a+")
+# Declared global in main() but never read or modified in this file.
+people_list = []
+# One list per category. get_user_collection() appends one [uid, item_dict]
+# entry per downloaded user; get_match_rate() reads entries by index.
+table_movie = []
+table_music = []
+table_book  = []
+# Running total of decoded characters received by get_user_collection();
+# updates are guarded by tbr_lock because several Worker threads add to it.
+total_bytes_recv = 0
+tbr_lock = threading.Lock()
+class Worker(threading.Thread):
+        """Thread that processes [uid, cat] items taken from a shared queue.
+
+        Each item is handed to process(), which downloads that user's collection
+        for the given category. run() never returns, so the thread is meant to
+        be waited on through the queue's join() rather than through the thread.
+        """
+
+        def __init__(self, work_queue):
+                """Remember the queue this worker takes its [uid, cat] items from."""
+                super().__init__()
+                self.work_queue = work_queue
+
+        def run(self):
+                """Process queue items forever, marking each one done afterwards."""
+                import traceback
+                while True:
+                        # Dequeue outside the try block so task_done() is only ever
+                        # called for an item that was actually taken.
+                        uid, cat = self.work_queue.get()
+                        try:
+                                self.process(uid, cat)
+                        except Exception:
+                                # Report the failure but keep the worker alive: a dead
+                                # worker leaves queued items unprocessed, which can make
+                                # the queue's join() block forever.
+                                traceback.print_exc()
+                        finally:
+                                self.work_queue.task_done()
+
+        def process(self, uid, cat):
+                """Download the collection of user uid for category cat."""
+                get_user_collection(uid, cat)
+
+def try_api(api_url):
+        """Open api_url once the shared call counter allows another API request.
+
+        Calls are counted in the file "api_limit" in the current working
+        directory. Every recorded line ends with ",<count>,<window start>", where
+        the window start is an integer epoch time. Access to the file is
+        serialised with an exclusive flock(). A window is treated as expired 65
+        seconds after its start; once 40 calls are recorded in the current
+        window the function sleeps 60 seconds and checks again.
+
+        api_url -- full request URL. Its last 40 characters are left out of the
+                   recorded line (presumably the "&apikey=<key>" suffix -- TODO
+                   confirm for every caller).
+
+        Returns the object produced by urllib.request.urlopen(api_url); the
+        caller must close it. Errors raised by urlopen() propagate.
+        """
+        while True:
+                # Retry once per second until the non-blocking exclusive lock is ours.
+                while True:
+                        fh = None
+                        try:
+                                fh = open("api_limit", "a+")
+                                fcntl.flock(fh.fileno(), LOCK_EX|LOCK_NB)
+                                break
+                        except IOError:
+                                flog.write("FILE's LOCKED,WAIT....\n")
+                                # open() itself may have failed, leaving nothing to close.
+                                if fh is not None:
+                                        fh.close()
+                                time.sleep(1)
+
+                granted = True
+                try:
+                        if fh.tell() == 0:
+                                # Position 0 right after opening in append mode: nothing
+                                # has been recorded yet, so start the first window.
+                                fh.write("{0}\t{1},{2},{3}\n".format(api_url[0:-40], time.ctime(), 1, int(time.time())))
+                        else:
+                                # Read the tail of the last line: "...,<count>,<window start>".
+                                os.lseek(fh.fileno(), -17, os.SEEK_END)
+                                lastline = fh.read(16)
+                                lasttime, cnt  = lastline.split(',')[-1:-3:-1] # get the last two element, cnt and lasttime
+                                if int(time.time()) - int(lasttime) >= 65:
+                                        # The previous window expired: start a new one at count 1.
+                                        fh.write("{0}\t{1},{2},{3}\n".format(api_url[0:-40], time.ctime(), 1, int(time.time())))
+                                elif int(cnt) == 40:
+                                        # Window is full: give up the lock and wait below.
+                                        granted = False
+                                        flog.write("No More API at this time {0}".format(time.ctime()))
+                                else:
+                                        # Same window: bump the count, keep its start time.
+                                        fh.write("{0}\t{1},{2},{3}\n".format(api_url[0:-40], time.ctime(), int(cnt) + 1, lasttime))
+                finally:
+                        # Flush the new record while the lock is still held so the next
+                        # lock holder sees it, then always unlock and close the handle.
+                        try:
+                                fh.flush()
+                        finally:
+                                fcntl.flock(fh.fileno(), LOCK_UN)
+                                fh.close()
+                if granted:
+                        break
+                time.sleep(60)
+        return urllib.request.urlopen(api_url)
+
+
+def get_nickname(uid):
+        """Return the display name found in the people-API feed of user uid.
+
+        The feed is fetched through try_api(). The first line containing
+        "</title>" is returned with its first 8 and last 8 characters removed.
+        Returns None when no line contains "</title>".
+        """
+        profile_url = "http://api.douban.com/people/{0}?alt=atom&apikey={1}".format(uid,APIKEY)
+        response = try_api(profile_url)
+        feed = response.read().decode("utf8")
+        response.close()
+        for row in feed.splitlines():
+                if "</title>" not in row:
+                        continue
+                # Strip the fixed-width text surrounding the name on the title line.
+                return row[8:-8]
+
+# Maps "<status><category>" keys to the Chinese label printed in the result
+# table. get_user_collection() builds the key as status + cat.
+status_chs = {"wishmovie":"想看",       "watchingmovie":"在看",         "watchedmovie":"看过",
+              "wishmusic":"想听",       "listeningmusic":"在听",        "listenedmusic":"听过",
+              "wishbook":"想读",        "readingbook":"在读",           "readbook":"读过"}
+
+def get_user_collection(uid, cat):
+        """Download all collection entries of user uid for one category.
+
+        uid -- douban user id placed in the request URL.
+        cat -- "movie" or "music"; any other value selects the book table.
+
+        Pages of 50 entries are requested through try_api(), starting at index
+        1, until a response no longer contains the text "entry". The result is
+        appended to the category table as [uid, item_dict], where item_dict maps
+        each item title to [status label, subject id, aka name or None]. The
+        number of decoded characters received is added to total_bytes_recv.
+        """
+        # The tables are only appended to, never rebound, so they need no global
+        # declaration; the counter is rebound by += and does.
+        global total_bytes_recv
+        if cat == "movie":
+                table = table_movie
+        elif cat == "music":
+                table = table_music
+        else:
+                table = table_book
+        start = 1
+        item_dict = {}  #{"item_name#1":[item_status, item_link, item_aka], "item_name#2":[item_status, item_link, item_aka], .....}
+        #It just START FROM 1
+
+        item_status = "<db:status>"
+        item_title = "<title>"
+        item_link = 'http://{0}.douban.com/subject'.format(cat)
+        item_link_re = re.compile(r'\d+')
+        item_aka = '<db:attribute lang="zh_CN" name="aka">'
+        while True:
+                # The parameter must be spelled "apikey" (as in get_nickname) or the
+                # key is not recognised by the API.
+                uri = "http://api.douban.com/people/{0}/collection?cat={1}&tag=&status=&start-index={2}&max-results=50&alt=atom&apikey={3}".format(uid, cat, start,APIKEY)
+                fh = try_api(uri)
+                try:
+                        content = fh.read().decode("utf8")
+                finally:
+                        # Release the connection even if reading or decoding fails.
+                        fh.close()
+                with tbr_lock:
+                        total_bytes_recv += len(content)
+                if "entry" not in content:
+                        break
+                # Line-based state machine over one page of the feed:
+                # 1 = expect status, 2 = expect title, 3 = expect subject link,
+                # 4 = expect the optional aka attribute.
+                step = 1
+                for lines in content.splitlines():
+                        if step == 1 and item_status in lines:
+                                status = lines[13:-12]
+                                step = 2
+                        elif step == 2 and item_title in lines:
+                                title = lines[10:-8]
+                                step = 3
+                        elif step == 3 and item_link in lines:
+                                # First run of digits on the link line.
+                                link = item_link_re.search(lines).group()
+                                step = 4
+                                aka = None
+                        #in case the item DOESN NOT HAVE A AKA
+                        elif step == 4 and item_aka in lines:
+                                aka = lines[41: -15]
+                                step = 1
+                        elif "</entry>" in lines:
+                                # End of an entry: store it and expect a status line next.
+                                step = 1
+                                item_dict[title] = [status_chs[status + cat], link, aka]
+                start += 50
+        table.append([uid, item_dict])
+#        print("<h4>User {0:4} : count = {1} finished</h4>".format(uid, len(item_dict)))
+
+# This function returns the uid1-uid2's match rate
+# uid1 and uid2 has it's own index in table.
+def get_match_rate(uid1_idx, uid2_idx, cat):
+        """Print the items two users have in common as HTML and return the count.
+
+        uid1_idx, uid2_idx -- positions of the two users in the category table.
+        cat -- "movie" or "music"; any other value selects the book table.
+
+        Writes a heading and a table (item, status of user 1, status of user 2)
+        to stdout. The nicknames are fetched with get_nickname(). Returns
+        [uid2_idx, number of common items].
+        """
+        if cat == "movie":
+                table, cat_local = table_movie, "电影"
+        elif cat == "music":
+                table, cat_local = table_music, "音乐"
+        else:
+                table, cat_local = table_book, "书籍"
+        first_uid, first_items = table[uid1_idx]
+        second_uid, second_items = table[uid2_idx]
+        # Titles present in both users' collections.
+        shared = {title for title in first_items} & {title for title in second_items}
+        first_nick = get_nickname(first_uid)
+        second_nick = get_nickname(second_uid)
+        print('<h4><a href="{0}{1}">{3}</a>\
+              和\
+              <a href="{0}{2}">{4}</a>\
+              都喜欢的{5}有({6}):</h4>'.format(LINK_DB_PEOPLE, first_uid, second_uid, first_nick, second_nick, cat_local, len(shared)))
+        print('<table border="1" width="600px">')
+        print('<tr>\
+        <th align="left" width="300px"><h4>{0}</h4></th>\
+        <th align="left" width="150px"><h4><a href="{3}{4}">{1}</a></h4></th>\
+        <th align="left" width="150px"><h4><a href="{3}{5}">{2}</a></h4></th></tr>'.format("match", first_nick, second_nick, LINK_DB_PEOPLE, first_uid, second_uid))
+        subject_prefix = "http://{0}.douban.com/subject/".format(cat)
+        for title in shared:
+                first_entry = first_items[title]
+                second_entry = second_items[title]
+                # Show the aka name when user 1's entry has one, else the title.
+                if first_entry[2] is None:
+                        shown = title
+                else:
+                        shown = first_entry[2]
+                print('<tr>\
+                      <td width="300px"><h4><a href="{3}" target="_blank">{0}</a></h4></td>\
+                      <td width="150px"><h4>{1}</h4></td>\
+                      <td width="150px"><h4>{2}</h4></td></tr>'.\
+                      format(shown, first_entry[0], second_entry[0], subject_prefix + first_entry[1]))
+        print("</table>")
+        return [uid2_idx, len(shared)]
+
+
+def var_verify_2p(user1, user2):
+        """Check that the people-API page of both users can be opened.
+
+        user1, user2 -- douban user ids placed in the request URL.
+
+        On a URLError (including HTTP errors) or ValueError an <h4> message is
+        printed to stdout. Returns True only when both requests succeeded,
+        otherwise False. Any other exception propagates to the caller.
+        """
+        fh_1 = None
+        fh_2 = None
+        try:
+                fh_1 = try_api("http://api.douban.com/people/{0}?alt=atom&apikey={1}".format(user1,APIKEY))
+                fh_2 = try_api("http://api.douban.com/people/{0}?alt=atom&apikey={1}".format(user2,APIKEY))
+        except (urllib.error.URLError, ValueError) as e:
+                if hasattr(e, 'reason'):
+                        print("<h4>Cannot connected to the server</h4>")
+                if hasattr(e, 'code'):
+                        print("<h4>Return code:",e.code,"error</h4>")
+                        print("<h4>Usernames may not exsit</h4>")
+        finally:
+                if fh_1 is not None:
+                        fh_1.close()
+                if fh_2 is not None:
+                        fh_2.close()
+        # Decide after, not inside, the finally block: a return inside finally
+        # would silently discard any exception that was not handled above.
+        return fh_1 is not None and fh_2 is not None
+def get_shortenurl(long_url):
+        """Ask the Google URL shortener for a short link to long_url.
+
+        long_url -- the URL to shorten.
+
+        Returns the first quoted string in the response that contains
+        "http://goo.gl". If the request fails, a notice is printed to stdout and
+        None is returned; None is also returned when the response holds no such
+        string.
+        """
+        import json
+        # json.dumps escapes quotes and backslashes in the URL, and the body is
+        # encoded because urllib requires bytes for POST data.
+        payload = json.dumps({"longUrl": long_url}).encode("utf8")
+        try:
+                req = urllib.request.Request("https://www.googleapis.com/urlshortener/v1/url", payload, {'Content-Type': 'application/json'})
+                resp = urllib.request.urlopen(req)
+                try:
+                        rec = resp.read()
+                finally:
+                        resp.close()
+                for s in rec.decode().split('"'):
+                        if "http://goo.gl" in s:
+                                return s
+        except Exception:
+                # Best effort: the page is still useful without a short link.
+                print("<h4>Short URL IS UNAVAILABLE NOW!</h4>")
+        return None
+def main():
+        """CGI entry point: compare two douban users and serve the result page.
+
+        Reads the form fields "user1" and "user2" (defaults "3215295" and
+        "sunus"). The page is written to
+        ROOTDIR + "/htdocs/history/couple_<user1>_<user2>.html" by temporarily
+        redirecting sys.stdout, and is then sent back as the CGI response. If
+        the users cannot be verified the page contains only the error message.
+
+        Returns 0 when verification failed, otherwise None.
+        """
+        form = cgi.FieldStorage()
+        user1 = form.getvalue("user1", "3215295")
+        user2 = form.getvalue("user2", "sunus")
+        # The ids come straight from the request and end up in a file name, so
+        # anything that could leave the history directory (such as "/") is
+        # replaced. Ids made of letters, digits, "_", "." and "-" are unchanged.
+        page_name = "couple_{0}_{1}.html".format(
+                re.sub(r"[^A-Za-z0-9_.-]", "_", str(user1)),
+                re.sub(r"[^A-Za-z0-9_.-]", "_", str(user2)))
+        page_path = ROOTDIR + "/htdocs/history/" + page_name
+        Old_stdout = sys.stdout
+        # Mode "w" already truncates an existing page, so no delete-and-retry
+        # loop is needed; a genuine failure to open is reported, not retried.
+        sys.stdout = open(page_path, "w", encoding = "utf8")
+        verified = False
+        try:
+                print("<html>")
+                print("<head>")
+                print('<meta http-equiv="content-type" content="text/html; charset=utf8" />')
+                print("<title>Result</title>")
+                print("</head>")
+                print("<body>")
+                verified = var_verify_2p(user1, user2)
+                if verified:
+                        # THREAD PART
+                        user_queue = queue.Queue()
+                        for i in range(3):
+                                worker = Worker(user_queue)
+                                worker.daemon = True
+                                worker.start()
+                        # THREAD PART
+                        for people in [user1, user2]:
+                                for cat in ["movie", "music", "book"]:
+                                        user_queue.put([people, cat])
+                        user_queue.join()
+                        # Compare the first two entries of each category table.
+                        get_match_rate(0, 1, "movie")
+                        get_match_rate(0, 1, "music")
+                        get_match_rate(0, 1, "book")
+                        # HOSTNAME ends with "/", so strip it to avoid "//history".
+                        longurl = HOSTNAME.rstrip("/") + "/history/" + page_name
+                        short_url = get_shortenurl(longurl)
+                        print("<h4>转发本页地址:{0}</h4>".format(short_url))
+                print("</body>")
+                print("</html>")
+        finally:
+                # Always finish the page file and restore stdout, including when
+                # verification fails or an exception is raised.
+                sys.stdout.close()
+                sys.stdout = Old_stdout
+        with open(page_path, "r", encoding = "utf8") as f:
+                page = f.read()
+        sys.stdout = codecs.getwriter('utf8')(sys.stdout.buffer) # comment this out if you want to debug
+        print("Content-type:text/html; charset=UTF-8\r\n\r\n")
+        print(page)
+        flog.close()
+        if not verified:
+                return 0
+
+if __name__ == "__main__":
+        main()