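# NOTE: the five lines below are residue from the code-hosting site's file
# viewer that was captured together with the source; they are not program text.
# Permalink
# Find file
# Fetching contributors…
# Cannot retrieve contributors at this time
# executable file 294 lines (274 sloc) 14.1 KB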
#!/usr/bin/env python3.1
# -*- coding: UTF-8 -*-
# Wed Aug 31, 02:46 sunuslee
# sunuslee (at) gmail(dot)com
# This is my first python program , with my first cgi page and first apache server
# You can do anything you want with those files under ONLY ONE condition:
# Please Do keep those lines above
import threading
import queue
import sys
import urllib.parse
#import httplib2
import time
import random
import os
import cache
import dl
import pickle
import gzip
import io
# --- Module-wide settings and shared state ---------------------------------

# When true, the generated page links to styles.css.
CSS = True

# Selects which Apache installation root is used for ROOTDIR below.
IS_LOCAL = True
ROOTDIR = "/home/sunus/apache/" if IS_LOCAL else "/usr/local/apache2/"

# Prefix of every Douban profile link written into the page.
LINK_DB_PEOPLE = "http://www.douban.com/people/"

# Number of group members (besides the requesting user) to compare against.
people_nr = 5

# people_list holds [uid, nickname, icon] entries; main() puts the requesting
# user at index 0.  table holds [people_list entry, item dict] pairs in the
# order get_user_collection() is called.
people_list = []
table = []

# Category key -> Chinese label shown on the page.
cat_chs = dict(movie='电影', music='音乐', book='书籍')

# No effect at module scope; fh_write_html is actually bound inside main().
global fh_write_html
class Worker(threading.Thread):
    """Thread that processes download jobs taken from a shared queue.

    Every job is a 4-item sequence ``(uid, cat, start, que)``.  It is passed,
    together with this thread's name, to :meth:`process`, which forwards it
    to ``dl.download_t``.
    """

    def __init__(self, work_queue):
        """Remember *work_queue*; the thread is not started here."""
        super().__init__()
        self.work_queue = work_queue

    def run(self):
        """Process jobs forever, acknowledging each fetched job exactly once.

        An exception raised while handling a job still marks the job as done
        (so ``Queue.join()`` callers are not blocked by it) and then
        propagates, ending this thread.
        """
        while True:
            # Fetch outside the try block: task_done() must only be called
            # for a job that get() actually handed out.
            job = self.work_queue.get()
            try:
                uid, cat, start, que = job
                # Thread.name replaces the deprecated getName() accessor.
                self.process(uid, cat, start, que, self.name)
            finally:
                self.work_queue.task_done()

    def process(self, uid, cat, start, que, tname):
        """Hand one job to the downloader module."""
        dl.download_t(uid, cat, start, que, tname)
def get_user_collection(user, cat):
    """Load the cached *cat* collection of *user* and append it to ``table``.

    *user* is a sequence whose first element is the uid; the uid and *cat*
    together select the cache file under ``ROOTDIR``.  The pair
    ``[user, loaded items]`` is appended to the module-level ``table``.
    """
    cache_file = ''.join(
        [ROOTDIR, r'htdocs/cache_datas/', 'cache_', user[0], '_', cat]
    )
    loaded_items = cache.cache_load(cache_file)
    table.append([user, loaded_items])
def _fetch_page(uri):
    """Download *uri* and return its body decoded as UTF-8 text.

    The request advertises gzip support.  The body is gunzipped only when it
    really starts with the gzip magic bytes, so an uncompressed reply is
    decoded as-is instead of failing inside ``GzipFile``.
    """
    # Function-scope import: the file header imports only urllib.parse, which
    # does not guarantee that the urllib.request submodule is loaded.
    from urllib.request import Request, urlopen

    response = urlopen(Request(uri, headers={'Accept-encoding': 'gzip'}))
    try:
        payload = response.read()
    finally:
        response.close()
    if payload[:2] == b'\x1f\x8b':
        payload = gzip.GzipFile(fileobj=io.BytesIO(payload)).read()
    return payload.decode('utf8')


def _last_jpg(line):
    """Return the last '"'-delimited token of *line* ending in '.jpg', or None."""
    found = None
    for token in line.split('"'):
        if token.endswith('.jpg'):
            found = token
    return found


def get_user(group_uri, where, you):
    """Collect group members located in *where* into ``people_list``.

    Two member-search requests are made against *group_uri*:

    1. a probe with a very large ``start`` offset, whose reply is scanned for
       the total number of matching members;
    2. the real request, starting at a random offset chosen so that
       ``people_nr`` members fit after it.

    Each member found (except *you*) is appended to the module-level
    ``people_list`` as ``[uid, nickname, icon]``.  Finally the chosen random
    offset is written to the page through ``fh_write_html``.

    Parameters:
        group_uri: base URL of the group.
        where: location text to search for (URL-quoted here).
        you: uid of the requesting user, skipped when it shows up in the
            results.
    """
    global people_list  # rebound below when the single-page list is trimmed

    where = urllib.parse.quote(where)
    search = "{0}/member_search?search_text={1}&start={2}"

    # Probe request.  The reply mentions the number probe_start + 1 on the
    # line that also carries the total ("...-<total> ...").  If that line is
    # missing, total stays 0, which is treated as "everything fits on the
    # first page".
    probe_start = 13577293
    marker = str(probe_start + 1)
    total = 0
    for line in _fetch_page(search.format(group_uri, where, probe_start)).splitlines():
        if marker in line:
            total = int(line.split('-')[1].split()[0])

    # Never ask randint for an empty range when fewer than people_nr members
    # match.
    start = 0 if total == 0 else random.randint(0, max(0, total - people_nr))
    contents = _fetch_page(search.format(group_uri, where, start))

    # A member line looks like:
    # <dd><a href="http://www.douban.com/people/__UID__/">__NICKNAME__</a>
    # <br/><span class="pl">(local)</span></dd></dl><dl class="obu" ><dt>
    # <a href="http://www.douban.com/people/__UID__/" class="nbg">
    # <img src="http://img3.douban.com/icon/sdfs.jpg" class="m_sub_img"
    # alt="__NICKNAME__"/></a></dt>
    # i.e. it names the current member and carries the NEXT member's icon;
    # the first member's icon is on the line that opens the list.
    cur_user_icon = None
    for line in contents.splitlines():
        if '''<div class='obss'><dl class="obu" >''' in line:
            cur_user_icon = _last_jpg(line)
        if '<dd><a href="http://www.douban.com/people/' in line:
            # people_list[0] is the requesting user, hence the + 1.
            if total != 0 and len(people_list) == people_nr + 1:
                break
            next_user_icon = _last_jpg(line)
            stripped = line.strip()
            head, _, nickname = stripped[0:stripped.find(r'</a>')].rpartition(r'">')
            # head is '<dd><a href="http://.../people/UID/': drop the
            # 13-character '<dd><a href="' prefix and the trailing '/'.
            uid = head[13:-1].split('/')[-1]
            if uid != you:
                people_list.append([uid, nickname, cur_user_icon])
            if next_user_icon is not None:
                cur_user_icon = next_user_icon

    # total == 0 means every member was on the first page, so all of them
    # were collected above.  Pick a random window of people_nr members from
    # the others while keeping people_list[0].
    if total == 0:
        others = len(people_list) - 1
        if others > people_nr:
            # Upper bound is others - people_nr so the window is always full.
            start = random.randint(0, others - people_nr)
            people_list = people_list[0:1] + people_list[start + 1:start + 1 + people_nr]

    fh_write_html.write("<h4>你的幸运数字是 *{0}*</h4>\n".format(start))
# print_stars() returns the HTML for n lit stars followed by (maxstar - n)
# dark stars; IMG_PATH is the URL prefix of the star and info images.
IMG_PATH = "/imgs/"


def print_stars(n, maxstar, summary='', item_url=''):
    """Return an ``<a>`` element showing a star rating plus an info icon.

    Parameters:
        n: number of lit stars; ``-1`` is treated as 0.
        maxstar: total number of stars drawn.
        summary: text placed in the link's ``title`` attribute; ``'-1'`` is
            treated as an empty summary.
        item_url: target of the link.

    Returns:
        The HTML string.  ``info.png`` is shown when a summary is present,
        ``info1.png`` otherwise.
    """
    if n == -1:
        n = 0
    if summary == '-1':
        summary = ''

    # A raw double quote would end the attribute early and corrupt the page.
    # Only '"' is replaced: the text may already contain HTML entities
    # (NOTE(review): confirm against the producer of this data), and
    # re-escaping '&' would garble them.
    safe_url = str(item_url).replace('"', '&quot;')
    safe_summary = str(summary).replace('"', '&quot;')

    lit_star = '<img src="{0}sg.png" />'.format(IMG_PATH)
    dark_star = '<img src="{0}sw.png" />'.format(IMG_PATH)
    info_icon = 'info.png' if summary != '' else 'info1.png'

    return ''.join([
        '<a href="{0}" title="{1}">'.format(safe_url, safe_summary),
        lit_star * n,
        dark_star * (maxstar - n),
        '<img src="{0}{1}" /></a>'.format(IMG_PATH, info_icon),
    ])
# get_match_rate() reports how many items two users have in common.
# Each user is addressed by its index in the module-level ``table``.
def get_match_rate(uid1_idx, uid2_idx, cat):
    """Write the items shared by two users to the page and return the count.

    Parameters:
        uid1_idx, uid2_idx: indexes into ``table``; each entry is
            ``[[uid, nickname, icon], item_dict]``.
        cat: category key (a key of ``cat_chs``); its label is used in the
            heading.

    Returns:
        ``[uid2_idx, number_of_shared_items]``, or ``None`` (after writing a
        diagnostic line to the page) when an index is out of range.

    For every item, index 0 of its record is printed above the stars,
    index 1 is the item URL, index 2 the display name (``'-1'`` means "use
    the dict key"), index 3 the star count and index 4 the summary handed
    to ``print_stars``.
    """
    out = fh_write_html
    cat_local = cat_chs[cat]

    try:
        items1 = table[uid1_idx][1]
        items2 = table[uid2_idx][1]
    except IndexError:
        out.write("<h4>Got the problem, uid1_idx = {0} uid2_idx = {1}</h4>\n".format(uid1_idx, uid2_idx))
        return None

    common_set = set(items1) & set(items2)

    user1 = table[uid1_idx][0]
    user2 = table[uid2_idx][0]
    nickname1, icon1 = user1[1], user1[2]
    nickname2, icon2 = user2[1], user2[2]

    out.write(('<h4><a href="{0}{1}">{2}</a>'
               '<a href="{0}{3}">{4}</a>'
               '都喜欢的{5}有({6}):</h4>\n').format(
                   LINK_DB_PEOPLE, user1[0], nickname1,
                   user2[0], nickname2,
                   cat_local, len(common_set)))

    if common_set:
        out.write('<table border="1" width="600px">\n')
        out.write(('<tr>'
                   '<th align="left" width="300px"><h4>{1}</h4></th>'
                   '<th align="left" width="300px"><h4><a href="{0}{2}"><img src="{4}" /><br>{3}</a></h4></th>'
                   '<th align="left" width="300px"><h4><a href="{0}{5}"><img src="{7}" /><br>{6}</a></h4></th></tr>\n').format(
                       LINK_DB_PEOPLE, "match",
                       user1[0], nickname1, icon1,
                       user2[0], nickname2, icon2))
        row = ('<tr>'
               '<td width="300px"><h4><a href="{5}" target="_blank">{0}</a></h4></td>'
               '<td width="300px"><h4>{1}<br>{2}</h4></td>'
               '<td width="300px"><h4>{3}<br>{4}</h4></td></tr>\n')
        # Sorted so the same data always produces the same page; plain set
        # iteration order can change from one run to the next.
        for item_name in sorted(common_set):
            rec1 = items1[item_name]
            rec2 = items2[item_name]
            name = rec1[2] if rec1[2] != '-1' else item_name
            item_url = rec1[1]
            out.write(row.format(
                name,
                rec1[0], print_stars(int(rec1[3]), 5, rec1[4], item_url),
                rec2[0], print_stars(int(rec2[3]), 5, rec2[4], item_url),
                item_url))
        out.write("</table>\n")

    return [uid2_idx, len(common_set)]
def get_user_icon(uid):
    """Scrape the icon URL from the Douban profile page of *uid*.

    The page is fetched and searched line by line; for the first line that
    contains ``'douban.com/icon'`` the text between its first pair of
    double quotes is returned.

    Returns:
        The extracted string, or ``None`` when no line matches.
    """
    # Function-scope import: the file header imports only urllib.parse, which
    # does not guarantee that the urllib.request submodule is loaded.
    from urllib.request import urlopen

    response = urlopen('http://www.douban.com/people/' + uid)
    try:
        page = response.read().decode('utf8')
    finally:
        response.close()

    for line in page.splitlines():
        if 'douban.com/icon' in line:
            return line.split('"')[1]
    return None
def _write_page_footer(out, short_url):
    """Write the share-link line and the closing body/html tags to *out*."""
    out.write("<h4>转发本页地址:{0}</h4>\n".format(short_url))
    out.write("</body>\n")
    out.write("</html>\n")


def main():
    """Build one result page from the first line of ``./wait_queue``.

    The line is tab-separated: 0 = requesting uid, 1 = nickname,
    2 = group URL, 3 = group name, 4 = location, 5 = category,
    7 = short URL of the page, 8 = path of the HTML file to write
    (field 6 is not used here).

    Steps: write the page head, collect group members from the location,
    make sure every user's collection is cached (downloading through the
    worker threads when it is not), compare everybody with the requesting
    user and write the ranking.

    Returns:
        0 when the page ends early (nobody found, or ``dl.EXIT`` was set),
        otherwise ``None``.
    """
    global people_list
    global fh_write_html

    with open('./wait_queue', encoding='utf8') as fa:
        # rstrip rather than [:-1]: a line without a trailing newline must
        # not lose the last character of its last field.
        args = fa.readline().rstrip('\n').split('\t')
    you = args[0]
    your_nickname = args[1]
    group_url = args[2]
    group_name = args[3]
    if not group_url.endswith('/'):
        group_url += '/'
    location = args[4]
    cat = args[5]
    short_url = args[7]
    html_page_path = args[8]
    cache_dir = ROOTDIR + r'htdocs/cache_datas/'

    fh_write_html = open(html_page_path, "w", encoding="utf8")
    try:
        out = fh_write_html
        out.write("<html>\n")
        out.write("<head>\n")
        out.write('<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>\n')
        out.write("<title>Result</title>\n")
        if CSS == True:
            out.write('<link href="styles.css" rel="stylesheet" type="text/css" />\n')
        out.write('<!--[if gte IE 6.]>\n')
        out.write('<script defer type="text/javascript" src="pngfix.js"></script>\n')
        out.write('<![endif]-->\n')
        out.write("</head>\n")
        out.write("<body>\n")

        # The requesting user always occupies people_list[0].
        you_icon = get_user_icon(you)
        people_list.append([you, your_nickname, you_icon])
        get_user(group_url, location, you)
        if len(people_list) == 1:
            out.write("<h4>该小组好似没有来自 {0} 的豆友噢:(</h4>\n".format(location))
            _write_page_footer(out, short_url)
            print('New page:', short_url)
            return 0

        # Thread part: five workers share one job queue.
        dl_queue = queue.Queue()
        for _ in range(5):
            worker = Worker(dl_queue)
            worker.daemon = True
            worker.start()

        for people in people_list:
            file_path = cache_dir + 'cache_' + people[0] + '_' + cat
            if os.access(file_path, os.R_OK):
                # Collection already cached: mark the nickname with "(c)".
                people[1] += '(c)'
            else:
                # The queue itself is part of the job so the downloader can
                # use it; join() waits until everything queued is done.
                dl_queue.put([people[0], cat, 1, dl_queue])
                dl_queue.join()
                if dl.EXIT == True:
                    out.write("<h4> DL.exit =" + str(dl.EXIT) + "</h4>\n")
                    _write_page_footer(out, short_url)
                    print('New page:', short_url)
                    return 0
                cache.cache_save(cache_dir + people[0] + '_' + cat + '_1')
            get_user_collection(people, cat)

        # table[0] is the requesting user; the others start at index 1.
        # From here on the requesting user is no longer in people_list.
        people_list.pop(0)
        rank = []
        for idx in range(1, len(people_list) + 1):
            rank_item = get_match_rate(0, idx, cat)
            if rank_item is not None:
                rank.append(rank_item)
        rank.sort(key=lambda node: node[1], reverse=True)

        out.write('<h4>在 <a href="{0}">{1}</a> 为您找到了在 {2} 的豆友:</h4>\n'.format(group_url, group_name, location))
        out.write('<!-- saying:glc\t{0}\t{1}\t{2}\t -->\n'.format(group_name, location, cat_chs[cat]))

        # Iterate over rank itself: it is shorter than people_list whenever
        # get_match_rate() returned None for somebody.
        for r, (table_idx, shared) in enumerate(rank, 1):
            user = table[table_idx][0]
            uid = user[0]        # used to @somebody
            nickname = user[1]   # shown on the page
            icon = user[2]       # user icon
            out.write('<h4>#{0:<4} <a href="{3}{4}"><img src="{5}" />{1}</a>和你有 {2} 项共同喜好</h4>\n'.format(
                r, nickname, shared, LINK_DB_PEOPLE, uid, icon))
            out.write('<!-- saying:rank\t{0}\t{1}\t{2}\t -->\n'.format(r, uid, shared))

        _write_page_footer(out, short_url)
    finally:
        # Runs on the early-return paths as well, so the page is always
        # flushed and the handle released.
        fh_write_html.close()

    print('Result: {0}\t{1}\t{2}\t{3}'.format(cat, you, group_url, short_url))
    return
# Run the page generator only when this file is executed as a script, so
# importing the module does not trigger network and file I/O.
if __name__ == "__main__":
    main()