Permalink
Browse files

Add config.py weibo_login.py start.py

  • Loading branch information...
1 parent 25b009f commit 717a2d9fde7c851ab2e00fa5615c8cbf4ffe0d7e @junewu committed Oct 2, 2013
Showing with 351 additions and 0 deletions.
  1. +18 −0 config.py
  2. +106 −0 start.py
  3. +227 −0 weibo_login.py
View
18 config.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) Aurora Wu
+# Author: Aurora Wu
+# Contact: wuxy91@gmail.com
+
+# Weibo home page URL
+weibo_url = 'http://www.weibo.com'
+
+# Followers (fans) list URL template; the two %s slots are filled with (uid, page)
+followers_url = 'http://www.weibo.com/%s/myfans?t=1&f=1&page=%s'
+
+# Following list URL template; the two %s slots are filled with (uid, page)
+following_url = 'http://www.weibo.com/%s/myfollow?t=1&page=%s'
+
+# Login user name (e-mail address) and password
+username = 'Input your login name here'
+passwd = 'Input your login password here'
View
106 start.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Copyright (C) Aurora Wu
+# Author: Aurora Wu
+# Contact: wuxy91@gmail.com
+
+import urllib2
+import config
+import weibo_login as login
+from BeautifulSoup import BeautifulSoup
+from datetime import datetime
+
def get_followers_list(url, fans, page):
    """Fetch one page of the follower list and append it to Followers.txt.

    url   -- fully formatted followers-page URL to download
    fans  -- total number of followers (only written into the header line)
    page  -- page number being fetched (only written into the header line)

    Each call appends a timestamped header, one "uid/nickname" line per
    follower found on the page, and a separator line.
    """
    response = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    # The follower markup is embedded in the 7th <script> tag of the page.
    fans_script = str(soup.findAll('script')[6])
    marker = '<script>STK &amp;&amp; STK.pageletM &amp;&amp; STK.pageletM.view('
    items = BeautifulSoup(parse_script_data(fans_script, marker)).findAll(
        'li', attrs={'class': 'clearfix S_line5', 'action-type': 'itmeClick'})

    header = str(datetime.now())[:19] + "的粉丝列表,总数:【%s】,第【%s】页\n" % (fans, page)
    # `with` guarantees the file is flushed and closed even when an item
    # fails to parse half-way through the page.
    with open('Followers.txt', 'a') as out:
        out.write(header)
        for li in items:
            strong = li.find('strong')
            anchor = strong.find('a', attrs={'target': '_blank', 'class': 'W_f14 S_func1'})
            # The serialized tag is split on spaces and read positionally.
            # NOTE(review): presumably token 2 is the usercard attribute
            # (usercard="id=<uid>") and token 3 the title attribute; a
            # nickname containing a space would be truncated -- confirm.
            tokens = str(anchor).split(' ')
            uid = tokens[2][tokens[2].index('id=') + 3:len(tokens[2]) - 1]
            nickname = tokens[3][tokens[3].index('=') + 2:len(tokens[3]) - 1]
            # Closing bracket after the nickname was missing in the original format.
            out.write("uid=【%s】, nickname=【%s】\n" % (uid, nickname))
        out.write("==========================================================\n")
+
def get_following_list(url, following, page):
    """Fetch one page of the following list and append it to Following.txt.

    url        -- fully formatted following-page URL to download
    following  -- total number of followed users (only written into the header line)
    page       -- page number being fetched (only written into the header line)

    Each call appends a timestamped header, one "uid/nickname" line per
    followed user found on the page, and a separator line.
    """
    response = urllib2.urlopen(url)
    try:
        soup = BeautifulSoup(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    # The following-list markup is embedded in the 7th <script> tag of the page.
    follow_script = str(soup.findAll('script')[6])
    marker = '<script>STK &amp;&amp; STK.pageletM &amp;&amp; STK.pageletM.view('
    listing = BeautifulSoup(parse_script_data(follow_script, marker))
    entries = listing.findAll(
        'div', attrs={'action-type': 'user_item', 'class': 'myfollow_list S_line2 SW_fun'})

    header = str(datetime.now())[:19] + "的关注者列表,总数:【%s】,第【%s】页\n" % (following, page)
    # `with` guarantees the file is flushed and closed even when an entry
    # fails to parse half-way through the page.
    with open('Following.txt', 'a') as out:
        out.write(header)
        for div in entries:
            anchor = div.find('ul', attrs={'class': 'info'}).find(
                'a', attrs={'target': '_blank', 'class': 'S_func1', 'node-type': 'screen_name'})
            nickname = anchor.string
            # The uid is whatever follows the first '=' in the usercard attribute.
            uid = str(anchor['usercard']).split('=')[1]
            # Closing bracket after the nickname was missing in the original format.
            out.write("uid=【%s】, nickname=【%s】\n" % (str(uid), str(nickname)))
        out.write("==========================================================\n")
+
def get_user_info(url):
    """Scrape the logged-in user's identity and counters from the page at `url`.

    Returns a tuple (uid, nickname, following_count, fans_count). uid and
    nickname are returned exactly as they appear on the right-hand side of
    the page's `$CONFIG[...] = ...` JavaScript assignments (presumably still
    wrapped in single quotes -- the caller strips them); the two counters
    are converted to int.
    """
    page = BeautifulSoup(urllib2.urlopen(url).read())
    script_tags = page.findAll('script')

    # The 3rd <script> tag holds the $CONFIG assignments; narrow it down in
    # two steps to the run that starts at the uid entry.
    config_js = str(script_tags[2])
    config_js = config_js[config_js.index('$CONFIG[\'islogin\'] = \'1\';'): config_js.index('</script>')]
    config_js = config_js[config_js.index('$CONFIG[\'uid\']'): config_js.index('$CONFIG[\'location\'] = \'home\';')]
    assignments = config_js.split(';')
    uid = assignments[0].split(' = ')[1]  # current user's uid
    nickname = assignments[1].split(' = ')[1]  # current user's nickname

    # The 15th <script> tag embeds the HTML fragment carrying the counters.
    counters_script = str(script_tags[14])
    counters = BeautifulSoup(parse_script_data(counters_script, '<script>FM.view('))
    follow_tag = counters.find('strong', attrs={'node-type': 'follow'})  # number of followed users
    fans_tag = counters.find('strong', attrs={'node-type': 'fans'})  # number of followers
    return uid, nickname, int(follow_tag.string), int(fans_tag.string)
+
def parse_script_data(script, index):
    """Extract and unescape the HTML fragment embedded in a pagelet <script> tag.

    script -- text of the <script> element (as serialized by BeautifulSoup)
    index  -- literal prefix that precedes the embedded payload, e.g.
              '<script>FM.view('

    Returns the payload's "html" value with &lt;/&gt; entities decoded,
    literal \\n and \\r sequences removed and all remaining backslashes
    stripped. Raises ValueError when the prefix, the ')</script>'
    terminator or the 'html' key cannot be found.
    """
    end_marker = ')</script>'
    start = script.index(index) + len(index)
    # Look for the terminator only after the payload starts; an earlier
    # ')</script>' would otherwise yield an empty or wrong slice.
    end = script.index(end_marker, start)
    payload = script[start:end]

    # Skip the 7 characters of `html":"` and drop the trailing two characters.
    # NOTE(review): assumes the key is serialized without whitespace and the
    # payload ends with `"}` -- confirm against a live response.
    html_start = payload.index('html') + 7
    html = payload[html_start:len(payload) - 2]

    # Order matters: the escaped newlines must go before the blanket
    # backslash removal, which would otherwise leave stray 'n'/'r' letters.
    for escaped, plain in (("&gt;", ">"), ("&lt;", "<"),
                           ("\\n", ""), ("\\r", ""), ("\\", "")):
        html = html.replace(escaped, plain)
    return html
+
def main():
    """Log in to Weibo, then dump the user's following and follower lists.

    Returns None after a successful crawl, or a unicode error message when
    the login step does not report success (login.main() != 0).
    """
    if login.main() != 0:
        return u"登录微博失败!"

    uid, nickname, following, fans = get_user_info(config.weibo_url)

    # The crawl assumes 20 entries per listing page. Round up so the last,
    # partially filled page is fetched too (the original `n/20 + 1` combined
    # with `range(1, pages)` skipped it, and fetched nothing when n < 20).
    page_size = 20
    follow_pages = (following + page_size - 1) // page_size
    fans_pages = (fans + page_size - 1) // page_size

    # Fetch the following list. The uid may still carry quote characters
    # from the scraped JavaScript, so they are stripped from the final URL.
    for i in range(1, follow_pages + 1):
        get_following_list((config.following_url % (uid, i)).replace('\'', ''), following, i)
    # Fetch the follower list.
    for i in range(1, fans_pages + 1):
        get_followers_list((config.followers_url % (uid, i)).replace('\'', ''), fans, i)


if __name__ == "__main__":
    main()
View
227 weibo_login.py
@@ -0,0 +1,227 @@
+#!/usr/bin/env python
+#coding=utf8
+
+'''
+Created on Mar 18, 2013
+
+@author: yoyzhou
+'''
+
+try:
+ import os
+ import sys
+ import urllib
+ import urllib2
+ import cookielib
+ import base64
+ import re
+ import hashlib
+ import json
+ import rsa
+ import binascii
+ import config ###by aurora
+
+except ImportError:
+ print >> sys.stderr, """\
+
+There was a problem importing one of the Python modules required.
+The error leading to this problem was:
+
+%s
+
+Please install a package which provides this module, or
+verify that the module is installed correctly.
+
+It's possible that the above module doesn't match the current version of Python,
+which is:
+
+%s
+
+""" % (sys.exc_info(), sys.version)
+ sys.exit(1)
+
+
+# Module metadata: program name, site URL, Weibo handle and version string
+__prog__= "weibo_login"
+__site__= "http://yoyzhou.github.com"
+__weibo__= "@pigdata"
+__version__="0.1 beta"
+
+
def get_prelogin_status(username):
    """
    Perform prelogin action, get prelogin status, including servertime, nonce, rsakv.

    @param username: login user name; sent encoded (see get_user) as the 'su' query parameter
    @return: tuple (servertime, nonce, rsakv) with servertime converted to a
             string, or None when the response cannot be parsed
    """
    prelogin_url = ('http://login.sina.com.cn/sso/prelogin.php?entry=weibo'
                    '&callback=sinaSSOController.preloginCallBack&su=' + get_user(username) +
                    '&rsakt=mod&checkpin=1&client=ssologin.js(v1.4.5)')
    response = urllib2.urlopen(prelogin_url)
    try:
        data = response.read()
    finally:
        response.close()

    # The reply wraps a JSON object in a callback call: callback({...}).
    # Keep only what sits between the outermost parentheses.
    match = re.search(r'\((.*)\)', data)
    try:
        status = json.loads(match.group(1))
        return str(status['servertime']), status['nonce'], status['rsakv']
    except (AttributeError, ValueError, KeyError, TypeError):
        # AttributeError: no parenthesised payload (match is None);
        # ValueError: payload is not valid JSON;
        # KeyError/TypeError: JSON lacks the expected fields or shape.
        print('Getting prelogin status met error!')
        return None
+
+
def login(username, pwd, cookie_file):
    """
    Login with user name, password and cookies.
    (1) If the cookie file exists and loads, install the cookies for urllib2
        and return 1 (the saved session is not re-validated here);
    (2) If no usable cookies are found, perform a fresh login via do_login().

    @param username: login user name
    @param pwd: login password
    @param cookie_file: path of the LWP-format cookie file to load / save
    @return: 1 when saved cookies were installed, otherwise the result of do_login()
    """
    if os.path.exists(cookie_file):
        cookie_jar = cookielib.LWPCookieJar(cookie_file)
        try:
            cookie_jar.load(ignore_discard=True, ignore_expires=True)
        except (cookielib.LoadError, IOError):
            # Corrupt or unreadable cookie file: fall through to a fresh
            # login instead of aborting.
            print('Loading cookies error')
        else:
            # Install the loaded cookies for every later urllib2 request.
            cookie_support = urllib2.HTTPCookieProcessor(cookie_jar)
            opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
            urllib2.install_opener(opener)
            print('Loading cookies success')
            return 1

    return do_login(username, pwd, cookie_file)
+
+
def do_login(username, pwd, cookie_file):
    """
    Perform login action with user name, password and saving cookies.

    @param username: login user name
    @param pwd: login password
    @param cookie_file: file name where to save cookies when login succeeded
    @return: 1 when the login feedback reports success (cookies are then
             saved to cookie_file), otherwise 0
    """
    # POST data per LOGIN WEIBO, these fields can be captured using the httpfox extension in Firefox
    login_data = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'userticket': '1',
        'pagerefer': '',
        'vsnf': '1',
        'su': '',
        'service': 'miniblog',
        'servertime': '',
        'nonce': '',
        'pwencode': 'rsa2',
        'rsakv': '',
        'sp': '',
        'encoding': 'UTF-8',
        'prelt': '45',
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'META'
    }

    # A fresh cookie jar is installed globally so that every request below
    # (and later urllib2 calls) shares the session cookies.
    cookie_jar2 = cookielib.LWPCookieJar()
    cookie_support2 = urllib2.HTTPCookieProcessor(cookie_jar2)
    opener2 = urllib2.build_opener(cookie_support2, urllib2.HTTPHandler)
    urllib2.install_opener(opener2)
    login_url = 'http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.5)'

    try:
        prelogin = get_prelogin_status(username)
    except Exception:
        # Best effort: any failure while fetching the prelogin data means
        # "login failed", but Ctrl-C / SystemExit still propagate.
        return 0
    if prelogin is None:
        return 0
    servertime, nonce, rsakv = prelogin

    # Fill POST data
    login_data['servertime'] = servertime
    login_data['nonce'] = nonce
    login_data['su'] = get_user(username)
    login_data['sp'] = get_pwd_rsa(pwd, servertime, nonce)
    login_data['rsakv'] = rsakv
    login_data = urllib.urlencode(login_data)
    http_headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0'}
    req_login = urllib2.Request(url=login_url, data=login_data, headers=http_headers)
    result = urllib2.urlopen(req_login)
    text = result.read()

    redirect_pattern = re.compile(r'location\.replace\("(.*?)"\)')

    try:
        # Search login redirection URL
        login_url = redirect_pattern.search(text).group(1)
        data = urllib2.urlopen(login_url).read()
        # Verify login feedback, check whether result is TRUE
        feedback_pattern = re.compile(r'feedBackUrlCallBack\((.*)\)', re.MULTILINE)
        feedback = feedback_pattern.search(data).group(1)
        feedback_json = json.loads(feedback)
        if feedback_json['result']:
            cookie_jar2.save(cookie_file, ignore_discard=True, ignore_expires=True)
            return 1
        return 0
    except Exception:
        # Missing redirect, malformed feedback, network or cookie-save error.
        return 0
+
+
def get_pwd_wsse(pwd, servertime, nonce):
    """
    Get wsse encrypted password.

    Computes sha1(sha1(sha1(pwd)) + servertime + nonce), where every stage
    is the 40-character hexadecimal digest of the previous one.

    @param pwd: login password (byte string or text; text is UTF-8 encoded)
    @param servertime: server time string from the prelogin response
    @param nonce: nonce string from the prelogin response
    @return: hexadecimal SHA-1 digest string
    """
    def _sha1_hex(value):
        # hashlib works on bytes. Encode text explicitly so a non-ASCII
        # password does not fail on an implicit ASCII conversion.
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        return hashlib.sha1(value).hexdigest()

    return _sha1_hex(_sha1_hex(_sha1_hex(pwd)) + servertime + nonce)
+
def get_pwd_rsa(pwd, servertime, nonce):
    """
    Get rsa2 encrypted password, using the RSA module from
    https://pypi.python.org/pypi/rsa/3.1.1 (documentation:
    http://stuvel.eu/files/python-rsa-doc/index.html).

    The plaintext is "<servertime>\\t<nonce>\\n<pwd>"; the result is the
    RSA ciphertext as a hexadecimal string.
    """
    # Public modulus n as a hex string. Per the original author it is
    # published by WEIBO.COM and is also among the values returned by the
    # prelogin request; it is hard-coded here.
    modulus_hex = 'EB2A38568661887FA180BDDB5CABD5F21C7BFD59C090CB2D245A87AC253062882729293E5506350508E7F9AA3BB77F4333231490F915F6D63C55FE2F08A49B353F444AD3993CACC02DB784ABBB8E42A9B1BBFFFB38BE18D78E87A0E41B9B8F73A928EE0CCEE1F6739884B9777E4FE9E88A1BBE495927AC4A799B3181D6442443'

    # Public exponent e: 0x10001, i.e. 65537 in decimal.
    public_exponent = 65537

    plaintext = ''.join([str(servertime), '\t', str(nonce), '\n', str(pwd)])

    # Build the public key from n (parsed from hex) and e.
    public_key = rsa.PublicKey(int(modulus_hex, 16), public_exponent)

    # Encrypt, then turn the binary ciphertext into a hex string.
    return binascii.b2a_hex(rsa.encrypt(plaintext, public_key))
+
+
def get_user(username):
    """
    Encode the login name the way the SSO endpoints expect it in 'su':
    URL-quote it, then base64-encode the quoted text.

    @param username: login user name
    @return: base64 text without any line breaks
    """
    try:
        quote = urllib.quote  # Python 2 location
    except AttributeError:
        from urllib.parse import quote  # Python 3 location

    quoted = quote(username)
    if not isinstance(quoted, bytes):
        quoted = quoted.encode('ascii')  # percent-quoted text is pure ASCII

    # b64encode never inserts line breaks. encodestring() wraps its output
    # every 76 characters, and stripping only the last character left
    # embedded newlines in long user names.
    encoded = base64.b64encode(quoted)
    if not isinstance(encoded, str):
        encoded = encoded.decode('ascii')
    return encoded
+
def main():
    """
    Log in with the credentials from config, keeping cookies in
    'weibo_login_cookies.dat'.

    @return: 0 when login() reports success, -1 otherwise
    """
    logged_in = login(config.username, config.passwd, 'weibo_login_cookies.dat')
    if not logged_in:
        print('Login WEIBO failed')
        return -1
    print('Login WEIBO succeeded')
    return 0


if __name__ == '__main__':
    main()

0 comments on commit 717a2d9

Please sign in to comment.