Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
tree: 735747ab20
Fetching contributors…

Cannot retrieve contributors at this time

file 98 lines (85 sloc) 4.608 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
__license__ = 'GPL v3'
__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
'''
www.nikkei.com
'''

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile


class NikkeiNet_sub_shakai(BasicNewsRecipe):
    """Calibre recipe for the "Social" section of the Nikkei electronic edition.

    Articles are listed through the RSS feed configured in ``feeds``.  The
    recipe declares ``needs_subscription = True``; when a username and a
    password are configured, ``get_browser`` logs in to nikkei.com before any
    article is fetched.
    """

    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Class-level list: it is shared by every instance of this recipe.
    # get_browser() appends the temporary cookie file it creates here.
    temp_files = []

    # Article clean-up rules, expressed as CSS class matches.
    remove_tags_before = {'class': "cmn-section cmn-indent"}
    remove_tags = [
        {'class': "JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class': "cmn-article_keyword cmn-clearfix"},
        {'class': "cmn-print_headline cmn-clearfix"},
    ]
    remove_tags_after = {'class': "cmn-pr_list"}

    feeds = [
        (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
    ]

    def get_browser(self):
        """Return a mechanize browser, logged in to nikkei.com if possible.

        The browser always gets a fresh ``LWPCookieJar``.  When either
        ``self.username`` or ``self.password`` is ``None`` the browser is
        returned without any login attempt.  Otherwise the login form is
        submitted, the automatic redirect form on the news front page is
        followed, and the ``redirectFlag`` cookie that the site would normally
        set from JavaScript is injected by hand before the final submit.

        Raises:
            ValueError: if the redirect page does not contain the expected
                ``checkValue`` JavaScript variable.
        """
        # Pass the instance explicitly: calling the base implementation
        # without it fails when get_browser is a plain instance method.
        br = BasicNewsRecipe.get_browser(self)

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        if self.username is None or self.password is None:
            return br

        # NOTE(review): the string handling below (str.replace on the response
        # body, text writes to the temporary file) looks written for
        # Python 2 -- confirm before running under Python 3.

        # Open the login form.
        br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
        response = br.response()

        # Comment out a disabled input which brings an error on mechanize:
        # everything from that <input> up to the gm_home_on.gif image tag is
        # wrapped in an HTML comment before the form is parsed.
        page = response.get_data()
        page = page.replace('<input id="j_id48"', '<!-- ')
        page = page.replace('gm_home_on.gif" />', ' -->')
        response.set_data(page)
        br.set_response(response)

        br.select_form(name='LA0010Form01')
        br['LA0010Form01:LA0010Email'] = self.username
        br['LA0010Form01:LA0010Password'] = self.password
        auto_login = br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',
                                          type="checkbox")
        auto_login.get(nr=0).selected = True
        br.submit()

        # Open the news site; by default it answers with a form that forces
        # a redirect, so submit that form.
        br.open('http://www.nikkei.com/')
        br.select_form(nr=0)
        br.submit()

        # The response carries a value that the site expects JavaScript to
        # store in a cookie; extract it so it can be set manually.
        raw = br.response().get_data()
        found = re.search(r"var checkValue = '(\d+)';", raw, re.M)
        if found is None:
            raise ValueError(
                "nikkei.com login failed: 'checkValue' was not found in the "
                "redirect page")
        redirectflag = found.group(1)
        br.select_form(nr=0)

        # Write the cookies in LWP cookie-file format and load them into the
        # jar.  Both cookies share the same attributes.
        cookie_attrs = ('; domain=".nikkei.com"; path="/"; path_spec; secure; '
                        'expires="2029-12-21 05:07:59Z"; version=0\n')
        cookie_file = PersistentTemporaryFile('_fa.html')
        self.temp_files.append(cookie_file)
        try:
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value" + cookie_attrs)
            cookie_file.write("Set-Cookie3: redirectFlag=" + redirectflag + cookie_attrs)
        finally:
            # Close even when a write fails so the handle is not leaked.
            cookie_file.close()
        cj.load(cookie_file.name)

        br.submit()
        return br
Something went wrong with that request. Please try again.