|
| 1 | +<!doctype html> |
| 2 | +<!--[if lt IE 7]><html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]--> |
| 3 | +<!--[if (IE 7)&!(IEMobile)]><html class="no-js lt-ie9 lt-ie8" lang="en"><![endif]--> |
| 4 | +<!--[if (IE 8)&!(IEMobile)]><html class="no-js lt-ie9" lang="en"><![endif]--> |
| 5 | +<!--[if gt IE 8]><!--> <html class="no-js" lang="en"><!--<![endif]--> |
| 6 | +<head> |
| 7 | +<meta charset="utf-8"> |
| 8 | +<title>如何修改结巴分词中jieba_cache文件的路径 – 叶子</title> |
| 9 | +<meta name="description" content=""> |
| 10 | +<meta name="keywords" content="结巴分词"> |
| 11 | + |
| 12 | + |
| 13 | + |
| 14 | +<!-- Open Graph --> |
| 15 | +<meta property="og:locale" content="en_US"> |
| 16 | +<meta property="og:type" content="article"> |
| 17 | +<meta property="og:title" content="如何修改结巴分词中jieba_cache文件的路径"> |
| 18 | +<meta property="og:description" content=""> |
| 19 | +<meta property="og:url" content="http://localhost:4000/articles/%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E7%BB%93%E5%B7%B4%E5%88%86%E8%AF%8D%E4%B8%ADjieba_cache%E6%96%87%E4%BB%B6%E7%9A%84%E8%B7%AF%E5%BE%84/"> |
| 20 | +<meta property="og:site_name" content="叶子"> |
| 21 | + |
| 22 | + |
| 23 | + |
| 24 | + |
| 25 | + |
| 26 | +<link rel="canonical" href="http://localhost:4000/articles/%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E7%BB%93%E5%B7%B4%E5%88%86%E8%AF%8D%E4%B8%ADjieba_cache%E6%96%87%E4%BB%B6%E7%9A%84%E8%B7%AF%E5%BE%84/"> |
| 27 | +<link href="http://localhost:4000/feed.xml" type="application/atom+xml" rel="alternate" title="叶子 Feed"> |
| 28 | + |
| 29 | + |
| 30 | +<!-- http://t.co/dKP3o1e --> |
| 31 | +<meta name="HandheldFriendly" content="True"> |
| 32 | +<meta name="MobileOptimized" content="320"> |
| 33 | +<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| 34 | + |
| 35 | +<!-- For all browsers --> |
| 36 | +<link rel="stylesheet" href="http://localhost:4000/assets/css/main.min.css"> |
| 37 | +<!-- Webfonts --> |
| 38 | +<!--<script src="//use.edgefonts.net/source-sans-pro:n2,i2,n3,i3,n4,i4,n6,i6,n7,i7,n9,i9;source-code-pro:n4,n7;volkhov.js"></script>--> |
| 39 | + |
| 40 | +<meta http-equiv="cleartype" content="on"> |
| 41 | + |
| 42 | +<!-- Load Modernizr --> |
| 43 | +<script src="http://localhost:4000/assets/js/vendor/modernizr-2.6.2.custom.min.js"></script> |
| 44 | + |
| 45 | +<!-- Icons --> |
| 46 | +<!-- 16x16 --> |
| 47 | +<link rel="shortcut icon" href="http://localhost:4000/favicon.ico"> |
| 48 | +<!-- 32x32 --> |
| 49 | +<link rel="shortcut icon" href="http://localhost:4000/favicon.png"> |
| 50 | +<!-- 57x57 (precomposed) for iPhone 3GS, pre-2011 iPod Touch and older Android devices --> |
| 51 | +<link rel="apple-touch-icon-precomposed" href="http://localhost:4000/images/apple-touch-icon-precomposed.png"> |
| 52 | +<!-- 72x72 (precomposed) for 1st generation iPad, iPad 2 and iPad mini --> |
| 53 | +<link rel="apple-touch-icon-precomposed" sizes="72x72" href="http://localhost:4000/images/apple-touch-icon-72x72-precomposed.png"> |
| 54 | +<!-- 114x114 (precomposed) for iPhone 4, 4S, 5 and post-2011 iPod Touch --> |
| 55 | +<link rel="apple-touch-icon-precomposed" sizes="114x114" href="http://localhost:4000/images/apple-touch-icon-114x114-precomposed.png"> |
| 56 | +<!-- 144x144 (precomposed) for iPad 3rd and 4th generation --> |
| 57 | +<link rel="apple-touch-icon-precomposed" sizes="144x144" href="http://localhost:4000/images/apple-touch-icon-144x144-precomposed.png"> |
| 58 | +<script> |
| 59 | +var _hmt = _hmt || []; // keep an existing global _hmt array, otherwise start an empty one (presumably the queue hm.js reads; confirm against Baidu docs) |
| 60 | +(function() { // build the Baidu hm.js script element at runtime and insert it into the document |
| 61 | + var hm = document.createElement("script"); |
| 62 | + hm.src = "https://hm.baidu.com/hm.js?f3fbccf0789476b80fab0c58359399a5"; // query string presumably identifies this site's Baidu account; verify |
| 63 | + var s = document.getElementsByTagName("script")[0]; // first script element in the document |
| 64 | + s.parentNode.insertBefore(hm, s); // place the new script immediately before that element |
| 65 | +})(); |
| 66 | +</script> |
| 67 | + |
| 68 | +</head> |
| 69 | + |
| 70 | +<body id="post"> |
| 71 | + |
| 72 | +<div class="navigation-wrapper"> |
| 73 | + <nav role="navigation" id="site-nav" class="animated drop"> |
| 74 | + <ul> |
| 75 | + |
| 76 | + <li> |
| 77 | + |
| 78 | + <a href="/articles">Posts</a> |
| 79 | + |
| 80 | + </li> |
| 81 | + |
| 82 | + <li> |
| 83 | + |
| 84 | + <a href="/links">Links</a> |
| 85 | + |
| 86 | + </li> |
| 87 | + |
| 88 | + <li> |
| 89 | + |
| 90 | + <a href="http://localhost:4000/about">About</a> |
| 91 | + |
| 92 | + </li> |
| 93 | + |
| 94 | + <li><a href="http://localhost:4000/feed.xml" title="Atom/RSS feed"><i class="icon-rss"></i> Feed</a></li> |
| 95 | + <li class="dosearch"><i class="icon-search"></i> Search</li> |
| 96 | + </ul> |
| 97 | + </nav> |
| 98 | +</div><!-- /.navigation-wrapper --> |
| 99 | + |
| 100 | +<!--[if lt IE 9]><div class="upgrade"><a href="http://whatbrowser.org/"><strong>Your browser is quite old!</strong> Why not upgrade to a different browser to better enjoy this site?</a></div><![endif]--> |
| 101 | + |
| 102 | +<div class="search-wrapper"> |
| 103 | + <div class="search-form"> |
| 104 | + <input type="text" class="search-field" placeholder="Search..."> |
| 105 | + <i class="icon-remove-sign icon-2x"></i> |
| 106 | + <ul class="search-results post-list"></ul><!-- /.search-results --> |
| 107 | + </div><!-- /.search-form --> |
| 108 | +</div><!-- ./search-wrapper --> |
| 109 | + |
| 110 | +<header class="masthead"> |
| 111 | + <div class="wrap"> |
| 112 | + |
| 113 | + <h1 class="site-title animated fadeIn"><a href="/">叶子</a></h1> |
| 114 | + <h2 class="site-description animated fadeIn" itemprop="description">import this</h2> |
| 115 | + </div> |
| 116 | +</header><!-- /.masthead --> |
| 117 | + |
| 118 | + |
| 119 | +<div id="main" role="main"> |
| 120 | + <article class="hentry"> |
| 121 | + |
| 122 | + <div class="entry-wrapper"> |
| 123 | + <header class="entry-header"> |
| 124 | + <span class="entry-tags">Tags:<a href="http://localhost:4000/tags/#结巴分词" title="Pages tagged 结巴分词">结巴分词</a></span> |
| 125 | + |
| 126 | + <h1 class="entry-title">如何修改结巴分词中jieba_cache文件的路径</h1> |
| 127 | + |
| 128 | + </header> |
| 129 | + <div class="entry-content"> |
| 130 | + <p>通过查看结巴分词源码,发现源码在全局位置初始化了一个名为 dt 的 Tokenizer 对象,初始化代码为 <code class="highlighter-rouge">dt = Tokenizer()</code>, |
| 131 | +我们使用 Python 中常用的 Monkey Patch 方式,可以修改结巴分词的 jieba.cache 文件所在目录和名字,代码如下:</p> |
| 132 | + |
| 133 | +<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>import jieba |
| 134 | + |
| 135 | +jieba.dt.tmp_dir = 'YOUR TMP DIRECTORY' |
| 136 | +jieba.dt.cache_file = 'YOUR CACHE FILE NAME' |
| 137 | +</code></pre></div></div> |
| 138 | + |
| 139 | + |
| 140 | + <div id="disqus_thread"></div><!-- /#disqus_thread --> |
| 141 | + <!-- / disqus 评论 --> |
| 142 | + <script type="text/javascript"> |
| 143 | + /* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */ |
| 144 | + var disqus_shortname = 'leafcoder'; // required: replace example with your forum shortname |
| 145 | + |
| 146 | + /* * * DON'T EDIT BELOW THIS LINE * * */ |
| 147 | + (function() { |
| 148 | + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; |
| 149 | + dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; |
| 150 | + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); |
| 151 | + })(); |
| 152 | + </script> |
| 153 | + <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> |
| 154 | + <a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> |
| 155 | + <!-- disqus 评论 / --> |
| 156 | + |
| 157 | + </div><!-- /.entry-content --> |
| 158 | + </div><!-- /.entry-wrapper --> |
| 159 | + <nav class="pagination" role="navigation"> |
| 160 | + |
| 161 | + <a href="http://localhost:4000/articles/Git%E5%B8%B8%E7%94%A8%E5%91%BD%E4%BB%A4%E8%AE%B0%E5%BD%95/" class="btn" title="Git常用命令记录">Previous article</a> |
| 162 | + |
| 163 | + |
| 164 | + </nav><!-- /.pagination --> |
| 165 | + </article> |
| 166 | +</div><!-- /#main --> |
| 167 | + |
| 168 | + |
| 169 | +<div class="footer-wrapper"> |
| 170 | + <footer role="contentinfo"> |
| 171 | + <span>© 2018 Leo Zhang. Powered by <a href="http://jekyllrb.com">Jekyll</a> using the <a href="http://mademistakes.com/so-simple/">So Simple Theme</a>.</span> |
| 172 | +<div class="social-icons"> |
| 173 | + |
| 174 | + |
| 175 | + |
| 176 | + |
| 177 | + |
| 178 | + |
| 179 | + |
| 180 | + <a href="http://github.com/leafcoder" title="Leo Zhang on Github" target="_blank"><i class="icon-github icon-2x"></i></a> |
| 181 | + |
| 182 | +</div><!-- /.social-icons --> |
| 183 | +<!--Google Analytics--> |
| 184 | +<script> |
| 185 | + (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ |
| 186 | + (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), |
| 187 | + m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) |
| 188 | + })(window,document,'script','//static.qiniucdn.com/public/v28677/add-on/ga/analytics.js','ga'); |
| 189 | + |
| 190 | + ga('create', 'UA-54353519-1', 'auto'); |
| 191 | + ga('send', 'pageview'); |
| 192 | + |
| 193 | +</script> |
| 194 | +<script type="text/javascript">var cnzz_protocol = (("https:" == document.location.protocol) ? " https://" : " http://");document.write(unescape("%3Cspan id='cnzz_stat_icon_1271707807'%3E%3C/span%3E%3Cscript src='" + cnzz_protocol + "s13.cnzz.com/z_stat.php%3Fid%3D1271707807%26online%3D1%26show%3Dline' type='text/javascript'%3E%3C/script%3E"));</script> |
| 195 | + </footer> |
| 196 | +</div><!-- /.footer-wrapper --> |
| 197 | + |
| 198 | +<script src="//cdn.bootcss.com/jquery/1.9.1/jquery.min.js"></script> |
| 199 | +<script>window.jQuery || document.write('<script src="http://localhost:4000/assets/js/vendor/jquery-1.9.1.min.js"><\/script>')</script> |
| 200 | +<script src="http://localhost:4000/assets/js/scripts.min.js"></script> |
| 201 | + |
| 202 | +<!-- Jekyll Simple Search option --> |
| 203 | +<script> |
| 204 | +  // Attach the Simple Jekyll Search plugin to the search field once the DOM is ready. |
| 205 | +  $(function () { |
| 206 | +    var searchOptions = { |
| 207 | +      jsonFile: 'http://localhost:4000/search.json', |
| 208 | +      searchResults: '.search-results', |
| 209 | +      template: '<li><article><a href="{url}">{title} <span class="entry-date"><time datetime="{date}">{shortdate}</time></span></a></article></li>', |
| 210 | +      noResults: '<p>Nothing found.</p>' |
| 211 | +    }; |
| 212 | +    $('.search-field').simpleJekyllSearch(searchOptions); |
| 213 | +  }); |
| 214 | +  // Search overlay open/close handlers; these elements are looked up once, when this script runs. |
| 215 | +  (function ($, window, undefined) { |
| 216 | +    var $closeIcon = $('.icon-remove-sign'); |
| 217 | +    var $form = $('.search-form'); |
| 218 | +    var $page = $('body'); |
| 219 | +    var $trigger = $('.dosearch'); |
| 220 | + |
| 221 | +    // Open: show the wrapper, toggle the form's active class, focus its input, toggle the page overlay class. |
| 222 | +    $trigger.on('click', function () { |
| 223 | +      $('.search-wrapper').css('display', 'block'); |
| 224 | +      $form.toggleClass('active').find('input').focus(); |
| 225 | +      $page.toggleClass('search-overlay'); |
| 226 | +    }); |
| 227 | + |
| 228 | +    // Close: drop the wrapper's inline style, toggle the form's active class, remove the page overlay class. |
| 229 | +    $closeIcon.on('click', function () { |
| 230 | +      $('.search-wrapper').removeAttr('style'); |
| 231 | +      $form.toggleClass('active'); |
| 232 | +      $page.removeClass('search-overlay'); |
| 233 | +    }); |
| 234 | +  })(jQuery, window); |
| 235 | +</script> |
| 236 | + |
| 237 | + |
| 238 | +<script type="text/javascript"> |
| 239 | + /* Disqus forum shortname: used below as the subdomain for embed.js and count.js. It is a shared global, so it must match the 'leafcoder' value the other Disqus scripts on this page declare. */ |
| 240 | + var disqus_shortname = 'leafcoder'; |
| 241 | + |
| 242 | + /* Load the Disqus comment embed asynchronously. NOTE(review): the in-article script also injects embed.js; confirm whether both loaders are needed. */ |
| 243 | + (function() { |
| 244 | + var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true; |
| 245 | + dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js'; |
| 246 | + (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq); |
| 247 | + })(); |
| 248 | + |
| 249 | + /* Load the Disqus comment-count script asynchronously. */ |
| 250 | + (function () { |
| 251 | + var s = document.createElement('script'); s.async = true; |
| 252 | + s.type = 'text/javascript'; |
| 253 | + s.src = '//' + disqus_shortname + '.disqus.com/count.js'; |
| 254 | + (document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s); |
| 255 | + }()); |
| 256 | +</script> |
| 257 | +<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript> |
| 258 | +<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a> |
| 259 | + |
| 260 | + |
| 261 | +<script type="text/javascript"> |
| 262 | +/* Disqus forum shortname; used below as the subdomain of the count.js URL. */ |
| 263 | +var disqus_shortname = 'leafcoder'; // global; the same variable name is declared by the earlier Disqus scripts on this page |
| 264 | + |
| 265 | +/* Append an async script element for Disqus count.js to the head element, or to body when no head element is found. */ |
| 266 | +(function () { |
| 267 | + var s = document.createElement('script'); s.async = true; |
| 268 | + s.type = 'text/javascript'; |
| 269 | + s.src = '//' + disqus_shortname + '.disqus.com/count.js'; |
| 270 | + (document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s); |
| 271 | +}()); |
| 272 | +</script> |
| 273 | +</body> |
| 274 | +</html> |
0 commit comments