Skip to content

Commit 2c04c61

Browse files
committed
提交新文章
1 parent 957ad18 commit 2c04c61

File tree

9 files changed

+356
-39
lines changed

9 files changed

+356
-39
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
layout: post
3+
title: "如何修改结巴分词中jieba_cache文件的路径"
4+
description: ""
5+
category: articles
6+
tags: [结巴分词]
7+
image:
8+
feature:
9+
credit: Michael Rose
10+
creditlink: http://mademistakes.com
11+
comments: true
12+
share: true
13+
---
14+
15+
通过查看结巴分词源码,发现源码在全局位置初始化了一个名为 dt 的 Tokenizer 对象,初始化代码为 `dt = Tokenizer()`
16+
我们使用 Python 中常用的 Monkey Patch 方式,可以修改结巴分词的 jieba.cache 文件所在目录和名字,代码如下:
17+
18+
```
19+
import jieba
20+
21+
jieba.dt.tmp_dir = 'YOUR TMP DIRECTORY'
22+
jieba.dt.cache_file = 'YOUR CACHE FILE NAME'
23+
```

_site/articles/Git常用命令记录/index.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ <h2 id="已推送">已推送</h2>
265265
<a href="http://localhost:4000/articles/%E7%BC%96%E8%AF%91%E9%94%99%E8%AF%AF_possibly_undefined_macro_AC_PROG_LIBTOOL_%E7%9A%84%E8%A7%A3%E5%86%B3%E6%96%B9%E6%B3%95/" class="btn" title="编译错误 possibly undefined macro: AC_PROG_LIBTOOL 的解决方法">Previous article</a>
266266

267267

268+
<a href="http://localhost:4000/articles/%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E7%BB%93%E5%B7%B4%E5%88%86%E8%AF%8D%E4%B8%ADjieba_cache%E6%96%87%E4%BB%B6%E7%9A%84%E8%B7%AF%E5%BE%84/" class="btn" title="如何修改结巴分词中jieba_cache文件的路径">Next article</a>
269+
268270
</nav><!-- /.pagination -->
269271
</article>
270272
</div><!-- /#main -->

_site/articles/index.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,8 @@ <h1 class="entry-title">Articles</h1>
123123
<div class="entry-content">
124124
<ul class="post-list">
125125

126+
<li><article><a href="http://localhost:4000/articles/%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E7%BB%93%E5%B7%B4%E5%88%86%E8%AF%8D%E4%B8%ADjieba_cache%E6%96%87%E4%BB%B6%E7%9A%84%E8%B7%AF%E5%BE%84/">如何修改结巴分词中jieba_cache文件的路径 <span class="entry-date"><time datetime="2018-01-24T00:00:00+00:00">January 24, 2018</time></span></a></article></li>
127+
126128
<li><article><a href="http://localhost:4000/articles/Git%E5%B8%B8%E7%94%A8%E5%91%BD%E4%BB%A4%E8%AE%B0%E5%BD%95/">Git常用命令记录 <span class="entry-date"><time datetime="2018-01-16T00:00:00+00:00">January 16, 2018</time></span></a></article></li>
127129

128130
<li><article><a href="http://localhost:4000/articles/%E7%BC%96%E8%AF%91%E9%94%99%E8%AF%AF_possibly_undefined_macro_AC_PROG_LIBTOOL_%E7%9A%84%E8%A7%A3%E5%86%B3%E6%96%B9%E6%B3%95/">编译错误 possibly undefined macro: AC_PROG_LIBTOOL 的解决方法 <span class="entry-date"><time datetime="2017-12-01T00:00:00+00:00">December 01, 2017</time></span></a></article></li>
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
<!doctype html>
2+
<!--[if lt IE 7]><html class="no-js lt-ie9 lt-ie8 lt-ie7" lang="en"> <![endif]-->
3+
<!--[if (IE 7)&!(IEMobile)]><html class="no-js lt-ie9 lt-ie8" lang="en"><![endif]-->
4+
<!--[if (IE 8)&!(IEMobile)]><html class="no-js lt-ie9" lang="en"><![endif]-->
5+
<!--[if gt IE 8]><!--> <html class="no-js" lang="en"><!--<![endif]-->
6+
<head>
7+
<meta charset="utf-8">
8+
<title>如何修改结巴分词中jieba_cache文件的路径 &#8211; 叶子</title>
9+
<meta name="description" content="">
10+
<meta name="keywords" content="结巴分词">
11+
12+
13+
14+
<!-- Open Graph -->
15+
<meta property="og:locale" content="en_US">
16+
<meta property="og:type" content="article">
17+
<meta property="og:title" content="如何修改结巴分词中jieba_cache文件的路径">
18+
<meta property="og:description" content="">
19+
<meta property="og:url" content="http://localhost:4000/articles/%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E7%BB%93%E5%B7%B4%E5%88%86%E8%AF%8D%E4%B8%ADjieba_cache%E6%96%87%E4%BB%B6%E7%9A%84%E8%B7%AF%E5%BE%84/">
20+
<meta property="og:site_name" content="叶子">
21+
22+
23+
24+
25+
26+
<link rel="canonical" href="http://localhost:4000/articles/%E5%A6%82%E4%BD%95%E4%BF%AE%E6%94%B9%E7%BB%93%E5%B7%B4%E5%88%86%E8%AF%8D%E4%B8%ADjieba_cache%E6%96%87%E4%BB%B6%E7%9A%84%E8%B7%AF%E5%BE%84/">
27+
<link href="http://localhost:4000/feed.xml" type="application/atom+xml" rel="alternate" title="叶子 Feed">
28+
29+
30+
<!-- http://t.co/dKP3o1e -->
31+
<meta name="HandheldFriendly" content="True">
32+
<meta name="MobileOptimized" content="320">
33+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
34+
35+
<!-- For all browsers -->
36+
<link rel="stylesheet" href="http://localhost:4000/assets/css/main.min.css">
37+
<!-- Webfonts -->
38+
<!--<script src="//use.edgefonts.net/source-sans-pro:n2,i2,n3,i3,n4,i4,n6,i6,n7,i7,n9,i9;source-code-pro:n4,n7;volkhov.js"></script>-->
39+
40+
<meta http-equiv="cleartype" content="on">
41+
42+
<!-- Load Modernizr -->
43+
<script src="http://localhost:4000/assets/js/vendor/modernizr-2.6.2.custom.min.js"></script>
44+
45+
<!-- Icons -->
46+
<!-- 16x16 -->
47+
<link rel="shortcut icon" href="http://localhost:4000/favicon.ico">
48+
<!-- 32x32 -->
49+
<link rel="shortcut icon" href="http://localhost:4000/favicon.png">
50+
<!-- 57x57 (precomposed) for iPhone 3GS, pre-2011 iPod Touch and older Android devices -->
51+
<link rel="apple-touch-icon-precomposed" href="http://localhost:4000/images/apple-touch-icon-precomposed.png">
52+
<!-- 72x72 (precomposed) for 1st generation iPad, iPad 2 and iPad mini -->
53+
<link rel="apple-touch-icon-precomposed" sizes="72x72" href="http://localhost:4000/images/apple-touch-icon-72x72-precomposed.png">
54+
<!-- 114x114 (precomposed) for iPhone 4, 4S, 5 and post-2011 iPod Touch -->
55+
<link rel="apple-touch-icon-precomposed" sizes="114x114" href="http://localhost:4000/images/apple-touch-icon-114x114-precomposed.png">
56+
<!-- 144x144 (precomposed) for iPad 3rd and 4th generation -->
57+
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="http://localhost:4000/images/apple-touch-icon-144x144-precomposed.png">
58+
<script>
59+
var _hmt = _hmt || [];
60+
(function() {
61+
var hm = document.createElement("script");
62+
hm.src = "https://hm.baidu.com/hm.js?f3fbccf0789476b80fab0c58359399a5";
63+
var s = document.getElementsByTagName("script")[0];
64+
s.parentNode.insertBefore(hm, s);
65+
})();
66+
</script>
67+
68+
</head>
69+
70+
<body id="post">
71+
72+
<div class="navigation-wrapper">
73+
<nav role="navigation" id="site-nav" class="animated drop">
74+
<ul>
75+
76+
<li>
77+
78+
<a href="/articles">Posts</a>
79+
80+
</li>
81+
82+
<li>
83+
84+
<a href="/links">Links</a>
85+
86+
</li>
87+
88+
<li>
89+
90+
<a href="http://localhost:4000/about">About</a>
91+
92+
</li>
93+
94+
<li><a href="http://localhost:4000/feed.xml" title="Atom/RSS feed"><i class="icon-rss"></i> Feed</a></li>
95+
<li class="dosearch"><i class="icon-search"></i> Search</li>
96+
</ul>
97+
</nav>
98+
</div><!-- /.navigation-wrapper -->
99+
100+
<!--[if lt IE 9]><div class="upgrade"><a href="http://whatbrowser.org/"><strong>Your browser is quite old!</strong> Why not upgrade to a different browser to better enjoy this site?</a></div><![endif]-->
101+
102+
<div class="search-wrapper">
103+
<div class="search-form">
104+
<input type="text" class="search-field" placeholder="Search...">
105+
<i class="icon-remove-sign icon-2x"></i>
106+
<ul class="search-results post-list"></ul><!-- /.search-results -->
107+
</div><!-- /.search-form -->
108+
</div><!-- ./search-wrapper -->
109+
110+
<header class="masthead">
111+
<div class="wrap">
112+
113+
<h1 class="site-title animated fadeIn"><a href="/">叶子</a></h1>
114+
<h2 class="site-description animated fadeIn" itemprop="description">import this</h2>
115+
</div>
116+
</header><!-- /.masthead -->
117+
118+
119+
<div id="main" role="main">
120+
<article class="hentry">
121+
122+
<div class="entry-wrapper">
123+
<header class="entry-header">
124+
<span class="entry-tags">Tags:<a href="http://localhost:4000/tags/#结巴分词" title="Pages tagged 结巴分词">结巴分词</a></span>
125+
126+
<h1 class="entry-title">如何修改结巴分词中jieba_cache文件的路径</h1>
127+
128+
</header>
129+
<div class="entry-content">
130+
<p>通过查看结巴分词源码,发现源码在全局位置初始化了一个名为 dt 的 Tokenizer 对象,初始化代码为 <code class="highlighter-rouge">dt = Tokenizer()</code>
131+
我们使用 Python 中常用的 Monkey Patch 方式,可以修改结巴分词的 jieba.cache 文件所在目录和名字,代码如下:</p>
132+
133+
<div class="highlighter-rouge"><div class="highlight"><pre class="highlight"><code>import jieba
134+
135+
jieba.dt.tmp_dir = 'YOUR TMP DIRECTORY'
136+
jieba.dt.cache_file = 'YOUR CACHE FILE NAME'
137+
</code></pre></div></div>
138+
139+
140+
<div id="disqus_thread"></div><!-- /#disqus_thread -->
141+
<!-- / disqus 评论 -->
142+
<script type="text/javascript">
143+
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
144+
var disqus_shortname = 'leafcoder'; // required: replace example with your forum shortname
145+
146+
/* * * DON'T EDIT BELOW THIS LINE * * */
147+
(function() {
148+
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
149+
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
150+
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
151+
})();
152+
</script>
153+
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
154+
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
155+
<!-- disqus 评论 / -->
156+
157+
</div><!-- /.entry-content -->
158+
</div><!-- /.entry-wrapper -->
159+
<nav class="pagination" role="navigation">
160+
161+
<a href="http://localhost:4000/articles/Git%E5%B8%B8%E7%94%A8%E5%91%BD%E4%BB%A4%E8%AE%B0%E5%BD%95/" class="btn" title="Git常用命令记录">Previous article</a>
162+
163+
164+
</nav><!-- /.pagination -->
165+
</article>
166+
</div><!-- /#main -->
167+
168+
169+
<div class="footer-wrapper">
170+
<footer role="contentinfo">
171+
<span>&copy; 2018 Leo Zhang. Powered by <a href="http://jekyllrb.com">Jekyll</a> using the <a href="http://mademistakes.com/so-simple/">So Simple Theme</a>.</span>
172+
<div class="social-icons">
173+
174+
175+
176+
177+
178+
179+
180+
<a href="http://github.com/leafcoder" title="Leo Zhang on Github" target="_blank"><i class="icon-github icon-2x"></i></a>
181+
182+
</div><!-- /.social-icons -->
183+
<!--Google Analytics-->
184+
<script>
185+
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
186+
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
187+
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
188+
})(window,document,'script','//static.qiniucdn.com/public/v28677/add-on/ga/analytics.js','ga');
189+
190+
ga('create', 'UA-54353519-1', 'auto');
191+
ga('send', 'pageview');
192+
193+
</script>
194+
<script type="text/javascript">var cnzz_protocol = (("https:" == document.location.protocol) ? " https://" : " http://");document.write(unescape("%3Cspan id='cnzz_stat_icon_1271707807'%3E%3C/span%3E%3Cscript src='" + cnzz_protocol + "s13.cnzz.com/z_stat.php%3Fid%3D1271707807%26online%3D1%26show%3Dline' type='text/javascript'%3E%3C/script%3E"));</script>
195+
</footer>
196+
</div><!-- /.footer-wrapper -->
197+
198+
<script src="//cdn.bootcss.com/jquery/1.9.1/jquery.min.js"></script>
199+
<script>window.jQuery || document.write('<script src="http://localhost:4000/assets/js/vendor/jquery-1.9.1.min.js"><\/script>')</script>
200+
<script src="http://localhost:4000/assets/js/scripts.min.js"></script>
201+
202+
<!-- Jekyll Simple Search option -->
203+
<script>
204+
$(document).ready(function() {
205+
$('.search-field').simpleJekyllSearch({
206+
jsonFile : 'http://localhost:4000/search.json',
207+
searchResults : '.search-results',
208+
template : '<li><article><a href="{url}">{title} <span class="entry-date"><time datetime="{date}">{shortdate}</time></span></a></article></li>',
209+
noResults: '<p>Nothing found.</p>'
210+
});
211+
});
212+
213+
(function( $, window, undefined ) {
214+
215+
var bs = {
216+
close: $(".icon-remove-sign"),
217+
searchform: $(".search-form"),
218+
canvas: $("body"),
219+
dothis: $('.dosearch')
220+
};
221+
222+
bs.dothis.on('click', function() {
223+
$('.search-wrapper').css({ display: "block" });
224+
bs.searchform.toggleClass('active');
225+
bs.searchform.find('input').focus();
226+
bs.canvas.toggleClass('search-overlay');
227+
});
228+
229+
bs.close.on('click', function() {
230+
$('.search-wrapper').removeAttr( 'style' );
231+
bs.searchform.toggleClass('active');
232+
bs.canvas.removeClass('search-overlay');
233+
});
234+
})( jQuery, window );
235+
</script>
236+
237+
238+
<script type="text/javascript">
239+
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
240+
var disqus_shortname = '评论'; // required: replace example with your forum shortname
241+
242+
/* * * DON'T EDIT BELOW THIS LINE * * */
243+
(function() {
244+
var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
245+
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
246+
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
247+
})();
248+
249+
/* * * DON'T EDIT BELOW THIS LINE * * */
250+
(function () {
251+
var s = document.createElement('script'); s.async = true;
252+
s.type = 'text/javascript';
253+
s.src = '//' + disqus_shortname + '.disqus.com/count.js';
254+
(document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s);
255+
}());
256+
</script>
257+
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by Disqus.</a></noscript>
258+
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
259+
260+
261+
<script type="text/javascript">
262+
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
263+
var disqus_shortname = 'leafcoder'; // required: replace example with your forum shortname
264+
265+
/* * * DON'T EDIT BELOW THIS LINE * * */
266+
(function () {
267+
var s = document.createElement('script'); s.async = true;
268+
s.type = 'text/javascript';
269+
s.src = '//' + disqus_shortname + '.disqus.com/count.js';
270+
(document.getElementsByTagName('HEAD')[0] || document.getElementsByTagName('BODY')[0]).appendChild(s);
271+
}());
272+
</script>
273+
</body>
274+
</html>

0 commit comments

Comments
 (0)