Skip to content
Newer
Older
100644 202 lines (156 sloc) 6.36 KB
e2bfa2b @runpaint Initial import.
authored
1 <?xml version="1.0" encoding="UTF-8"?>
2 <!--
3 sitemap_gen.py example configuration script
4
5 This file specifies a set of sample input parameters for the
6 sitemap_gen.py client.
7
8 You should copy this file into "config.xml" and modify it for
9 your server.
10
11
12 ********************************************************* -->
13
14
15 <!-- ** MODIFY **
16 The "site" node describes your basic web site.
17
18 Required attributes:
19 base_url - the top-level URL of the site being mapped
20 store_into - the webserver path to the desired output file.
21 This should end in '.xml' or '.xml.gz'
22 (the script will create this file)
23
24 Optional attributes:
25 verbose - an integer from 0 (quiet) to 3 (noisy) for
26 how much diagnostic output the script gives
27 suppress_search_engine_notify="1"
28 - disables notifying search engines about the new map
29 (same as the "testing" command-line argument.)
30 default_encoding
31 - names a character encoding to use for URLs and
32 file paths. (Example: "UTF-8")
33 sitemap_type
34 - declares the Sitemap type. Common values are
35 web, mobile and news. The "web" Sitemap is the default.
36 (Example: sitemap_type="news")
37 -->
38 <site
39 base_url="http://www.example.com/"
40 store_into="/var/www/docroot/sitemap.xml"
41 verbose="1"
42 sitemap_type="web"
43 >
44
45 <!--
46 <site
47 base_url="http://www.example.com/"
48 store_into="/var/www/docroot/sitemap.xml"
49 verbose="2"
50 sitemap_type="news"
51 >
52
53 -->
54
55
56 <!-- ********************************************************
57 INPUTS
58
59 All the various nodes in this section control where the script
60 looks to find URLs.
61
62 MODIFY or DELETE these entries as appropriate for your server.
63 ********************************************************* -->
64
65 <!-- ** MODIFY or DELETE **
66 "url" nodes specify individual URLs to include in the map.
67
68 Required attributes:
69 href - the URL
70
71 Optional attributes:
72 lastmod - timestamp of last modification (ISO8601 format)
73 changefreq - how often content at this URL is usually updated
74 priority - value 0.0 to 1.0 of relative importance in your site
75 -->
76
77 <!--
78 <url href="http://www.example.com/stats?q=name" />
79 <url
80 href="http://www.example.com/stats?q=age"
81 lastmod="2004-11-14T01:00:00-07:00"
82 changefreq="yearly"
83 priority="0.3"
84 />
85 -->
86
87
88 <!-- ** MODIFY or DELETE **
89 "urllist" nodes name text files with lists of URLs.
90 An example file "example_urllist.txt" is provided.
91
92 Required attribute for all Sitemap types:
93 path - path to the file
94
95 Required attribute for News Sitemaps:
96 tag_order - News Sitemaps metatag order, comma-separated.
97 (Example: tag_order="loc, changefreq, lastmod,
98 publication_date, keywords")
99
100 Optional attributes:
101 encoding - encoding of the file if not US-ASCII
102
103 -->
104
105 <!--
106 <urllist
107 path="news_input.txt"
108 encoding="UTF-8"
109 tag_order="loc, changefreq, priority, lastmod, publication_date,
110 keywords, stock_tickers"
111 />
112
113 <urllist path="web_urls.txt" encoding="UTF-8" />
114 -->
115
116 <!-- ** MODIFY or DELETE **
117 "directory" nodes tell the script to walk the file system
118 and include all files and directories in the Sitemap.
119
120 Required attributes:
121 path - path to begin walking from
122 url - URL equivalent of that path
123
124 Optional attributes:
125 default_file - name of the index or default file for directory URLs
126 remove_empty_directories - Values are true or false. Default is false.
127 true=remove empty directories
128 -->
129
130 <!--
131 <directory path="/var/www/icons" url="http://www.example.com/images/" />
132 <directory
133 path="/var/www/docroot"
134 url="http://www.example.com/"
135 default_file="index.html"
136 remove_empty_directories="true"
137 />
138 -->
139
140 <!--
141 "accesslog" nodes tell the script to scan webserver log files to
142 extract URLs on your site. Both Common Logfile Format (Apache's default
143 logfile) and Extended Logfile Format (IIS's default logfile) can be read.
144
145 Required attributes:
146 path - path to the file
147
148 Optional attributes:
149 encoding - encoding of the file if not US-ASCII
150 -->
151
152 <!--
153 <accesslog path="/etc/httpd/logs/access.log" encoding="UTF-8" />
154 <accesslog path="/etc/httpd/logs/access.log.0" encoding="UTF-8" />
155 <accesslog path="/etc/httpd/logs/access.log.1.gz" encoding="UTF-8" />
156 -->
157
158
159 <!-- ********************************************************
160 FILTERS
161
162 Filters specify wild-card patterns that the script compares
163 against all URLs it finds. Filters can be used to exclude
164 certain URLs from your Sitemap, for instance if you have
165 hidden content that you hope the search engines don't find.
166
167 Filters can be either type="wildcard", which means standard
168 path wildcards (* and ?) are used to compare against URLs,
169 or type="regexp", which means regular expressions are used
170 to compare.
171
172 Filters are applied in the order specified in this file.
173
174 An action="drop" filter causes exclusion of matching URLs.
175 An action="pass" filter causes inclusion of matching URLs,
176 shortcutting any other later filters that might also match.
177 If no filter at all matches a URL, the URL will be included.
178 Together you can build up fairly complex rules.
179
180 The default action is "drop".
181 The default type is "wildcard".
182
183 You can MODIFY or DELETE these entries as appropriate for
184 your site. However, unlike above, the example entries in
185 this section are not contrived and may be useful to you as
186 they are.
187 ********************************************************* -->
188
189 <!-- Exclude URLs that end with a '~' (e.g., emacs backup files) -->
190 <filter action="drop" type="wildcard" pattern="*~" />
191
192 <!-- Exclude URLs within UNIX-style hidden files or directories -->
193 <filter action="drop" type="regexp" pattern="/\.[^/]*" />
194 <filter action="drop" type="wildcard" pattern="*icons*" />
195 <filter action="drop" type="wildcard" pattern="*logos*" />
196 <filter action="drop" type="wildcard" pattern="*todo*" />
197 <filter action="drop" type="wildcard" pattern="*Easter*" />
198 <filter action="drop" type="wildcard" pattern="*/help/help/*" />
199 <filter action="drop" type="wildcard" pattern="*/press/*.gif" />
200
201 </site>
Something went wrong with that request. Please try again.