forked from gigablast/open-source-search-engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Conf.cpp
356 lines (329 loc) · 11.2 KB
/
Conf.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
#include "gb-include.h"
#include "Conf.h"
#include "Parms.h"
#include "CollectionRec.h"
#include "Indexdb.h"
#include "Users.h"
#include "Proxy.h"
// . the global Conf instance
// . the methods in this file read and write many settings through this name
//   (g_conf.m_xxx) rather than through "this"
Conf g_conf;
// . constructor
// . only m_save is given a value here; it starts out true so that save()
//   is not turned into a no-op
Conf::Conf ( ) : m_save ( true ) {
}
// . does this requester have MASTER (root) admin privileges?
// . the master admin can administer ALL collections
// . use CollectionRec::hasPermission() to see if a requester has permission
//   to administer one particular collection
// . "s" is the requesting socket. it plays no part in the decision and is
//   only kept so that callers do not have to change
// . "r" is the http request. if it carries "master=0" we always say no,
//   otherwise it is handed to g_users.hasPermission() for PAGE_MASTER
// . returns true if the requester is a master admin, false otherwise
// . the decision belongs entirely to the g_users permission system. there
//   are deliberately no fallbacks on the requesting ip (loopback, hosts.conf
//   or the master ip list) or on a "pwd" parm: such checks could never run
//   because they sat after the unconditional return below, so they were
//   dropped together with the "ip" local that dereferenced "s" for nothing
bool Conf::isMasterAdmin ( TcpSocket *s , HttpRequest *r ) {
	// not consulted, see above
	(void)s;
	// sometimes they don't want to be admin intentionally for testing
	if ( r->getLong ( "master" , 1 ) == 0 ) return false;
	// use new permission system
	return g_users.hasPermission ( r , PAGE_MASTER );
}
// . is "ip" in the list of master admin ips?
// . scans the first m_numMasterIps entries of m_masterIps[]
// . returns true on the first entry equal to "ip", false if there is none
bool Conf::isAdminIp ( unsigned long ip ) {
	const long wanted = (long)ip;
	long k = 0;
	while ( k < m_numMasterIps ) {
		if ( m_masterIps[k] == wanted ) return true;
		k++;
	}
	//if ( ip == atoip("10.5.0.2",8) ) return true;
	// nothing matched
	return false;
}
// . is "ip" covered by one of the first m_numConnectIps entries of
//   m_connectIps[]?
// . an entry covers "ip" if it is equal to it, or if the entry has nothing
//   set above its low 24 bits and those low 24 bits equal the low 24 bits
//   of "ip"
// . the second form is how an entry like 1.2.3.0 stands for the whole block:
//   the high byte in the long is the least significant byte of the ip
// . returns true on the first covering entry, false if there is none
bool Conf::isConnectIp ( unsigned long ip ) {
	const long wanted = (long)ip;
	for ( long n = 0 ; n < m_numConnectIps ; n++ ) {
		// all four bytes agree
		const bool exact = ( m_connectIps[n] == wanted );
		// wildcard entry whose low three bytes agree
		const bool block =
			( (m_connectIps[n] >> 24) == 0 ) &&
			( (m_connectIps[n] & 0x00ffffff) ==
			  (wanted          & 0x00ffffff) );
		if ( exact || block ) return true;
	}
	// nothing matched
	return false;
}
// . put every parm of this Conf back to its default value via g_parms
// . m_save is switched back on afterwards
void Conf::reset ( ) {
	char *self = (char *)this;
	g_parms.setToDefault ( self );
	this->m_save = true;
}
// . load this Conf from "<dir>gb.conf", or from "./gb.conf" if dir is NULL
// . "dir" is used as a plain prefix, so it must already end in a '/'
// . all parms are first set to their defaults, then read from the file
// . after loading, a number of settings are forced to fixed values no matter
//   what the file said; see the notes on each one below
// . returns false if the filename does not fit in the local buffer or if
//   g_mem.init() fails. otherwise returns what g_parms.setFromFile()
//   returned, except that it is forced to true when g_isYippy is set
// . NOTE: the forced settings are written through g_conf, not through
//   "this", so this method is only meaningful when called on g_conf
bool Conf::init ( char *dir ) { // , long hostId ) {
	g_parms.setToDefault ( (char *)this );
	m_save = true;
	// . build the filename with a bounded write
	// . an over-long dir used to run off the end of this stack buffer
	char fname[1024];
	const char *prefix = dir ? dir : "./";
	int need = snprintf ( fname , sizeof(fname) , "%sgb.conf", prefix );
	if ( need < 0 || need >= (int)sizeof(fname) ) {
		log("admin: Path to gb.conf is too long.");
		return false;
	}
	// make sure g_mem.maxMem is big enough temporarily
	if ( g_mem.m_maxMem < 10000000 ) g_mem.m_maxMem = 10000000;
	bool status = g_parms.setFromFile ( this , fname , NULL );
	// ignore if yippy
	if ( g_isYippy ) {
		//g_conf.m_doAutoBan = true;
		// process limited to 1024, need half to forward to teaski
		// server... close least used will deal with the loris attack
		//g_conf.m_httpMaxSockets = 450;//800;
		// we now limit the /search yippy requests separately below
		// so if you get through that make sure you can download all
		// the images and css and don't run out of sockets...
		g_conf.m_httpMaxSockets = 475;
		// rich wants 8 and 30
		g_conf.m_numFreeQueriesPerMinute = 7;//20;//8;//5;
		g_conf.m_numFreeQueriesPerDay = 30;//500;//30;//20;//30;//70;
		g_conf.m_logAutobannedQueries = false;
		// a failed load of gb.conf is not an error for yippy
		status = true;
	}
	// update g_mem
	//g_mem.m_maxMem = g_conf.m_maxMem;
	if ( ! g_mem.init ( g_conf.m_maxMem ) ) return false;
	// always turn this off
	g_conf.m_testMem = false;
	// and this, in case you forgot to turn it off
	if ( g_conf.m_isLive ) g_conf.m_doConsistencyTesting = false;
	// and this on
	g_conf.m_indexDeletes = true;
	// these off
	g_conf.m_spideringEnabled = false;
	// this off
	g_conf.m_repairingEnabled = false;
	// make this 1 day for now (in seconds)
	g_conf.m_maxQualityCacheAge = 3600*24;
	// hack this off until the overrun bug is fixed
	g_conf.m_datedbMaxCacheMem = 0;
	// hard-code disable this -- could be dangerous
	g_conf.m_bypassValidation = false;
	// this could too! (need this)
	g_conf.m_allowScale = false;
	// . until we fix spell checker
	// . the hosts splitting count isn't right and it just sends to like
	//   host #0 or something...
	g_conf.m_doSpellChecking = false;
	// always turn on threads if live
	if ( g_conf.m_isLive ) g_conf.m_useThreads = true;
	// disable this at startup always... no since might have crashed
	// in the middle of a test. and we just turn on spiders again when
	// already in test mode otherwise hostid #0 will erase all the files.
	//g_conf.m_testParserEnabled = false;
	//g_conf.m_testSpiderEnabled = false;
	//g_conf.m_testSearchEnabled = false;
	// this is not possible
	/*
	if ( g_hostdb.getNumGroups() != g_hostdb.m_indexSplits ) {
		log("db: Cannot do full split where indexdb split "
		    "is not %li.",(long)g_hostdb.getNumGroups());
		g_conf.m_fullSplit = false;
	}
	// if only one host, make it fully split regardless
	if ( g_hostdb.getNumGroups() == 1 )
		g_conf.m_fullSplit = true;
	// note it in the log
	if ( g_conf.m_fullSplit )
		log(LOG_INFO,"db: Split is FULL");
	*/
	// . sanity check
	// . the write through a NULL pointer is on purpose: it stops the
	//   process right here, after the log line above it
	if ( g_hostdb.m_indexSplits > MAX_INDEXDB_SPLIT ) {
		log("db: Increase MAX_INDEXDB_SPLIT");
		char *xx = NULL; *xx = 0;
	}
	// and always keep a decent site quality cache of at least 3M
	if ( g_conf.m_siteQualityMaxCacheMem < 3000000 )
		g_conf.m_siteQualityMaxCacheMem = 3000000;
	// HACK: set this now
	setRootIps();
	return status;
}
void Conf::setRootIps ( ) {
//m_numDns = 16;
//for ( long i = 0; i < m_numDns; i++ )
// m_dnsPorts[i] = 53;
//m_numDns = 0;
// set m_numDns based on Conf::m_dnsIps[] array
long i; for ( i = 0; i < 16 ; i++ ) {
m_dnsPorts[i] = 53;
if ( ! g_conf.m_dnsIps[i] ) break;
}
m_numDns = i;
// hardcode google for now...
//m_dnsIps[0] = atoip("8.8.8.8",7);
//m_dnsIps[1] = atoip("8.8.4.4",7);
//m_numDns = 2;
Host *h = g_hostdb.getMyHost();
//char *ipStr = "10.5.0.3";
//char *ipStr = "10.5.56.78"; // gk268 now on roadrunner
//char *ipStr = "10.5.56.77"; // gk267 now cnsp-routed bind9 server
// now sp1 for speed (quad processor)
//char *ipStr = "10.5.66.11";
// fail back to google public dns
char *ipStr = "8.8.8.8";
// try google first dibs. NO! they are unresponsive after a while
//char *ipStr = "8.8.4.4";
// for some reason scproxy2 local bind9 not responding to us!!! fix!
//if ( h->m_type & HT_SCPROXY ) ipStr = "127.0.0.1";
//if ( h->m_type & HT_PROXY ) ipStr = "127.0.0.1";
if ( h->m_type & HT_SCPROXY ) ipStr = "8.8.8.8";
if ( h->m_type & HT_PROXY ) ipStr = "8.8.8.8";
// if we are a proxy, notably a spider compression proxy...
//if ( g_proxy.isProxy() ) ipStr = "127.0.0.1";
if ( m_numDns == 0 ) {
m_dnsIps[0] = atoip( ipStr , gbstrlen(ipStr) );
m_dnsPorts[0] = 53;
m_numDns = 1;
}
// default this to off on startup for now until it works better
m_askRootNameservers = false;
char *rootIps[] = {
"192.228.79.201",
"192.33.4.12",
"128.8.10.90",
//"192.203.230.10", ping timedout
"192.5.5.241",
//"192.112.36.4", ping timedout
//"128.63.2.53", ping timedout
//"192.36.148.17",
"192.58.128.30",
"193.0.14.129",
//"198.32.64.12",
"199.7.83.42", // new guy
"202.12.27.33",
"198.41.0.4"
};
long n = sizeof(rootIps)/sizeof(char *);
if ( n > MAX_RNSIPS ) {
log("admin: Too many root nameserver ips. Truncating.");
n = MAX_RNSIPS;
}
m_numRns = n;
for ( long i = 0 ; i < n ; i++ ) {
m_rnsIps [i] = atoip(rootIps[i],gbstrlen(rootIps[i]));
m_rnsPorts[i] = 53;
log("dns: Using root nameserver #%li %s.",
i,iptoa(m_rnsIps[i]));
}
}
// . parameters can be changed on the fly so we must save Conf
bool Conf::save ( ) {
if ( ! m_save ) return true;
// always reset this before saving
bool keep = g_conf.m_testMem ;
g_conf.m_testMem = false;
char fname[1024];
sprintf ( fname , "%sgb.conf.saving", g_hostdb.m_dir );
bool status = g_parms.saveToXml ( (char *)this , fname );
if ( status ) {
char fname2[1024];
sprintf( fname2 , "%sgb.conf" , g_hostdb.m_dir );
if(access(fname2, F_OK) == 0) unlink(fname2);
if(link(fname, fname2) == 0) {
unlink(fname);
log(LOG_INFO,"admin: Saved %s.",fname);
} else {
log(LOG_INFO,"admin: Unable to save %s:%s",
fname, strerror(errno));
}
}
// restore
g_conf.m_testMem = keep;
return status;
}
// . get the default collection based on hostname
// . looks at the three hostnames of each collection (m_collectionHostname,
//   m_collectionHostname1 and m_collectionHostname2) and returns the name
//   of the first collection that has one which is a prefix of "hostname"
// . only a prefix is required so that "hostname" may carry a port at the end
// . NOTE(review): for the same reason "a.com" also matches "a.com.b.org".
//   confirm that callers are fine with that
// . "hostname" is compared with strncmp(), so it should be \0 terminated or
//   at least hostnameLen bytes long
// . returns m_defaultColl if hostname is NULL or empty or if nothing matches
// . collections whose name or rec can not be looked up are skipped instead
//   of being dereferenced
char *Conf::getDefaultColl ( char *hostname, long hostnameLen ) {
	// return defaultColl for empty hostname
	if ( ! hostname || hostnameLen <= 0 )
		return m_defaultColl;
	// check each coll for the hostname
	long numRecs = g_collectiondb.getNumRecs();
	collnum_t currCollnum = g_collectiondb.getFirstCollnum();
	// the collnum is advanced in the loop header so that the "continue"s
	// below can not skip it
	for ( long i = 0 ; i < numRecs ; i++ ,
	      currCollnum = g_collectiondb.getNextCollnum(currCollnum) ) {
		// get the collection name
		char *coll = g_collectiondb.getCollName ( currCollnum );
		if ( ! coll ) continue;
		// get this collnum's rec
		CollectionRec *cr = g_collectiondb.getRec ( coll );
		if ( ! cr ) continue;
		// the 3 possible hostnames
		char *candidates[3] = {
			cr->m_collectionHostname ,
			cr->m_collectionHostname1,
			cr->m_collectionHostname2
		};
		for ( long h = 0 ; h < 3 ; h++ ) {
			char *cmpHostname = candidates[h];
			if ( ! cmpHostname ) continue;
			// . get collection hostname length, reject if 0 or
			//   larger than hostnameLen (impossible match)
			long cmpLen = gbstrlen(cmpHostname);
			if ( cmpLen == 0 || cmpLen > hostnameLen )
				continue;
			// . check the hostname for a match
			//   this will allow hostname to be longer to allow
			//   for a possible port at the end
			if ( strncmp ( hostname, cmpHostname, cmpLen ) == 0 )
				return coll;
		}
	}
	// no match, return default coll
	return m_defaultColl;
}