Skip to content

Commit

Permalink
verify googlebot
Browse files Browse the repository at this point in the history
  • Loading branch information
bchavet committed Jul 24, 2019
1 parent 4e9e9e8 commit 7f11d6e
Showing 1 changed file with 47 additions and 2 deletions.
49 changes: 47 additions & 2 deletions src/analysisd/compiled_rules/generic_samples.c
Expand Up @@ -11,6 +11,9 @@
#include "shared.h"
#include "config.h"

#include <stdio.h>
#include <netdb.h>
#include <string.h>

/* Note: If the rule fails to match it should return NULL.
* If you want processing to continue, return lf (the eventinfo structure).
Expand Down Expand Up @@ -127,8 +130,7 @@ void *is_simple_http_request(Eventinfo *lf)
/* Example 5: Checking if the source IP is from a valid bot */
void *is_valid_crawler(Eventinfo *lf)
{
if ((strncmp(lf->log, "66.249.", 7) == 0) || /* Google bot */
(strncmp(lf->log, "72.14.", 6) == 0) || /* Feedfetcher-Google */
if((strncmp(lf->log, "72.14.",6) == 0)|| /* Feedfetcher-Google */
(strncmp(lf->log, "209.85.", 7) == 0) || /* Feedfetcher-Google */
(strncmp(lf->log, "65.55.", 6) == 0) || /* MSN/Bing */
(strncmp(lf->log, "207.46.", 7) == 0) || /* MSN/Bing */
Expand All @@ -139,6 +141,49 @@ void *is_valid_crawler(Eventinfo *lf)
return (lf);
}

// In order to verify a googlebot crawler, Google recommends doing a
// reverse DNS lookup, and then a forward DNS lookup to verify that
//
// 1. The host is in the googlebot.com domain
// 2. The forward and reverse lookups match.
//
// https://support.google.com/webmasters/answer/80553

struct sockaddr_in sa;
char hostname[NI_MAXHOST];

sa.sin_family = AF_INET;
inet_aton(lf->log, &sa.sin_addr);

// Do the reverse DNS lookup
if (getnameinfo((struct sockaddr*)&sa, sizeof sa, hostname, sizeof hostname, NULL, 0, 0) == 0) {

// verify that the host is in the googlebot.com domain.
char *start = strstr(hostname, "googlebot.com");
if (start != NULL) {
struct addrinfo hints, *res, *p;
char ipstr[INET6_ADDRSTRLEN];

memset(&hints, 0, sizeof hints);
hints.ai_family = AF_INET;
hints.ai_socktype = SOCK_STREAM;

// Do the forward DNS lookup
if (getaddrinfo(hostname, NULL, &hints, &res) == 0) {
for (p = res; p != NULL; p = p->ai_next) {
struct sockaddr_in *ipv4 = (struct sockaddr_in*)p->ai_addr;
void *addr = &(ipv4->sin_addr);
inet_ntop(p->ai_family, addr, ipstr, sizeof ipstr);
// Make sure the forward & reverse lookups match
if (strcmp(ipstr, lf->log) == 0) {
return(lf);
}
}
freeaddrinfo(res);
}
}
}

return (NULL);
}

0 comments on commit 7f11d6e

Please sign in to comment.