Permalink
Browse files

Disambiguate .t files instead of directly assuming Perl

Assuming that every ".t" file is Perl is the reason Mercurial is wrongly
classified as "mostly Perl" on Ohloh[1]. Mercurial uses ".t" files for its
test cases, which are wrapped shell scripts and not Perl.

It would be even better if Ohcount detected them as shell scripts, but
that seems hard given that their contents (see e.g. [2]) don't have many
defining characteristics.

[1] https://www.ohloh.net/p/mercurial/analyses/latest/languages_summary
[2] http://selenic.com/hg/file/ccd28eca37f6/tests/test-add.t
  • Loading branch information...
robinst committed Sep 22, 2012
1 parent 6cb04fe commit 6be21cf69f5f769f53dc0b1b91864f234c1b2ead
Showing with 19 additions and 1 deletion.
  1. +16 −0 src/detector.c
  2. +2 −0 src/hash/disambiguatefuncs.gperf
  3. +1 −1 src/hash/extensions.gperf
View
@@ -901,6 +901,22 @@ const char *disambiguate_st(SourceFile *sourcefile) {
return NULL;
}
+/**
+ * Disambiguates files with a ".t" extension.
+ *
+ * Such files are reported as Perl only when they begin with a shebang line
+ * that mentions "perl" (case-insensitive). Anything else, e.g. a wrapped
+ * shell test script, is left undetected.
+ *
+ * @param sourcefile The file whose contents are inspected.
+ * @return LANG_PERL if a perl shebang is found at the very start of the
+ *   contents; NULL if the contents are unavailable, the pattern cannot be
+ *   compiled, or no perl shebang is present.
+ */
+const char *disambiguate_t(SourceFile *sourcefile) {
+  char *contents = ohcount_sourcefile_get_contents(sourcefile);
+  if (!contents)
+    return NULL;
+
+  // Check for a perl shebang on first line of file. "[^\n]*" keeps the match
+  // from running past the first newline.
+  const char *error;
+  int erroffset;
+  pcre *re = pcre_compile("#![^\\n]*perl", PCRE_CASELESS, &error, &erroffset, NULL);
+  if (!re)
+    return NULL;
+
+  // PCRE_ANCHORED forces the match to start at offset 0, so the shebang must
+  // be the very first thing in the file. The subject length is limited via
+  // mystrnlen(contents, 100) -- presumably at most the first 100 bytes.
+  int rc = pcre_exec(re, NULL, contents, mystrnlen(contents, 100), 0, PCRE_ANCHORED, NULL, 0);
+
+  // Release the compiled pattern on every path; it is not needed after the
+  // single match attempt and would otherwise leak once per inspected file.
+  pcre_free(re);
+
+  // With no output vector, a successful match yields 0; errors and
+  // "no match" are negative.
+  if (rc > -1)
+    return LANG_PERL;
+
+  // May be something else, e.g. a test shell script
+  return NULL;
+}
+
int ohcount_is_binary_filename(const char *filename) {
char *p = (char *)filename + strlen(filename);
while (p > filename && *(p - 1) != '.') p--;
@@ -18,6 +18,7 @@ const char *disambiguate_pp(SourceFile *sourcefile);
const char *disambiguate_pro(SourceFile *sourcefile);
const char *disambiguate_r(SourceFile *sourcefile);
const char *disambiguate_st(SourceFile *sourcefile);
+const char *disambiguate_t(SourceFile *sourcefile);
%}
struct DisambiguateFuncsMap { const char *key; const char* (*value)(SourceFile*); };
%%
@@ -37,3 +38,4 @@ pp, disambiguate_pp
pro, disambiguate_pro
r, disambiguate_r
st, disambiguate_st
+t, disambiguate_t
@@ -194,7 +194,7 @@ svg, BINARY
svgz, BINARY
svn, BINARY
swf, BINARY
-t, LANG_PERL
+t, DISAMBIGUATE("t")
tar, BINARY
tcl, LANG_TCL
tex, LANG_TEX

0 comments on commit 6be21cf

Please sign in to comment.