Skip to content

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
...
  • 2 commits
  • 2 files changed
  • 0 commit comments
  • 1 contributor
Commits on Mar 17, 2012
@onlyu llvm 3f2b8f2
@onlyu delete files 69eaa5a
Showing with 83 additions and 3 deletions.
  1. +52 −0 learn/llvm/lex.cc
  2. +31 −3 learn/npl/pa1-spamlord-v2/python/SpamLord.py
View
52 learn/llvm/lex.cc
@@ -0,0 +1,52 @@
+enum Token {  // token codes returned by gettok(); all are negative
+ tok_eof = -1,  // returned when getchar() reports EOF
+ tok_def = -2,  // the identifier text was exactly "def"
+ tok_extern = -3,  // the identifier text was exactly "extern"
+ tok_identifier = -4,  // any other identifier; its text is in IdentifierStr
+ tok_number = -5,  // numeric literal; its value is in NumVal
+};
+
+static std::string IdentifierStr;  // written by gettok(): text of the most recent identifier or keyword
+static double NumVal;  // written by gettok(): value of the most recent tok_number
+
+// Reads the next token from standard input and returns its code.
+//
+// The result is one of the negative Token values (tok_eof, tok_def,
+// tok_extern, tok_identifier, tok_number) or, for any other character, that
+// character's own value as obtained from getchar().
+//
+// Side effects: for tok_def / tok_extern / tok_identifier the token text is
+// left in IdentifierStr; for tok_number the parsed value is left in NumVal.
+//
+// The return type is written out as int because C++ has no implicit-int
+// rule; a declaration without a return type does not compile.
+static int gettok()
+{
+  // One character of lookahead that survives between calls. It starts as a
+  // space so the first call goes straight to reading input.
+  static int LastChar = ' ';
+
+  // Skip whitespace between tokens.
+  while (isspace(LastChar)) LastChar = getchar();
+
+  // Identifier or keyword: a letter followed by any run of letters/digits.
+  if (isalpha(LastChar)) {
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    return tok_identifier;
+  }
+
+  // Number: a run of digits and '.' characters.
+  // NOTE: every '.' is accepted, so input such as "1.2.3" is collected whole
+  // and strtod() converts only the leading part it can parse.
+  if (isdigit(LastChar) || LastChar == '.') {
+    std::string NumStr;
+
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  // Comment: '#' discards everything up to the end of the line (or EOF).
+  if (LastChar == '#') {
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    // Lex the token that follows the comment; at EOF fall through instead.
+    if (LastChar != EOF) return gettok();
+  }
+
+  // End of input. LastChar is left as EOF, so later calls land here again.
+  if (LastChar == EOF) return tok_eof;
+
+  // Anything else is returned as its own character value; advance the
+  // lookahead first so the next call starts on the following character.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
View
34 learn/npl/pa1-spamlord-v2/python/SpamLord.py
@@ -3,7 +3,7 @@
import re
import pprint
-my_first_pat = '(\w+)@(\w+).edu'
+
"""
TODO
@@ -20,15 +20,43 @@
NOTE: ***don't change this interface***, as it will be called directly by
the submit script
"""
+my_first_pat = '([a-z0-9.]+) *(@|\(at\)|@) *((\w+)\.)*(\w+)\.(edu|com|cn|org|net|jp)'  # user, "@" or "(at)", "."-separated host, listed TLD
+my_first_pat1 = '([a-z0-9.]+)( at | where )((\w+) *(dot|dt|dom|;) *)*(\w+) *(dot|dt|dom|;) *(edu|com|cn|org|net|jp)'  # spelled-out form: " at "/" where " separator, dot|dt|dom|; between host labels
+my_first_pat2 = 'obfuscate\([\'"]([^\']+)[\'"],[\'"]([a-z0-9.]+)[\'"]\)'  # obfuscate('<arg1>','<arg2>'); process_file emits arg2@arg1
+phone_pats = [
+ '[(](\d{3})[)] *(\d{3})[- ]?(\d{4})',  # (ddd) ddd-dddd; the last separator is optional
+ '(\d{3})[- ](\d{3})[- ](\d{4})',  # ddd-ddd-dddd, with "-" or a space between groups
+ ]
def process_file(name, f):
# note that debug info should be printed to stderr
# sys.stderr.write('[process_file]\tprocessing file: %s\n' % (path))
- res = []
+ res = []  # collects (name, 'e' or 'p', value) tuples; this list is what the function returns
for line in f:
+ line = line.lower()  # every pattern below is applied to the lowercased line
+ for pat in phone_pats:
+ matches = re.findall(pat, line)
+ for m in matches:
+ print "phone", m  # NOTE(review): writes to stdout, while the note above asks for debug output on stderr
+ res.append((name, 'p', "%s-%s-%s"%(m[0], m[1], m[2])))  # phone recorded as ddd-ddd-dddd
+
+ line = line.replace('-', '')  # hyphens are removed before the e-mail patterns run (phones were matched first)
matches = re.findall(my_first_pat,line)
for m in matches:
- email = '%s@%s.edu' % m
+ if not m[2] == '':  # m[2] is non-empty when a "label." prefix preceded the domain
+ email = '%s@%s.%s.%s' % (m[0], m[3], m[4], m[5])  # NOTE(review): a repeated group keeps only its last match, so deeper hosts retain one extra label
+ else:
+ email = '%s@%s.%s' % (m[0], m[4], m[5])  # user@domain.tld
res.append((name,'e',email))
+ matches = re.findall(my_first_pat1, line)
+ for m in matches:
+ if not m[2] == '':  # same sub-label test, applied to the spelled-out pattern
+ email = '%s@%s.%s.%s' % (m[0], m[3], m[5], m[7])  # the odd groups skipped here hold the separator words (at/dot/...)
+ else:
+ email = '%s@%s.%s' % (m[0], m[5], m[7])
+ res.append((name,'e',email))
+ matches = re.findall(my_first_pat2, line)
+ for m in matches:
+ res.append((name, 'e', "%s@%s"%(m[1],m[0])))  # second captured argument goes before "@", first after it
return res
"""

No commit comments for this range

Something went wrong with that request. Please try again.