diff --git a/quiz7/q3.py b/quiz7/q3.py index 12794f8..dfbfba4 100644 --- a/quiz7/q3.py +++ b/quiz7/q3.py @@ -1,7 +1,7 @@ from __future__ import division import math -def L(x): +def log10(x): return math.log(x, 10) def norm(lst): @@ -49,13 +49,13 @@ def norm(lst): # N = total number of docs N = len(docs) # Inverse doc frequency -idf = dict((term, L(N) - L(doc_counts[term])) for term in terms) +idf = dict((term, log10(N) - log10(doc_counts[term])) for term in terms) print 'N = %d' % N print 'doc_counts = %s' % doc_counts print 'idf = %s' % idf -def get_tf(count): return 1.0 + L(count) if count else 0.0 +def get_tf(count): return 1.0 + log10(count) if count else 0.0 # Compute tf-idf tfidf = dict( diff --git a/quiz7/q4.py b/quiz7/q4.py index b06059b..a456e2b 100644 --- a/quiz7/q4.py +++ b/quiz7/q4.py @@ -13,6 +13,6 @@ def get_precision(n): relevant_precisions = [precisions[i] for i in range(N) if results[i]] average_precision = sum(relevant_precisions)/len(relevant_precisions) -print 'Average precsion = %.3f' % average_precision +print 'Average precision = %.3f' % average_precision