-
Notifications
You must be signed in to change notification settings - Fork 1
/
score.py
38 lines (34 loc) · 1.03 KB
/
score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import os
import math
# Corpus root used when main() is called without arguments (the original
# hard-coded location).
_DEFAULT_BASE = "/home/varunwachaspati/Downloads/Information Retrieval /WhatsintheVector/"


def _is_skipped(name):
    """Return True for directory entries that are not corpus files."""
    return name == "__init__.py" or name == "__pycache__" or ".pyc" in name


def _load_indices(base, total_docs):
    """Import every module in ``base/indices`` once and summarise it.

    Returns a list with one ``(idf, counts)`` pair per index module, in
    sorted file-name order.  ``idf`` is ``log(total_docs / df)`` where ``df``
    is the length of the module's ``ii`` list (0 when the list is empty).
    ``counts`` maps the first item of each ``ii`` entry, minus its last three
    characters, to the entry's second item; later entries overwrite earlier
    ones, so the last matching entry wins.
    """
    import importlib
    import sys

    # Make the ``indices`` package importable no matter where the script
    # was launched from.
    if base not in sys.path:
        sys.path.insert(0, base)

    loaded = []
    for fname in sorted(os.listdir(os.path.join(base, "indices"))):
        # Only plain ``<name>.py`` modules can be imported by stripping the
        # extension; anything else (caches, bytecode) is ignored.
        if _is_skipped(fname) or not fname.endswith(".py"):
            continue
        # NOTE(review): importing executes the index file; only run this on
        # index directories you trust.
        ii = importlib.import_module("indices." + fname[:-3]).ii
        df = len(ii)
        if df == 0:
            idf = 0
        else:
            # float() keeps the ratio exact under Python 2 integer division.
            idf = math.log(float(total_docs) / df)
        counts = {}
        for tup in ii:
            counts[tup[0][:-3]] = tup[1]
        loaded.append((idf, counts))
    return loaded


def main(base=_DEFAULT_BASE):
    """Write one tf-idf vector file per document into ``base/doc_vector``.

    Documents are the entries of ``base/tokens`` (package and bytecode files
    excluded).  Vector components follow the sorted order of the modules in
    ``base/indices``; each component is ``log2(1 + count) * idf`` for that
    index, or 0 when the document does not occur in it.  Each output file is
    named after the document and contains ``vect = [...]``.

    Args:
        base: Corpus root containing the ``tokens``, ``indices`` and
            ``doc_vector`` directories.
    """
    base = os.path.abspath(base)
    # Kept from the original: index modules are imported with the corpus
    # root as the working directory.
    os.chdir(base)

    # Count only real documents so N in the idf is not inflated by
    # __init__.py / .pyc entries.
    docs = [d for d in os.listdir(os.path.join(base, "tokens"))
            if not _is_skipped(d)]
    if not docs:
        return

    indices = _load_indices(base, len(docs))
    out_dir = os.path.join(base, "doc_vector")
    for d in docs:
        doc_key = d[:-3]
        doc = []
        for idf, counts in indices:
            tf = 0
            if doc_key in counts:
                tf = math.log(1 + counts[doc_key], 2)
            doc.append(tf * idf)
        with open(os.path.join(out_dir, d), "w") as f:
            f.write("vect = " + str(doc))
# Run the vector build only when this file is executed as a script,
# not when it is imported.
if __name__ == '__main__':
    main()