-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
/
test_normmodel.py
155 lines (126 loc) · 5.63 KB
/
test_normmodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2010 Radim Rehurek <radimrehurek@seznam.cz>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html
"""
Automated tests for checking transformation algorithms (the models package).
"""
import logging
import unittest
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import issparse
from gensim.corpora import mmcorpus
from gensim.models import normmodel
from gensim.test.utils import datapath, get_tmpfile
class TestNormModel(unittest.TestCase):
    """Exercise ``normmodel.NormModel`` with l1 and l2 norms.

    Covers the three input kinds the tests feed to ``normalize`` (a list of
    ``(id, value)`` tuples, a scipy CSR matrix and a numpy ndarray), rejection
    of an unsupported input type / norm name, and save/load round-trips.
    """

    # Expected result of normalizing the shared 3x3 matrix: every entry divided
    # by the l1 norm of the whole matrix (1 + 2 + 3 + 4 + 5 + 6 = 21).
    EXPECTED_MATRIX_L1 = np.array([
        [0.04761905, 0., 0.0952381],
        [0., 0., 0.14285714],
        [0.19047619, 0.23809524, 0.28571429],
    ])

    # Same matrix divided by its l2 norm (sqrt(1 + 4 + 9 + 16 + 25 + 36)).
    EXPECTED_MATRIX_L2 = np.array([
        [0.10482848, 0., 0.20965697],
        [0., 0., 0.31448545],
        [0.41931393, 0.52414242, 0.6289709],
    ])

    def setUp(self):
        """Load the test corpus and build one model per supported norm."""
        self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
        # Choose doc to be normalized. [3] chosen to demonstrate different results for l1 and l2 norm.
        # doc is [(1, 1.0), (5, 2.0), (8, 1.0)]
        self.doc = list(self.corpus)[3]
        self.model_l1 = normmodel.NormModel(self.corpus, norm='l1')
        self.model_l2 = normmodel.NormModel(self.corpus, norm='l2')

    @staticmethod
    def _dense_matrix():
        """Return the shared 3x3 fixture as a fresh numpy ndarray."""
        return np.array([
            [1, 0, 2],
            [0, 0, 3],
            [4, 5, 6],
        ])

    @staticmethod
    def _sparse_matrix():
        """Return the shared 3x3 fixture as a fresh CSR matrix built from coordinates."""
        row = np.array([0, 0, 1, 2, 2, 2])
        col = np.array([0, 2, 2, 0, 1, 2])
        data = np.array([1, 2, 3, 4, 5, 6])
        return csr_matrix((data, (row, col)), shape=(3, 3))

    def _assert_rejects_plain_list(self, model):
        """Check that `model` raises ValueError for a flat list of numbers."""
        with self.assertRaises(ValueError):
            model.normalize([1, 2, 3])

    def _check_persistence(self, fname, **load_kwargs):
        """Save a default model to `fname`, reload it and compare both copies.

        `load_kwargs` are forwarded unchanged to ``NormModel.load``.
        """
        model = normmodel.NormModel(self.corpus)
        model.save(fname)
        model2 = normmodel.NormModel.load(fname, **load_kwargs)
        # assertEqual reports the differing values on failure, unlike assertTrue(a == b).
        self.assertEqual(model.norms, model2.norms)
        tstvec = []
        # try projecting an empty vector
        self.assertTrue(np.allclose(model.normalize(tstvec), model2.normalize(tstvec)))
        # Also compare a non-empty document so the check does not rest solely
        # on the empty-vector case.
        self.assertTrue(np.allclose(model.normalize(self.doc), model2.normalize(self.doc)))

    def test_tupleInput_l1(self):
        """Test tuple input for l1 transformation"""
        normalized = self.model_l1.normalize(self.doc)
        expected = [(1, 0.25), (5, 0.5), (8, 0.25)]
        self.assertTrue(np.allclose(normalized, expected))

    def test_sparseCSRInput_l1(self):
        """Test sparse csr matrix input for l1 transformation"""
        normalized = self.model_l1.normalize(self._sparse_matrix())

        # Check if output is of same type
        self.assertTrue(issparse(normalized))

        # Check if output is correct
        self.assertTrue(np.allclose(normalized.toarray(), self.EXPECTED_MATRIX_L1))

    def test_numpyndarrayInput_l1(self):
        """Test for np ndarray input for l1 transformation"""
        normalized = self.model_l1.normalize(self._dense_matrix())

        # Check if output is of same type
        self.assertIsInstance(normalized, np.ndarray)

        # Check if output is correct
        self.assertTrue(np.allclose(normalized, self.EXPECTED_MATRIX_L1))

        # Test if error is raised on unsupported input type
        self._assert_rejects_plain_list(self.model_l1)

    def test_tupleInput_l2(self):
        """Test tuple input for l2 transformation"""
        normalized = self.model_l2.normalize(self.doc)
        expected = [(1, 0.4082482904638631), (5, 0.8164965809277261), (8, 0.4082482904638631)]
        self.assertTrue(np.allclose(normalized, expected))

    def test_sparseCSRInput_l2(self):
        """Test sparse csr matrix input for l2 transformation"""
        normalized = self.model_l2.normalize(self._sparse_matrix())

        # Check if output is of same type
        self.assertTrue(issparse(normalized))

        # Check if output is correct
        self.assertTrue(np.allclose(normalized.toarray(), self.EXPECTED_MATRIX_L2))

    def test_numpyndarrayInput_l2(self):
        """Test for np ndarray input for l2 transformation"""
        normalized = self.model_l2.normalize(self._dense_matrix())

        # Check if output is of same type
        self.assertIsInstance(normalized, np.ndarray)

        # Check if output is correct
        self.assertTrue(np.allclose(normalized, self.EXPECTED_MATRIX_L2))

        # Test if error is raised on unsupported input type
        self._assert_rejects_plain_list(self.model_l2)

    def testInit(self):
        """Test if error messages raised on unsupported norm"""
        with self.assertRaises(ValueError):
            normmodel.NormModel(self.corpus, 'l0')

    def testPersistence(self):
        """Test that a model survives a save/load round-trip."""
        self._check_persistence(get_tmpfile('gensim_models.tst'))

    def testPersistenceCompressed(self):
        """Test a save/load round-trip through a ``.gz`` file name, loaded with ``mmap=None``."""
        self._check_persistence(get_tmpfile('gensim_models.tst.gz'), mmap=None)
if __name__ == '__main__':
    # Running the file directly: enable verbose, timestamped logging first,
    # then hand control to the unittest command-line runner.
    log_format = '%(asctime)s : %(levelname)s : %(message)s'
    logging.basicConfig(level=logging.DEBUG, format=log_format)
    unittest.main()