
Error #3

Open
Ahlycya opened this issue Apr 19, 2022 · 0 comments

Comments


Ahlycya commented Apr 19, 2022

tfidf_train=tfidf_vectorizer.fit_transform(x_train)


ValueError Traceback (most recent call last)
Input In [38], in <cell line: 1>()
----> 1 tfidf_train=tfidf_vectorizer.fit_transform(x_train)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:2077, in TfidfVectorizer.fit_transform(self, raw_documents, y)
2058 """Learn vocabulary and idf, return document-term matrix.
2059
2060 This is equivalent to fit followed by transform, but more efficiently
(...)
2074 Tf-idf-weighted document-term matrix.
2075 """
2076 self._check_params()
-> 2077 X = super().fit_transform(raw_documents)
2078 self._tfidf.fit(X)
2079 # X is already a transformed view of raw_documents so
2080 # we set copy to False

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:1330, in CountVectorizer.fit_transform(self, raw_documents, y)
1322 warnings.warn(
1323 "Upper case characters found in"
1324 " vocabulary while 'lowercase'"
1325 " is True. These entries will not"
1326 " be matched with any documents"
1327 )
1328 break
-> 1330 vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_)
1332 if self.binary:
1333 X.data.fill(1)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:1201, in CountVectorizer._count_vocab(self, raw_documents, fixed_vocab)
1199 for doc in raw_documents:
1200 feature_counter = {}
-> 1201 for feature in analyze(doc):
1202 try:
1203 feature_idx = vocabulary[feature]

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:108, in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
86 """Chain together an optional series of text processing steps to go from
87 a single document to ngrams, with or without tokenizing or preprocessing.
88
(...)
104 A sequence of tokens, possibly with pairs, triples, etc.
105 """
107 if decoder is not None:
--> 108 doc = decoder(doc)
109 if analyzer is not None:
110 doc = analyzer(doc)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:226, in _VectorizerMixin.decode(self, doc)
223 doc = doc.decode(self.encoding, self.decode_error)
225 if doc is np.nan:
--> 226 raise ValueError(
227 "np.nan is an invalid document, expected byte or unicode string."
228 )
230 return doc

ValueError: np.nan is an invalid document, expected byte or unicode string.
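The traceback shows that `TfidfVectorizer.decode` rejected an `np.nan` entry, which means `x_train` contains missing values rather than strings. A minimal sketch of one way to work around this, assuming `x_train` is a pandas Series (the hypothetical data below is only for illustration, not the issue author's actual dataset):

```python
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer

# Hypothetical training data containing a NaN entry, mirroring the error above
x_train = pd.Series(["first document", np.nan, "second document"])

# Drop missing entries before vectorizing; np.nan is not a valid document.
# Alternatively, x_train.fillna("") keeps the rows as empty strings.
x_train_clean = x_train.dropna()

tfidf_vectorizer = TfidfVectorizer()
tfidf_train = tfidf_vectorizer.fit_transform(x_train_clean)

print(tfidf_train.shape)  # one row per remaining (non-NaN) document
```

If the labels `y_train` are kept alongside `x_train`, they need the same rows dropped so the two stay aligned (e.g. `y_train = y_train[x_train.notna()]` before the `dropna`).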
