You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
ValueError Traceback (most recent call last)
Input In [38], in <cell line: 1>()
----> 1 tfidf_train=tfidf_vectorizer.fit_transform(x_train)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:2077, in TfidfVectorizer.fit_transform(self, raw_documents, y)
2058 """Learn vocabulary and idf, return document-term matrix.
2059
2060 This is equivalent to fit followed by transform, but more efficiently
(...)
2074 Tf-idf-weighted document-term matrix.
2075 """
2076 self._check_params()
-> 2077 X = super().fit_transform(raw_documents)
2078 self._tfidf.fit(X)
2079 # X is already a transformed view of raw_documents so
2080 # we set copy to False
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:1330, in CountVectorizer.fit_transform(self, raw_documents, y)
1322 warnings.warn(
1323 "Upper case characters found in"
1324 " vocabulary while 'lowercase'"
1325 " is True. These entries will not"
1326 " be matched with any documents"
1327 )
1328 break
-> 1330 vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_)
1332 if self.binary:
1333 X.data.fill(1)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:1201, in CountVectorizer._count_vocab(self, raw_documents, fixed_vocab)
1199 for doc in raw_documents:
1200 feature_counter = {}
-> 1201 for feature in analyze(doc):
1202 try:
1203 feature_idx = vocabulary[feature]
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:108, in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
86 """Chain together an optional series of text processing steps to go from
87 a single document to ngrams, with or without tokenizing or preprocessing.
88
(...)
104 A sequence of tokens, possibly with pairs, triples, etc.
105 """
107 if decoder is not None:
--> 108 doc = decoder(doc)
109 if analyzer is not None:
110 doc = analyzer(doc)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:226, in _VectorizerMixin.decode(self, doc)
223 doc = doc.decode(self.encoding, self.decode_error)
225 if doc is np.nan:
--> 226 raise ValueError(
227 "np.nan is an invalid document, expected byte or unicode string."
228 )
230 return doc
ValueError: np.nan is an invalid document, expected byte or unicode string.
The text was updated successfully, but these errors were encountered:
tfidf_train=tfidf_vectorizer.fit_transform(x_train)
ValueError Traceback (most recent call last)
Input In [38], in <cell line: 1>()
----> 1 tfidf_train=tfidf_vectorizer.fit_transform(x_train)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:2077, in TfidfVectorizer.fit_transform(self, raw_documents, y)
2058 """Learn vocabulary and idf, return document-term matrix.
2059
2060 This is equivalent to fit followed by transform, but more efficiently
(...)
2074 Tf-idf-weighted document-term matrix.
2075 """
2076 self._check_params()
-> 2077 X = super().fit_transform(raw_documents)
2078 self._tfidf.fit(X)
2079 # X is already a transformed view of raw_documents so
2080 # we set copy to False
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:1330, in CountVectorizer.fit_transform(self, raw_documents, y)
1322 warnings.warn(
1323 "Upper case characters found in"
1324 " vocabulary while 'lowercase'"
1325 " is True. These entries will not"
1326 " be matched with any documents"
1327 )
1328 break
-> 1330 vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_)
1332 if self.binary:
1333 X.data.fill(1)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:1201, in CountVectorizer._count_vocab(self, raw_documents, fixed_vocab)
1199 for doc in raw_documents:
1200 feature_counter = {}
-> 1201 for feature in analyze(doc):
1202 try:
1203 feature_idx = vocabulary[feature]
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:108, in _analyze(doc, analyzer, tokenizer, ngrams, preprocessor, decoder, stop_words)
86 """Chain together an optional series of text processing steps to go from
87 a single document to ngrams, with or without tokenizing or preprocessing.
88
(...)
104 A sequence of tokens, possibly with pairs, triples, etc.
105 """
107 if decoder is not None:
--> 108 doc = decoder(doc)
109 if analyzer is not None:
110 doc = analyzer(doc)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\feature_extraction\text.py:226, in _VectorizerMixin.decode(self, doc)
223 doc = doc.decode(self.encoding, self.decode_error)
225 if doc is np.nan:
--> 226 raise ValueError(
227 "np.nan is an invalid document, expected byte or unicode string."
228 )
230 return doc
ValueError: np.nan is an invalid document, expected byte or unicode string.
The text was updated successfully, but these errors were encountered: