## Textheroについて
### 次のように構成されています。
* Preprocessing
* NLP
* Representation
* Visualization


### Preprocessing
* preprocessing.clean
* preprocessing.drop_no_content
* preprocessing.get_default
* preprocessing.has_content
* preprocessing.lowercase
* preprocessing.remove_angle_brackets
* preprocessing.remove_curly_brackets
* preprocessing.remove_diacritics
* preprocessing.remove_digits
* preprocessing.remove_html_tags
* preprocessing.remove_punctuation
* preprocessing.remove_round_brackets
* preprocessing.remove_square_brackets
* preprocessing.remove_stopwords
* preprocessing.remove_urls
* preprocessing.remove_whitespace
* preprocessing.replace_punctuation
* preprocessing.replace_stopwords
* preprocessing.replace_urls
* preprocessing.stem
* preprocessing.tokenize
* preprocessing.remove_brackets

In [29]:
import numpy as np
import pandas as pd
import texthero as hero
import warnings
warnings.filterwarnings("ignore")

In [30]:
# Demo of lowercase(): every character of the sentence is converted to lower
# case (the printed result reads "you say goodbye and i say hello.").
text = pd.Series("You say goodbye and I say hello.")
print(hero.preprocessing.lowercase(text))

0    you say goodbye and i say hello.
dtype: object


In [31]:
# Demo of remove_digits(): each run of digits in the phone number is blanked
# out, while the hyphens, the period and the rest of the text are kept.
# NOTE: `text` is reused by the remove_punctuation cell that follows.
text = pd.Series("My phone number is 123-4567-8901. See you!")
print(hero.preprocessing.remove_digits(text))

0    My phone number is  - - . See you!
dtype: object


In [32]:
# Demo of remove_punctuation() on the phone-number `text` defined in the
# previous cell: "-", "." and "!" are each replaced by a space; digits stay.
print(hero.preprocessing.remove_punctuation(text))

0    My phone number is 123 4567 8901  See you 
dtype: object


In [33]:
# Demo of remove_diacritics(): accented letters lose their accent mark,
# so "Noël" is printed as "Noel".
text = pd.Series("Noël means Christmas in French")
print(hero.preprocessing.remove_diacritics(text))

0    Noel means Christmas in French
dtype: object


In [34]:
# Demo of remove_stopwords(): "the", "and", "do" and "not" are dropped from the
# sentence. The surrounding spaces are left in place, which is why the printed
# result contains runs of blanks.
text = pd.Series("You like the hero and I do not like the hero.")
print(hero.preprocessing.remove_stopwords(text))

0    You like  hero  I   like  hero.
dtype: object


In [35]:
# Demo of remove_whitespace(): the embedded newline and the spaces around it
# are collapsed into a single space. The Series is printed before and after
# so the two forms can be compared.
text = pd.Series("You say goodbye \n and I say hello.")
print(text)
print(hero.preprocessing.remove_whitespace(text))

0    You say goodbye \n and I say hello.
dtype: object
0    You say goodbye and I say hello.
dtype: object


In [36]:
# Demo of drop_no_content(): rows holding NaN, an empty string or only
# whitespace are dropped, leaving just "Python" and "Java".
# NOTE: `text` is reused by the has_content cell that follows.
text = pd.Series(["Python", "Java", np.nan, "", " "])
print(text)
print(hero.preprocessing.drop_no_content(text))

0    Python
1      Java
2       NaN
3          
4          
dtype: object
0    Python
1      Java
dtype: object


In [37]:
# Demo of has_content() on the `text` Series defined in the previous cell:
# it yields a boolean Series that is False for the NaN, empty and
# whitespace-only rows and True for the others.
print(hero.preprocessing.has_content(text))

0     True
1     True
2    False
3    False
4    False
dtype: bool


In [38]:
# Demo of remove_round_brackets(): the parentheses are removed together with
# the text they enclose. The Series is printed before and after for comparison.
text = pd.Series("My age is twenty(Truly my age is forty)")
print(text)
print(hero.preprocessing.remove_round_brackets(text))

0    My age is twenty(Truly my age is forty)
dtype: object
0    My age is twenty
dtype: object


In [39]:
# Demo of remove_brackets(): it strips round (), square [], curly {} and
# angle <> brackets together with their contents. The sample therefore wraps
# each label in the bracket type it names, so all four kinds are exercised;
# only "Texthero" and the separating spaces remain.
text = pd.Series("Texthero (round) [square] {curly} <angle>")
print(hero.preprocessing.remove_brackets(text))

0    Texthero    
dtype: object


In [41]:
# Demo of remove_html_tags(): the HTML tags are stripped and only the text
# between them is kept ("Title, subtitle"). The function is called through
# hero.preprocessing like every other example in this notebook.
text = pd.Series("<html><h1>Title</h1>, <p>subtitle</p></html>")
print(text)
print(hero.preprocessing.remove_html_tags(text))

0    <html><h1>Title</h1>, <p>subtitle</p></html>
dtype: object
0    Title, subtitle
dtype: object


In [42]:
# Demo of remove_urls(): the URL is removed from the sentence, leaving only
# blanks in its place. The Series is printed before and after for comparison.
text = pd.Series("Please click this urls: https://test.com")
print(text)
print(hero.preprocessing.remove_urls(text))

0    Please click this urls: https://test.com
dtype: object
0    Please click this urls:  
dtype: object


In [46]:
# Demo of replace_punctuation(): the punctuation character "*" is substituted
# with the replacement string passed as the second argument ("(Yes)").
text = pd.Series("Are you hungry*")
print(hero.preprocessing.replace_punctuation(text, "(Yes)"))

0    Are you hungry(Yes)
dtype: object


In [47]:
# Demo of replace_urls(): the URL is substituted with the replacement string
# passed as the second argument ("Deleted.").
text = pd.Series("Please click this urls: https://test.com")
print(hero.preprocessing.replace_urls(text, "Deleted."))

0    Please click this urls: Deleted.
dtype: object
