Skip to content
This repository has been archived by the owner on Aug 16, 2022. It is now read-only.

Commit

Permalink
update whatsapp_wordcloud.ipynb
Browse files: browse the repository at this point in the history
  • Loading branch information
amirshnll committed Jun 10, 2022
1 parent 71eed96 commit 8de978f
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 149 deletions.
138 changes: 9 additions & 129 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,129 +1,9 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
/venv/
tests.py
dist
build
Sources
resmem.egg-info
.gitignore
.Rproj.user
.DS_Store
43 changes: 23 additions & 20 deletions whatsapp_wordcloud.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -60,25 +60,28 @@
"# Remove emoji\n",
"def remove_emoji(inputString):\n",
" emoji_pattern = re.compile(\"[\"\n",
" u\"\\U0001F600-\\U0001F64F\" # emoticons\n",
" u\"\\U0001F300-\\U0001F5FF\" # symbols & pictographs\n",
" u\"\\U0001F680-\\U0001F6FF\" # transport & map symbols\n",
" u\"\\U0001F1E0-\\U0001F1FF\" # flags (iOS)\n",
" u\"\\U00002500-\\U00002BEF\" # chinese char\n",
" u\"\\U00002702-\\U000027B0\"\n",
" u\"\\U00002702-\\U000027B0\"\n",
" u\"\\U000024C2-\\U0001F251\"\n",
" u\"\\U0001f926-\\U0001f937\"\n",
" u\"\\U00010000-\\U0010ffff\"\n",
" u\"\\u2640-\\u2642\"\n",
" u\"\\u2600-\\u2B55\"\n",
" u\"\\u200d\"\n",
" u\"\\u23cf\"\n",
" u\"\\u23e9\"\n",
" u\"\\u231a\"\n",
" u\"\\ufe0f\" # dingbats\n",
" u\"\\u3030\"\n",
" \"]+\", flags=re.UNICODE)\n",
" u\"\\U0001F600-\\U0001F64F\" # emoticons\n",
" u\"\\U0001F300-\\U0001F5FF\" # symbols & pictographs\n",
" u\"\\U0001F680-\\U0001F6FF\" # transport & map symbols\n",
" u\"\\U0001F1E0-\\U0001F1FF\" # flags (iOS)\n",
" u\"\\U00002702-\\U000027B0\"\n",
" u\"\\U000024C2-\\U0001F251\"\n",
" u\"\\U0001f926-\\U0001f937\"\n",
" u'\\U00010000-\\U0010ffff'\n",
" u\"\\u200d\"\n",
" u\"\\u2640-\\u2642\"\n",
" u\"\\u2600-\\u2B55\"\n",
" u\"\\u23cf\"\n",
" u\"\\u23e9\"\n",
" u\"\\u231a\"\n",
" u\"\\u3030\"\n",
" u\"\\ufe0f\"\n",
" u\"\\u2069\"\n",
" u\"\\u2066\"\n",
" u\"\\u200c\"\n",
" u\"\\u2068\"\n",
" u\"\\u2067\"\n",
" \"]+\", flags=re.UNICODE)\n",
" return emoji_pattern.sub(r'', inputString)"
]
},
Expand All @@ -91,7 +94,7 @@
"source": [
"# Remove garbage characters (punctuation, English letters, digits)\n",
"def remove_garbage(inputString):\n",
" garbage_character = \"«»!\"\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~،,؛0123456789؟abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\"\n",
" garbage_character = \"«»!\"\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~،,؛0123456789؟abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`''\"\n",
" no_garbage = \"\"\n",
" for char in inputString:\n",
" if char not in garbage_character:\n",
Expand Down

0 comments on commit 8de978f

Please sign in to comment.