In [1]:
%%bash

# When running from inside this repo:
cd ../..
# python setup.py install

pip install --prefix -e .

# When running independently
# pip install -U git+https://github.com/neomatrix369/nlp_profiler.git
echo "Once successfully installed, please RESTART your Jupyter kernels or Colab runtimes for the changes to take effect"

Processing /home/satyasai/git-repos/ai-ml-dl-stuff/nlp_profiler
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Building wheels for collected packages: nlp-profiler
  Building wheel for nlp-profiler (PEP 517): started
  Building wheel for nlp-profiler (PEP 517): finished with status 'done'
  Created wheel for nlp-profiler: filename=nlp_profiler-0.0.1-py3-none-any.whl size=29176 sha256=2d70276bbeefc2d7d35812a28e5db75d89db9a6598e78237e3fd6404754e20c8
  Stored in directory: /tmp/pip-ephem-wheel-cache-5pt8ew28/wheels/c3/37/38/edb4b584186d5fd35cdc7b72765631b03c281241ea480af1fd
Successfully built nlp-profiler
Installing collected packages: nlp-profiler
  Attempting uninstall: nlp-profiler
    Found existing installation: nlp-profile

You should consider upgrading via the '/usr/local/bin/python3.7 -m pip install --upgrade pip' command.


In [2]:
import pandas as pd

import sys
sys.path.insert(0, '../..')

from nlp_profiler.core import apply_text_profiling

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/satyasai/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/satyasai/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


#### Text messages with Emojis, Numbers, Alphanumeric, None-alphanumeric, Punctuations, Dates and Duplicates

In [3]:
text_dataframe = pd.read_csv('../../datasets/googleplaystore_user_reviews.csv')
text_dataframe

Unnamed: 0,App,Translated_Review,Sentiment,Sentiment_Polarity,Sentiment_Subjectivity
0,10 Best Foods for You,I like eat delicious food. That's I'm cooking ...,Positive,1.000000,0.533333
1,10 Best Foods for You,This help eating healthy exercise regular basis,Positive,0.250000,0.288462
2,10 Best Foods for You,,,,
3,10 Best Foods for You,Works great especially going grocery store,Positive,0.400000,0.875000
4,10 Best Foods for You,Best idea us,Positive,1.000000,0.300000
5,10 Best Foods for You,Best way,Positive,1.000000,0.300000
6,10 Best Foods for You,Amazing,Positive,0.600000,0.900000
7,10 Best Foods for You,,,,
8,10 Best Foods for You,"Looking forward app,",Neutral,0.000000,0.000000
9,10 Best Foods for You,It helpful site ! It help foods get !,Neutral,0.000000,0.000000


## Pandas describe() function

In [4]:
text_dataframe.describe()

Unnamed: 0,Sentiment_Polarity,Sentiment_Subjectivity
count,37432.0,37432.0
mean,0.182146,0.492704
std,0.351301,0.259949
min,-1.0,0.0
25%,0.0,0.357143
50%,0.15,0.514286
75%,0.4,0.65
max,1.0,1.0


## NLP profiler's equivalent to that

In [None]:
%%time
profiled_text_dataframe = apply_text_profiling(text_dataframe, 'Translated_Review', params={'parallelism_method': 'default'})

In [None]:
profiled_text_dataframe.head()

In [None]:
%%time
profiled_text_dataframe = apply_text_profiling(text_dataframe, 'Translated_Review', params={'parallelism_method': 'using_swifter'})
profiled_text_dataframe.head()

### Sentiment Analysis

In [None]:
profiled_text_dataframe['sentiment_polarity'].hist()

In [None]:
profiled_text_dataframe['sentiment_subjectivity'].hist()

### Spelling quality check

#### The spelling score and spelling quality check is upto 70% accurate, it's using Peter Norvig’s “How to Write a Spelling Corrector".

In [None]:
profiled_text_dataframe['spelling_quality_score'].hist()

In [None]:
profiled_text_dataframe['spelling_quality'].hist()

### Running describe() on the new NLP profiler generated dataframe

In [None]:
profiled_text_dataframe.describe()

### Resources

- https://www.kaggle.com/raenish/cheatsheet-text-helper-functions
- https://textblob.readthedocs.io/en/dev/quickstart.html
- [Approaching (Almost) Any NLP Problem on Kaggle](https://www.kaggle.com/abhishek/approaching-almost-any-nlp-problem-on-kaggle)
- [Words of estimative probability](https://en.wikipedia.org/wiki/Words_of_estimative_probability)
- [NLP Profiler source file](https://github.com/neomatrix369/awesome-ai-ml-dl/blob/master/examples/better-nlp/library/org/neomatrix369/nlp_profiler.py)
- [Kaggle Utility script](https://www.kaggle.com/neomatrix369/nlp-profiler-class)
- [Kaggle kernel](https://www.kaggle.com/neomatrix369/nlp-profiler-simple-dataset)
- [Awesome AI-ML-DL: Better NLP library](https://bit.ly/better-nlp-launch)
- [Awesome AI-ML-DL: NLP Resources](https://github.com/neomatrix369/awesome-ai-ml-dl/tree/master/natural-language-processing)
- [Awesome AI-ML-DL Github](https://github.com/neomatrix369/awesome-ai-ml-dl/blob/)