In [23]:
import pandas as pd
import numpy as np
from nrclex import NRCLex

In [45]:
from nltk.corpus import stopwords
stopwords = stopwords.words('english')

In [24]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/johnloyd/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


## Dataset

In [36]:
df = pd.read_csv('abuse.csv')

In [37]:
df.head()

Unnamed: 0.1,Unnamed: 0,author,created_utc,selftext,subreddit,title,url,created
0,0,undergroundagent2,1597591161,Have you or someone you know been a victim to...,abuse,join the underground railroad,https://www.reddit.com/r/abuse/comments/iau3kh...,1597562000.0
1,1,Maniacalmind0000,1597584684,So I’ve been with my husband for three years a...,abuse,I don’t know what to do,https://www.reddit.com/r/abuse/comments/iasdhp...,1597556000.0
2,2,blackoutjason1,1597584355,Did anyone else after different types of abuse...,abuse,Imagining and Fearing the Worst,https://www.reddit.com/r/abuse/comments/iasavg...,1597556000.0
3,3,wastedyovth_,1597577713,what people who were lucky &amp; don’t underst...,abuse,what others don’t understand,https://www.reddit.com/r/abuse/comments/iaqv3q...,1597549000.0
4,4,fish_breath123,1597576111,,abuse,penis,https://www.reddit.com/r/abuse/comments/iaqk91...,1597547000.0


## Cleaning

In [50]:
def clean_text(df, column_name):
    df['cleaned_text'] = df[column_name].fillna('')
    df['cleaned_text'] = df['cleaned_text'].str.lower()
    df['cleaned_text'] = df['cleaned_text'].str.replace(r'(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|rt|\d+', '')
    df['cleaned_text'] = df['cleaned_text'].str.replace(r'^\s+|\s+$', '') 
    df['cleaned_text'] = df['cleaned_text'].apply(lambda x: ' '.join([w for w in x.split() if w not in (stopwords)]))
    return df

In [51]:
df_cleaned = clean_text(df, "selftext")

## Tagging

In [63]:
def tag_sentiment(string):
    result = sid.polarity_scores(string)
    if (result['compound'] > 0):
        return "POS"
    elif (result['compound'] == 0):
        return "NEU"
    else:
        return "NEG"

In [64]:
df_cleaned["sentiment_tag"] = df_cleaned["cleaned_text"].apply(lambda x : tag_sentiment(x))

## Results

In [65]:
df_cleaned['sentiment_tag'].value_counts()

NEG    3002
POS     930
NEU     735
Name: sentiment_tag, dtype: int64

In [68]:
df.head()

Unnamed: 0.1,Unnamed: 0,author,created_utc,selftext,subreddit,title,url,created,cleaned_text,cleaned_split,sentiment_tag
0,0,undergroundagent2,1597591161,Have you or someone you know been a victim to...,abuse,join the underground railroad,https://www.reddit.com/r/abuse/comments/iau3kh...,1597562000.0,someone know victim domestic violence ever wan...,"[someone, know, victim, domestic, violence, ev...",POS
1,1,Maniacalmind0000,1597584684,So I’ve been with my husband for three years a...,abuse,I don’t know what to do,https://www.reddit.com/r/abuse/comments/iasdhp...,1597556000.0,ive husband three years one year old daughter ...,"[ive, husband, three, years, one, year, old, d...",POS
2,2,blackoutjason1,1597584355,Did anyone else after different types of abuse...,abuse,Imagining and Fearing the Worst,https://www.reddit.com/r/abuse/comments/iasavg...,1597556000.0,anyone else different types abuse fear things ...,"[anyone, else, different, types, abuse, fear, ...",NEG
3,3,wastedyovth_,1597577713,what people who were lucky &amp; don’t underst...,abuse,what others don’t understand,https://www.reddit.com/r/abuse/comments/iaqv3q...,1597549000.0,people lucky amp dont understand abusive paren...,"[people, lucky, amp, dont, understand, abusive...",POS
4,4,fish_breath123,1597576111,,abuse,penis,https://www.reddit.com/r/abuse/comments/iaqk91...,1597547000.0,,[],NEU


## Filtering

In [81]:
def filter_sentiment(row):
    if (row["sentiment_tag"] == "NEG"):
        text_object = NRCLex(row["cleaned_text"])
        return text_object.affect_frequencies
    else:
        return "Need further psychosocial assessment."

In [82]:
df_cleaned["sentiments"] = df_cleaned.apply(filter_sentiment, axis = 1)

In [83]:
df_cleaned.head()

Unnamed: 0.1,Unnamed: 0,author,created_utc,selftext,subreddit,title,url,created,cleaned_text,cleaned_split,sentiment_tag,sentiments
0,0,undergroundagent2,1597591161,Have you or someone you know been a victim to...,abuse,join the underground railroad,https://www.reddit.com/r/abuse/comments/iau3kh...,1597562000.0,someone know victim domestic violence ever wan...,"[someone, know, victim, domestic, violence, ev...",POS,Need further psychosocial assessment.
1,1,Maniacalmind0000,1597584684,So I’ve been with my husband for three years a...,abuse,I don’t know what to do,https://www.reddit.com/r/abuse/comments/iasdhp...,1597556000.0,ive husband three years one year old daughter ...,"[ive, husband, three, years, one, year, old, d...",POS,Need further psychosocial assessment.
2,2,blackoutjason1,1597584355,Did anyone else after different types of abuse...,abuse,Imagining and Fearing the Worst,https://www.reddit.com/r/abuse/comments/iasavg...,1597556000.0,anyone else different types abuse fear things ...,"[anyone, else, different, types, abuse, fear, ...",NEG,"{'fear': 0.12658227848101267, 'anger': 0.11392..."
3,3,wastedyovth_,1597577713,what people who were lucky &amp; don’t underst...,abuse,what others don’t understand,https://www.reddit.com/r/abuse/comments/iaqv3q...,1597549000.0,people lucky amp dont understand abusive paren...,"[people, lucky, amp, dont, understand, abusive...",POS,Need further psychosocial assessment.
4,4,fish_breath123,1597576111,,abuse,penis,https://www.reddit.com/r/abuse/comments/iaqk91...,1597547000.0,,[],NEU,Need further psychosocial assessment.
