# Krystian Gronek & Katarzyna Piotrowska
# Text Mining and Social Media Mining, final project - Analyzing men's and women's comments using NLP methods

# Loading packages and data

In [1]:
%matplotlib inline 

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import nltk

# VADER (Valence Aware Dictionary and sEntiment Reasoner) sentiment analyzer.
# The lexicon is fetched first (nltk reports when it is already up to date),
# then a single analyzer instance is created for reuse in every cell below.
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

import nltk.classify.util
from nltk.classify import NaiveBayesClassifier

# Load the semicolon-separated AskMen and AskWomen comment datasets.
men = pd.read_csv('data/final_askmen.csv', sep=';')
women = pd.read_csv('data/final_askwomen.csv', sep=';')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Krystian\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

# Describing the sentiment analysis approach

In the previous Jupyter notebook we split the men and women datasets using a categorical variable `sentiment`, which approximates the sentiment of each comment based on the upvotes and downvotes it received from other users. In this notebook we predict sentiment from the text itself, using the VADER analyzer.

VADER returns a compound polarity score between -1 and 1 for each text. We assign positive sentiment to texts whose compound score is greater than 0, and negative sentiment to texts whose compound score is less than or equal to 0.


# Sentiment analysis - user comments

### Male comments from /r/AskMen

In [2]:
# Score every cleaned AskMen comment with VADER and keep the compound polarity.
# Iterate over the 'cleaned' column itself rather than fetching whole rows with
# .iloc, which would materialize every row as a Series just to read one cell.
men_comments_polarity = [
    analyser.polarity_scores(comment)['compound']
    for comment in men['cleaned']
]

# Binary label: strictly positive compound score -> 'positive',
# zero or below -> 'negative'. One label per score, so lengths always match.
men_comments_predicted_sentiment = [
    'positive' if score > 0 else 'negative'
    for score in men_comments_polarity
]

men['comments_polarity'] = men_comments_polarity
men['comments_predicted_sentiment'] = men_comments_predicted_sentiment

### Female comments from /r/AskWomen

In [3]:
# Score every cleaned AskWomen comment with VADER and keep the compound polarity.
# Iterate over the 'cleaned' column itself rather than fetching whole rows with
# .iloc, which would materialize every row as a Series just to read one cell.
women_comments_polarity = [
    analyser.polarity_scores(comment)['compound']
    for comment in women['cleaned']
]

# Binary label: strictly positive compound score -> 'positive',
# zero or below -> 'negative'. One label per score, so lengths always match.
women_comments_predicted_sentiment = [
    'positive' if score > 0 else 'negative'
    for score in women_comments_polarity
]

women['comments_polarity'] = women_comments_polarity
women['comments_predicted_sentiment'] = women_comments_predicted_sentiment

In [4]:
# Display the AskMen frame to inspect the newly added
# 'comments_polarity' and 'comments_predicted_sentiment' columns.
men

Unnamed: 0,username,com_original,cleaned,cleaned_wo_sw,tokenized,stemmed,tokenized_wo_sw,submission_title,submission_title_cleaned,submission_title_cleaned_wo_sw,...,submission_title_stemmed,submission_title_tokenized_wo_sw,comment_score,submission_ups,minmax,minmax_grouped,is_positive,sentiment,comments_polarity,comments_predicted_sentiment
0,8483,Thank fuck... So many great posts buried under...,thank fuck so many great posts buried under id...,thank fuck many great posts buried idiotic kar...,"['thank', 'fuck', 'so', 'many', 'great', 'post...",thank fuck mani great post buri idiot karma wh...,"['thank', 'fuck', 'many', 'great', 'posts', 'b...",BONK! Overly sexual questions are no longer al...,bonk overly sexual questions are no longer all...,bonk overly sexual questions longer allowed,...,bonk overli sexual question longer allow,"['bonk', 'overly', 'sexual', 'questions', 'lon...",11,13949,0.72,0.500000,1,positive,-0.0571,negative
1,Zeezprahh,"Well fuck me and suck me sideways, it's a deal!",well fuck me and suck me sideways its a deal,well fuck suck sideways deal,"['well', 'fuck', 'me', 'and', 'suck', 'me', 's...",well fuck suck sideway deal,"['well', 'fuck', 'suck', 'sideways', 'deal']",BONK! Overly sexual questions are no longer al...,bonk overly sexual questions are no longer all...,bonk overly sexual questions longer allowed,...,bonk overli sexual question longer allow,"['bonk', 'overly', 'sexual', 'questions', 'lon...",8,13949,0.60,0.285714,1,positive,-0.6486,negative
2,skinny_gator,I'm dying over here lmao\n\nThis is amazing. A...,im dying over here lmao this is amazing and ye...,im dying lmao amazing yes ask men straight bec...,"['im', 'dying', 'over', 'here', 'lmao', 'this'...",im die lmao amaz ye ask men straight becom nsf...,"['im', 'dying', 'lmao', 'amazing', 'yes', 'ask...",BONK! Overly sexual questions are no longer al...,bonk overly sexual questions are no longer all...,bonk overly sexual questions longer allowed,...,bonk overli sexual question longer allow,"['bonk', 'overly', 'sexual', 'questions', 'lon...",18,13949,1.00,1.000000,1,positive,0.9062,positive
3,BantyRed,I thought it was just me. I joined right befor...,i thought it was just me i joined right before...,thought joined right horny came figured horny ...,"['i', 'thought', 'it', 'was', 'just', 'me', 'i...",thought join right horni came figur horni peopl,"['thought', 'joined', 'right', 'horny', 'came'...",BONK! Overly sexual questions are no longer al...,bonk overly sexual questions are no longer all...,bonk overly sexual questions longer allowed,...,bonk overli sexual question longer allow,"['bonk', 'overly', 'sexual', 'questions', 'lon...",6,13949,0.52,0.142857,1,positive,0.0000,negative
4,postvolta,Thank god. Every other question here is about ...,thank god every other question here is about s...,thank god every question sex women variation t...,"['thank', 'god', 'every', 'other', 'question',...",thank god everi question sex women variat thereof,"['thank', 'god', 'every', 'question', 'sex', '...",BONK! Overly sexual questions are no longer al...,bonk overly sexual questions are no longer all...,bonk overly sexual questions longer allowed,...,bonk overli sexual question longer allow,"['bonk', 'overly', 'sexual', 'questions', 'lon...",5,13949,0.48,0.071429,1,positive,0.5574,positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15996,randylahey2883,I had a bear come into my camp on more than on...,i had a bear come into my camp on more than on...,bear come camp one occasion hiking yes food put,"['i', 'had', 'a', 'bear', 'come', 'into', 'my'...",bear come camp one occas hike ye food put,"['bear', 'come', 'camp', 'one', 'occasion', 'h...",What is a fact or story you don't get to tell ...,what is a fact or story you dont get to tell a...,fact story dont get tell much youd like,...,fact stori dont get tell much youd like,"['fact', 'story', 'dont', 'get', 'tell', 'much...",3,9,0.40,-0.666667,1,positive,0.4019,positive
15997,BalloonPilotDude,A story that I think about allot but I doubt t...,a story that i think about allot but i doubt t...,story think allot doubt others involved rememb...,"['a', 'story', 'that', 'i', 'think', 'about', ...",stori think allot doubt other involv rememb tw...,"['story', 'think', 'allot', 'doubt', 'others',...",What is a fact or story you don't get to tell ...,what is a fact or story you dont get to tell a...,fact story dont get tell much youd like,...,fact stori dont get tell much youd like,"['fact', 'story', 'dont', 'get', 'tell', 'much...",3,9,0.40,-0.666667,1,positive,0.9787,positive
15998,JetBrink,I'm awesome in bed,im awesome in bed,im awesome bed,"['im', 'awesome', 'in', 'bed']",im awesom bed,"['im', 'awesome', 'bed']",What is a fact or story you don't get to tell ...,what is a fact or story you dont get to tell a...,fact story dont get tell much youd like,...,fact stori dont get tell much youd like,"['fact', 'story', 'dont', 'get', 'tell', 'much...",3,9,0.40,-0.666667,1,positive,0.6249,positive
15999,Stabbmaster,The first slaveowner in the United States was ...,the first slaveowner in the united states was ...,first slaveowner united states black man felt ...,"['the', 'first', 'slaveowner', 'in', 'the', 'u...",first slaveown unit state black man felt entit...,"['first', 'slaveowner', 'united', 'states', 'b...",What is a fact or story you don't get to tell ...,what is a fact or story you dont get to tell a...,fact story dont get tell much youd like,...,fact stori dont get tell much youd like,"['fact', 'story', 'dont', 'get', 'tell', 'much...",9,9,0.64,0.333333,1,positive,0.8966,positive


# Sentiment analysis - submission post titles

### Submission titles from /r/AskMen

In [5]:
# Score the cleaned submission title attached to every AskMen row with VADER.
# Iterate over the column itself rather than fetching whole rows with .iloc,
# which would materialize every row as a Series just to read one cell.
men_posts_polarity = [
    analyser.polarity_scores(title)['compound']
    for title in men['submission_title_cleaned']
]

# Binary label: strictly positive compound score -> 'positive',
# zero or below -> 'negative'. One label per score, so lengths always match.
men_posts_predicted_sentiment = [
    'positive' if score > 0 else 'negative'
    for score in men_posts_polarity
]

men['posts_polarity'] = men_posts_polarity
men['posts_predicted_sentiment'] = men_posts_predicted_sentiment

### Submission titles from /r/AskWomen

In [6]:
# Score the cleaned submission title attached to every AskWomen row with VADER.
# Iterate over the column itself rather than fetching whole rows with .iloc,
# which would materialize every row as a Series just to read one cell.
women_posts_polarity = [
    analyser.polarity_scores(title)['compound']
    for title in women['submission_title_cleaned']
]

# Binary label: strictly positive compound score -> 'positive',
# zero or below -> 'negative'. One label per score, so lengths always match.
women_posts_predicted_sentiment = [
    'positive' if score > 0 else 'negative'
    for score in women_posts_polarity
]

women['posts_polarity'] = women_posts_polarity
women['posts_predicted_sentiment'] = women_posts_predicted_sentiment

# Saving datasets 
### (with VADER polarity values and predicted sentiment based on them)

In [7]:
# Persist both frames, now carrying the VADER polarity and predicted-sentiment
# columns, as semicolon-separated UTF-8 CSVs.
# NOTE: these are the same relative paths the frames were read from at the top
# of the notebook, so the input files are overwritten in place.
# index=False (an explicit bool, as to_csv documents) keeps the row index out
# of the file; the previous index=None only worked because None is falsy.
men.to_csv('./data/final_askmen.csv', sep=';', encoding='utf-8', index=False, header=True)
women.to_csv('./data/final_askwomen.csv', sep=';', encoding='utf-8', index=False, header=True)