In [7]:
from nrclex import NRCLex
from convokit import Corpus, download
import pandas as pd



In [12]:
# We need this to run the textBlob used by the NRCLex library
import nltk
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt_tab to /Users/Panos/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

### Load the corpus and extract the conversation ids

In [10]:

def get_thread(corpus, conv_id):
    # Get the root utterance (main post)
    root_conversation = corpus.conversations[conv_id]
    
    # Loop through each utterance and collect necessary information
    thread_data = []
    for utt in root_conversation.iter_utterances():
        try:
            thread_data.append({
                'utterance_id': utt.id,
                'speaker': utt.speaker.id,
                'reply_to': utt.reply_to,   # This helps establish the parent-child relationship
                'timestamp': utt.timestamp if utt.timestamp is not None else 0,
                'text': utt.text,
                'success': utt.meta['success'] if 'success' in utt.meta else None  # Get success if it exists in metadata
            })
        except KeyError:
            continue
            
    
    # Create a DataFrame from the collected data and sort it
    df = pd.DataFrame(thread_data)
    df = df.sort_values(by='timestamp').reset_index(drop=True)
    
    return df

# Download the corpus
# corpus = Corpus(download("winning-args-corpus"))

# or load it from disk
corpus = Corpus("/Users/Panos/.convokit/downloads/winning-args-corpus")

# Get conversation ids
convos = [i.id for i in corpus.iter_conversations()]




### Choose conversation and build dataframe

In [11]:
#create a conversation thread
thread_df = get_thread(corpus, convos[0])
print(thread_df)

   utterance_id               speaker    reply_to   timestamp  \
0     t3_2ro9ux            seanyowens        None           0   
1    t1_cnhplrm           Account9726   t3_2ro9ux  1420697092   
2    t1_cnhpnmr            R3ptar1337   t3_2ro9ux  1420697175   
3    t1_cnhpp4o          BenIncognito   t3_2ro9ux  1420697241   
4    t1_cnhpsmr            seanyowens  t1_cnhplrm  1420697421   
5    t1_cnhpvqs           Account9726  t1_cnhpsmr  1420697567   
6    t1_cnhq330               Amablue   t3_2ro9ux  1420697904   
7    t1_cnhq7iw            seanyowens  t1_cnhpvqs  1420698102   
8    t1_cnhq7nv              nikoberg   t3_2ro9ux  1420698109   
9    t1_cnhqcwz                Nepene   t3_2ro9ux  1420698347   
10   t1_cnhqhxa             [deleted]  t1_cnhpnmr  1420698576   
11   t1_cnhqouu          whitepalms85  t1_cnhpp4o  1420698900   
12   t1_cnhqrw1                Nepene  t1_cnhq7iw  1420699040   
13   t1_cnhqzsf          whitepalms85  t1_cnhplrm  1420699399   
14   t1_cnhrd8u          

### Choose post and show text

In [29]:
text_object = NRCLex(text=thread_df["text"][0])
text = thread_df["text"][0]
print(text)

I can't remember the topic that spurred this discussion, but a friend and I were debating whether man-made things were natural. He took the position that they are unnatural. 

He cited this definition by Merriam-Webster:  existing in nature and not made or caused by people : coming from nature (http://www.merriam-webster.com/dictionary/natural) as his basis for the distinction for natural vs. unnatural.

However, I respectfully disagree with his position and furthermore that definition of natural. People arise from nature. Humankind's capacity to create, problem-solve, analyze, rationalize, and build also come from natural processes. How are the things we create unnatural? It is only through natural occurrences that we have this ability, why is it that we would give the credit of these things solely to man, as opposed to nature? We are not separate from nature, thus, how can any of our actions or creations be unnatural? If we were somehow separate from nature, I would understand the di

### Show the Lexicon words identified in the text and their corresponding emotions

In [21]:
text_object.affect_dict

{'discussion': ['positive'],
 'friend': ['joy', 'positive', 'trust'],
 'unnatural': ['disgust', 'fear', 'negative'],
 'coming': ['anticipation'],
 'distinction': ['positive'],
 'disagree': ['anger', 'negative'],
 'create': ['joy', 'positive'],
 'build': ['positive'],
 'ability': ['positive'],
 'credit': ['positive', 'trust'],
 'opposed': ['anger', 'fear', 'negative'],
 'love': ['joy', 'positive'],
 'arguments': ['anger'],
 'mistaken': ['fear', 'negative'],
 'argument': ['anger', 'negative'],
 'word': ['positive', 'trust'],
 'intended': ['anticipation', 'positive']}

### Measure the word count in the text

In [49]:
from nltk.tokenize import word_tokenize
from collections import Counter
from nltk.corpus import stopwords

def count_unique_words_nltk(text):
    # Tokenize the text
    words = word_tokenize(text.lower())
    # Remove punctuation and stopwords
    stop_words = set(stopwords.words('english')) 
    filtered_words = [word for word in words if word not in stop_words and word.isalpha()]  # isalpha() removes punctuation
    word_counts = Counter(filtered_words)
    return word_counts


unique_word_counts_nltk = dict(count_unique_words_nltk(text))
print(unique_word_counts_nltk)

AttributeError: 'list' object has no attribute 'lower'

### Compare the word count in text against the word count in Lexicon

In [40]:
len(text_object.affect_dict.keys())/len(unique_word_counts_nltk.keys())

0.2

In [23]:
text_object.raw_emotion_scores

{'positive': 14,
 'joy': 5,
 'trust': 5,
 'disgust': 5,
 'fear': 7,
 'negative': 9,
 'anticipation': 2,
 'anger': 4}

In [24]:
text_object.affect_frequencies

{'fear': 0.13725490196078433,
 'anger': 0.0784313725490196,
 'anticip': 0.0,
 'trust': 0.09803921568627451,
 'surprise': 0.0,
 'positive': 0.27450980392156865,
 'negative': 0.17647058823529413,
 'sadness': 0.0,
 'disgust': 0.09803921568627451,
 'joy': 0.09803921568627451,
 'anticipation': 0.0392156862745098}

In [28]:
('normal') in NRCLex.lexicon.keys()

False

['I',
 'ca',
 "n't",
 'remember',
 'the',
 'topic',
 'that',
 'spurred',
 'this',
 'discussion',
 'but',
 'a',
 'friend',
 'and',
 'I',
 'were',
 'debating',
 'whether',
 'man-made',
 'things',
 'were',
 'natural',
 'He',
 'took',
 'the',
 'position',
 'that',
 'they',
 'are',
 'unnatural',
 'He',
 'cited',
 'this',
 'definition',
 'by',
 'Merriam-Webster',
 'existing',
 'in',
 'nature',
 'and',
 'not',
 'made',
 'or',
 'caused',
 'by',
 'people',
 'coming',
 'from',
 'nature',
 'http',
 'www.merriam-webster.com/dictionary/natural',
 'as',
 'his',
 'basis',
 'for',
 'the',
 'distinction',
 'for',
 'natural',
 'vs',
 'unnatural',
 'However',
 'I',
 'respectfully',
 'disagree',
 'with',
 'his',
 'position',
 'and',
 'furthermore',
 'that',
 'definition',
 'of',
 'natural',
 'People',
 'arise',
 'from',
 'nature',
 'Humankind',
 "'s",
 'capacity',
 'to',
 'create',
 'problem-solve',
 'analyze',
 'rationalize',
 'and',
 'build',
 'also',
 'come',
 'from',
 'natural',
 'processes',
 'How',
