In [1]:
import os
# Move the working directory two levels up from where the notebook started.
# NOTE: this is a relative chdir, so re-running the cell in the same kernel
# moves up two more levels each time.
os.chdir('../..')

In [2]:
import convokit
import pickle
import json

In [4]:
# Directory holding the pickled inputs; the relative path is resolved against
# the current working directory when the files are opened.
DATA_DIR = "data_liwc"

In [5]:
# Descend into convokit/tensors; relative paths used after this point are
# resolved from there.
# NOTE: relative chdir -- running this cell twice in the same kernel will not
# land in the same directory.
os.chdir('convokit/tensors')

In [6]:
# Load the pickled `rank_to_factors` object (indexed as rank_to_factors[9][k]
# later in the notebook).
# NOTE: pickle.load can execute arbitrary code from the file; only load
# pickles from a trusted source.
with open(os.path.join(DATA_DIR, 'rank_to_factors.p'), 'rb') as f:
    rank_to_factors = pickle.load(f)

In [8]:
# Load the pickled `liwc_features` object.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(DATA_DIR, 'liwc_features.p'), 'rb') as f:
    liwc_features = pickle.load(f)

# Load the pickled `subreddits` object.
with open(os.path.join(DATA_DIR, 'subreddits.p'), 'rb') as f:
    subreddits = pickle.load(f)

In [9]:
from sklearn.preprocessing import StandardScaler
import numpy as np
# Module-level StandardScaler instance, constructed with default arguments.
scaler = StandardScaler()

In [10]:
def get_anomalous_points(factor_full, idx, threshold=1.5):
    """Return row indices whose standardised loading on one factor is extreme.

    Column ``idx`` of ``factor_full`` is converted to z-scores (zero mean,
    unit population standard deviation, i.e. ``ddof=0``). The z-score is
    computed locally with numpy so the function no longer refits a shared
    module-level scaler as a side effect of every call.

    Parameters
    ----------
    factor_full : 2-D array
        Factor matrix; rows are the items to be ranked, columns are factors.
    idx : int
        Column (factor) to inspect.
    threshold : float, optional
        Absolute z-score beyond which a row counts as anomalous. Defaults to
        1.5, the value that was previously hard-coded.

    Returns
    -------
    (pos_pts, neg_pts) : tuple of 1-D integer arrays
        Row indices with z-score ``> threshold`` and ``< -threshold``.
    """
    column = np.asarray(factor_full)[:, idx].astype(float)

    # Nothing to standardise: return two empty index arrays instead of NaNs.
    if column.size == 0:
        empty = np.array([], dtype=np.intp)
        return empty, empty.copy()

    spread = column.std()
    # A constant column has zero spread; divide by 1 so every z-score is 0
    # rather than NaN/inf (no row is anomalous in that case).
    if spread == 0:
        spread = 1.0
    z_scores = (column - column.mean()) / spread

    pos_pts = np.flatnonzero(z_scores > threshold)
    neg_pts = np.flatnonzero(z_scores < -threshold)
    return pos_pts, neg_pts

In [11]:
# Pull the three factor matrices stored under key 9 of `rank_to_factors`.
# The trailing shapes are the original author's annotations.
time_factor, thread_factor, feature_factor = (
    rank_to_factors[9][0],  # (9, 9)
    rank_to_factors[9][1],  # (10000, 9)
    rank_to_factors[9][2],  # (140, 9)
)

In [12]:
# Load the pickled conversation ids into `thread_ids`.
# NOTE(review): `thread_ids` is not referenced anywhere else in the visible
# notebook; threads are later looked up by position in `convos` instead --
# confirm the two orderings agree.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(os.path.join(DATA_DIR, 'convo_ids.p'), 'rb') as f:
    thread_ids = pickle.load(f)

In [13]:
from convokit import Corpus, download
# Load the corpus from a path relative to the current working directory.
corpus = Corpus(filename="longreddit_construction/long-reddit-corpus-liwc")

In [14]:
# Tag every utterance with its 0-based position in its conversation's
# chronological utterance list, stored under meta['order'].
for convo in corpus.iter_conversations():
    ordered_utts = convo.get_chronological_utterance_list()
    for position, utt in enumerate(ordered_utts):
        utt.meta['order'] = position

In [24]:
def get_convo_details(convo, limit=20):
    """Print a conversation's subreddit followed by its reply structure.

    Parameters
    ----------
    convo : conversation object
        Must expose ``id``, ``get_utterance`` and
        ``print_conversation_structure``. Each utterance is expected to carry
        ``meta['order']`` (set earlier in the notebook), ``user.id`` and
        ``text``.
    limit : int, optional
        Forwarded as ``limit`` to ``print_conversation_structure``. Defaults
        to 20, the value that was previously hard-coded.

    Returns
    -------
    None
        Output is written to stdout only.
    """
    def describe(utt):
        # Rendered as "<order>. <bold user id>: <text>".
        return str(utt.meta['order']) + ". " + color.BOLD + utt.user.id + color.END + ": " + utt.text

    # The subreddit is read from the utterance whose id equals the
    # conversation id (presumably the thread's root post -- confirm).
    root_utt = convo.get_utterance(convo.id)
    print("Subreddit: {}".format(root_utt.meta['subreddit']))
    convo.print_conversation_structure(describe, limit=limit)

In [25]:
# Materialise the conversations into a list so they can be indexed by position.
convos = list(corpus.iter_conversations())

In [26]:
import random

## Inspecting threads from each factor

In [27]:
class color:
    """ANSI escape sequences used to style text written to stdout."""

    # Foreground colours
    PURPLE = "\033[95m"
    CYAN = "\033[96m"
    DARKCYAN = "\033[36m"
    BLUE = "\033[94m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"

    # Text attributes
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Reset: append after styled text to switch all styling off again
    END = "\033[0m"

In [28]:
# Number of factors = number of columns of `time_factor`.
rank = time_factor.shape[1]
# How many threads to sample per factor for each of the positive and negative groups.
num_examples = 3

In [29]:
# For every factor, print a random handful of threads whose loading on that
# factor is anomalously high ("positive") and anomalously low ("negative").
# NOTE(review): row i of `thread_factor` is assumed to correspond to
# `convos[i]` -- confirm the corpus iteration order matches the tensor rows.
# NOTE: no random seed is set, so the sampled threads differ between runs.
for idx in range(rank):
    print("#########################################################")
    print(color.BOLD + "Inspecting threads from factor {}".format(idx+1) + color.END)
    print()
    pos_threads, neg_threads = get_anomalous_points(thread_factor, idx)

    print(color.BOLD + color.GREEN + "Positive examples" + color.END)
    print()
    # Cap the sample size: random.sample raises ValueError when asked for
    # more items than the population holds.
    for thread_idx in random.sample(list(pos_threads), min(num_examples, len(pos_threads))):
        get_convo_details(convos[thread_idx])
        print()

    print()
    print(color.BOLD + color.RED + "Negative examples" + color.END)
    for thread_idx in random.sample(list(neg_threads), min(num_examples, len(neg_threads))):
        get_convo_details(convos[thread_idx])
        print()

    print("#########################################################")
    print()

#########################################################
[1mInspecting threads from factor 1[0m

[1m[92mPositive examples[0m

Subreddit: movies
1. [1mSpookingtonZ[0m: one of the rebellious aliens was the alien that brought jyn and the gang to saw in rogue one.
    2. [1mRancor2001[0m: yeah i liked the nod to rogue one there.
        14. [1mSaintNovem[0m: two-tubes and warwick davis. hell yeah.
        19. [1mDM_Me_Your_Oppai[0m: i know that movie! i clapped, i clapped when i saw it
    3. [1mBooRand[0m: i was wondering if that was benthic or edio (i think that's the second one's name), benthic is also in the comics. i was wondering if it was the same character or same species.
    4. [1mGraffikl1[0m: it looked like edrio two tubes
        7. [1marxero[0m: it was, had to be
    5. [1mRustyDetective[0m: and there was the mass effect garrus looking one that was going to hire finn at at maz's cantina.
    6. [1mkeithtbarker[0m: two tubes!
    8. [1mcoool12121212[0

1. [1mEriwich[0m: aw man, ac/dc is the reason i got in to playing guitar when i was 12. at least bon will have a friend to jam with.
    2. [1m5MoK3[0m: exact same story here. except maybe like age 10/11. grew up on ac/dc because of my dad
        4. [1mSurroundedbygoalies[0m: me too. i've had the pleasure of taking my dad with me to see them live twice, both stadium shows. last time was after malcolm was done and right before brian's announcement about hearing loss. the pauses between songs were so long and awkward, i said to my dad "i hate to say it, but i think they're done." :-(
            13. [1mpinkfloydfan4life[0m: was that in dallas?
                15. [1mSurroundedbygoalies[0m: nope.
                    19. [1mBromlife[0m: melbourne?
                        20. [1mSurroundedbygoalies[0m: nope. sad to hear the problem with the last tour seems to be worldwide.
        10. [1masipoditas[0m: me too, only i was just being born when it started.
            18. [1m

[1m[92mPositive examples[0m

Subreddit: mildlyinfuriating
1. [1mmrmatthunt[0m: if you're so fat that you can't fit your purse beside you in a booth then it's time for a lifestyle change.
    2. [1mVQopponaut35[0m: no, it's obviously the booth's fault for being too small... /s
        3. [1msprucenoose[0m: and the floor's fault for being so low you can't reach your bag there.
            4. [1m6ixalways[0m: nah i'm not trying to put my bag on the floor do you know how gross them shits are at restaurants? it goes up my asshole where it's protected
                10. [1mXenon808[0m: or is it? ( deg [?]? deg)
        6. [1mNubetastic[0m: i hate those wide gap booths where the table is past my knees...
        7. [1mI_am_not_a_Taco[0m: dude, i know it's sarcasm but godamn is it hard to sit at some restaurants with how big the booths are nowadays. i'm not small by any means (6ft and 190lbs) but i feel like a little kid half the time not being able to touch the seat back and

            15. [1mmanere[0m: rembers me of the terran whine about thaldarim amulet. blizz even announced they will nerf hts back then but they cried so much till blizzard complelty removed the upgrade. never was protoss wr in tvp so low :d like incrontrol once said. whining payes of in the long run. look at zerg. best winrate last patch and still got buffed.
                20. [1mfleekymon[0m: khaydarin amulet? tbh it is probably a bit too strong. with warp in mechanic + khaydarin it would just mean more spell spam. at this point i'm ok with not having it back in the game, if fungal and other aoe are being adjusted as well

#########################################################

#########################################################
[1mInspecting threads from factor 7[0m

[1m[92mPositive examples[0m

Subreddit: MMA
1. [1mJonmva703[0m: black dude fucked his girl. apparently the fight was [canceled](<url>)
    2. [1mArnie_Kay[0m: yeah bra ima need a source for that "

    3. [1mChizzle1496[0m: this was literally a question i was preparing to ask on this sub: "if your nwhatever does something heartfelt and nice every so often, does that mean they're not actually n?" i think it's possible that they still are. i mean, just like humans make mistakes every so often, we get some things right every so often. that doesn't mean we're inherently good or bad one way or another. all that matters is the overall picture we're painting. whatever the answer, though, at times it can sure feel like they might not be. and i think that thought is harmful as heck. your experience is *real* and just like op said, you couldn't have made up the feelings you felt while living with them. they were real. you felt lonely, anxious, overly sad, isolated etc. you felt that. if your parents were loving and not-n's, why on earth would you feel the need to go nc? if you're on this sub looking and earnestly *searching* for other people who feel just a *little bit* like you, just so