# Dependency parsing

In [1]:
# Load data with identity term matches extracted, tokenized
import pandas as pd

# Pickled DataFrame of processed comments; the commented-out line is an alternative input path.
path = '../../data/incels/processed_comments.pkl'
# path = '../data/white_supremacist_identities.pkl'
# NOTE(review): unpickling can execute arbitrary code, so only load files from a trusted source.
data = pd.read_pickle(path)
# Print the column names, dtypes and memory footprint as a sanity check on the load
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6248230 entries, 0 to 6248229
Data columns (total 9 columns):
 #   Column                      Dtype         
---  ------                      -----         
 0   type                        object        
 1   forum                       object        
 2   thread                      object        
 3   username                    object        
 4   date                        object        
 5   content                     object        
 6   parsed_date                 datetime64[ns]
 7   content_orig                object        
 8   netmapper_identity_matches  object        
dtypes: datetime64[ns](1), object(8)
memory usage: 429.0+ MB


In [7]:
import spacy

# Small English spaCy pipeline with the NER component disabled;
# the cells visible below only read dependency labels (`dep_`) and heads.
nlp = spacy.load('en_core_web_sm', disable=['ner'])

In [18]:
# Take the first comment that has at least one extracted identity match
has_identity = data['netmapper_identity_matches'].map(lambda matches: len(matches) > 0)
samp = data.loc[has_identity].head(1)
samp

Unnamed: 0,type,forum,thread,username,date,content,parsed_date,content_orig,netmapper_identity_matches
12,COMMENT,001-MustReadContent,0000014-Itssosadthatwereplacesoc,THE TRUE HONKLER,"Nov 20, 2020","frothysolutions said : do we ? in order to have this as a replacement for socializing , we have to want to be here . fag we were forced here by foids and lack of any other choice",2020-11-20,"FrothySolutions said: Do we? In order to have this as a replacement for socializing, we have to want to be here. fag we were forced here by foids and lack of any other choice",[fag]


In [11]:
# Show full cell text instead of truncating long comment bodies
pd.set_option('display.max_colwidth', None)
# Tokenized text of the sample comment (row 12 of `samp`).
# Read it from `samp`: `test` is not defined before this line, so indexing
# `test` itself raises NameError on a fresh kernel.
test = samp.loc[12, 'content']

In [12]:
# Run the spaCy pipeline over the sample comment text; `doc` is reused by the cells below
doc = nlp(test)
doc

frothysolutions said : do we ? in order to have this as a replacement for socializing , we have to want to be here . fag we were forced here by foids and lack of any other choice

In [14]:
# Dependency label of every token, in token order
parse = [tok.dep_ for tok in doc]
parse

['nsubj',
 'ROOT',
 'punct',
 'xcomp',
 'nsubj',
 'punct',
 'prep',
 'pobj',
 'aux',
 'acl',
 'dobj',
 'prep',
 'det',
 'pobj',
 'prep',
 'pobj',
 'punct',
 'nsubj',
 'ROOT',
 'aux',
 'xcomp',
 'aux',
 'xcomp',
 'advmod',
 'punct',
 'intj',
 'nsubjpass',
 'auxpass',
 'ROOT',
 'advmod',
 'agent',
 'pobj',
 'cc',
 'conj',
 'prep',
 'det',
 'amod',
 'pobj']

In [26]:
# Dependency label of the identity mention token.
# NOTE: `tok_idx` is assigned inside the identity-matching loop cell further down,
# so that cell has to run before this one.
parse[tok_idx]

'intj'

In [25]:
# Syntactic head of the identity mention token.
# NOTE: like the cell above, this depends on `tok_idx` from the identity-matching loop cell.
[tok.head for tok in doc][tok_idx]

forced

In [22]:
# Match extracted identities to tokens
from collections import defaultdict

# Dependency labels that decide how a token relates to the identity mention
SUBJECT_DEPS = ('nsubj', 'agent')                    # mention acts through its head verb
OBJECT_DEPS = ('dobj', 'nsubjpass', 'dative', 'pobj')  # mention is acted upon by its head
MODIFIER_DEPS = ('amod', 'appos', 'nsubj', 'nmod')   # children that describe the mention
COPULAS = ('is', 'was')                              # heads checked for `attr` complements

# identity -> {'actions': [actions], 'attributes': [attributes]}
actions_attributes = {} # replace with separate columns of attributes and actions in exploded df
identity_ctr = defaultdict(int) # keep track of how many of this identity I've seen

for identity in samp.loc[12, 'netmapper_identity_matches']:

    # Repeated mentions of the same identity accumulate into one entry
    # instead of the last mention overwriting the earlier ones.
    entry = actions_attributes.setdefault(identity, {'actions': [], 'attributes': []})

    # Get identity mention locations
    mention_idx = [i for i, tok in enumerate(doc) if tok.text == identity]
    nth_mention = identity_ctr[identity]
    identity_ctr[identity] += 1
    if nth_mention >= len(mention_idx):
        # No single token equals this identity string, or there are fewer tokens
        # than recorded mentions: skip rather than raise IndexError.
        continue
    tok_idx = mention_idx[nth_mention]
    mention = doc[tok_idx]

    # Verbs where identity term was the subject
    verbs_subj = [mention.head.text] if mention.dep_ in SUBJECT_DEPS else []

    # Verbs where identity term was the object
    verbs_obj = [mention.head.text] if mention.dep_ in OBJECT_DEPS else []

    # Adjectives that describe the identity term: direct modifier children ...
    adjs = [tok.text.lower() for tok in doc
            if tok.head.i == tok_idx and tok.dep_ in MODIFIER_DEPS]
    # ... plus `attr` complements of an is/was head that also governs the mention
    adjs += [tok.text.lower() for tok in doc
             if tok.dep_ == 'attr' and tok.head.text in COPULAS
             and any(c.i == tok_idx for c in tok.head.children)]

    entry['actions'].extend(verbs_subj + verbs_obj)
    entry['attributes'].extend(adjs)

actions_attributes

{'fag': {'actions': [], 'attributes': []}}

In [3]:
# Create a list of identity term unique indexes for each identity term list
# (first inspect the per-comment lists of matched identity terms)
data['netmapper_identity_matches']

0                                 []
1                                 []
2                                 []
3                                 []
4                                 []
                     ...            
6248225    [incels, teenager, teens]
6248226                           []
6248227                           []
6248228            [parenting, kids]
6248229                     [incels]
Name: netmapper_identity_matches, Length: 6248230, dtype: object

In [12]:
from collections import defaultdict, Counter

def unique_term_index(l):
    """Number each term by how many times it has already appeared in `l`.

    The first occurrence of a term gets 0, the second gets 1, and so on,
    so that (term, index) pairs are unique within the list.

    Args:
        l: iterable of hashable terms.

    Returns:
        A list of ints, one per term, in the same order as `l`.
    """
    occurrences = {}
    indexes = []
    for term in l:
        nth = occurrences.get(term, 0)
        indexes.append(nth)
        occurrences[term] = nth + 1
    return indexes

In [13]:
# Quick check: the repeated 'incels' gets index 1, first occurrences get index 0
unique_term_index(['incels', 'incels', 'teen'])

[0, 1, 0]

# Aggregate extracted actions and attributes

In [56]:
# Load data with extracted actions and attributes
import json
from collections import Counter

import pandas as pd

path = '../../data/incels/processed_comments.pkl'
data = pd.read_pickle(path)
data.info()

# One row per identity mention: the two list columns are exploded together
exp = data.explode(['netmapper_identity_matches', 'actions_attributes'])
# exp.info()

# Group identities
identity_groups_fpath = '../resources/identity_groups.json'
with open(identity_groups_fpath, 'r') as f:
    identity_groups = json.load(f)
print(len(identity_groups))

# Terms without an entry in the mapping keep their own name as the group
exp['identity_group'] = exp.netmapper_identity_matches.map(lambda x: identity_groups.get(x, x))
exploded = exp.explode('identity_group') # Count intersectional mentions as a mention in each of their categories
exploded.info()


def _term_counts(cells, field):
    """Tally the terms stored under `field` across a group's actions_attributes cells.

    Args:
        cells: iterable of per-mention dicts; non-dict cells (e.g. NaN) are skipped,
            matching the isinstance guard used when looking up examples.
        field: key to tally ('verbs_subj', 'verbs_obj' or 'adjs').

    Returns:
        List of (term, count) tuples, most frequent first.
    """
    return Counter(
        term
        for cell in cells
        if isinstance(cell, dict)
        for term in cell[field]
    ).most_common()


# For a quick trial run, group a sample instead: exploded.sample(int(1e6)).groupby('identity_group')
gped = exploded.groupby('identity_group')

# Aggregate actions and attribute for different identities
agg = gped.agg(verbs_subj = pd.NamedAgg(column='actions_attributes', aggfunc=lambda x: _term_counts(x, 'verbs_subj')),
                verbs_obj = pd.NamedAgg(column='actions_attributes', aggfunc=lambda x: _term_counts(x, 'verbs_obj')),
                adjs = pd.NamedAgg(column='actions_attributes', aggfunc=lambda x: _term_counts(x, 'adjs')),
                count = pd.NamedAgg(column='content', aggfunc='count'))
# Most-mentioned identity groups first (reassigned rather than sorted in place)
agg = agg.sort_values('count', ascending=False)
agg.info()
# agg[['verbs_subj', 'verbs_obj', 'adjs']].head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6248230 entries, 0 to 6248229
Data columns (total 11 columns):
 #   Column                            Dtype         
---  ------                            -----         
 0   type                              object        
 1   forum                             object        
 2   thread                            object        
 3   username                          object        
 4   date                              object        
 5   content                           object        
 6   parsed_date                       datetime64[ns]
 7   content_orig                      object        
 8   netmapper_identity_matches        object        
 9   netmapper_identity_matches_spans  object        
 10  actions_attributes                object        
dtypes: datetime64[ns](1), object(10)
memory usage: 524.4+ MB
423
<class 'pandas.core.frame.DataFrame'>
Int64Index: 13416057 entries, 0 to 6248229
Data columns (total 12 columns):
 #   

In [None]:
import plotly.express as px

N_GROUPS = 12  # number of most-mentioned identity groups to plot
N_TERMS = 50   # number of most frequent terms shown per chart

# One bar chart per (identity group, field) with that group's most frequent terms
for gp in agg.index[:N_GROUPS]:
    for col in ['verbs_subj', 'verbs_obj', 'adjs']:
        top_terms = agg.loc[gp, col][:N_TERMS]
        if not top_terms:
            # Nothing extracted for this group/field; zip(*[]) would fail to unpack
            continue
        terms, counts = zip(*top_terms)
        fig = px.bar(x=terms, y=counts, title=f'{gp} {col}', labels=dict(x="term", y="count"))
        fig.update_xaxes(tickangle=45)
        # fig.show(config = {'staticPlot': True})
        fig.show()

## Look into example uses of actions and attributes

In [52]:
# Look at example comments where `term` was extracted under `field` for identity group `gp`
gp = 'lgbtq_people'
term = 'like'
field = 'verbs_subj'

pd.set_option('display.max_colwidth', None)
# Non-dict cells (e.g. missing values) never match
crit = (exploded.identity_group == gp) & (exploded.actions_attributes.map(lambda x: term in x[field] if isinstance(x, dict) else False))
matches = exploded.loc[crit, ['identity_group', 'content', 'actions_attributes']]
# Show up to 5 random examples; cap at the number of matches so rare terms do not raise
matches.sample(min(5, len(matches)))

Unnamed: 0,identity_group,content,actions_attributes
6234080,lgbtq_people,manicel said : yeah that faggot does n't like me for some reason oh oh oh order 937 incoming,"{'verbs_subj': ['like'], 'verbs_obj': [], 'adjs': []}"
1001574,lgbtq_people,"1 . what are you doing on a sub about big dick op ? 2 . also i think most of those comments are from men , i.e. faggots . faggots like to get their ass fucked by big dick .","{'verbs_subj': ['like'], 'verbs_obj': [], 'adjs': []}"
3135343,lgbtq_people,"still noone can mog germany . for every german femoid we lose to a sand person , three new gooks are obtained . reborn said : "" new "" is not the right word . here in the south there was already a massive amount of strangers before the syrians came . in some places of town when i go to a store everybody except for me speaks turkish . it is really strange . sometimes you feel like you are the stranger because everyone around you is speaking another language . germans are dying out at an insane rate with only 1.2 children per women . i know there are places where noone speaks proper german anymore but you said it yourself : you feel enstranged from these people . there is still some german solidarity left even though most multicultural fags do n't like to admit it . on the bright side : being german ( or white , even ) is going to be a huge status symbol in the future when most people are "" guests "" .","{'verbs_subj': ['like'], 'verbs_obj': [], 'adjs': ['most', 'multicultural']}"
1603391,lgbtq_people,i hate it when normal fags like the same things as me .,"{'verbs_subj': ['like'], 'verbs_obj': [], 'adjs': ['normal']}"
4331494,lgbtq_people,"gymletethnicel said : this forum is the biggest joke on this earth , an even greater one that reddit banning all incel subreddits . this forum is indeed what inceltears makes it out to be . the forum is full of 30 + pedophiles who talk about ' prime jbs ' , they call themselves ' trans - agers ' . just fucking lol . just be a 36 year old preying on 12 year old girls theory . the fucking survey created by the mods clearly showed that 1/3rd of the forum has kissed a girl at least once , it showed that a decent amount were not virgins ( excluding escortcels ) . most of you are nothing but larpers who are pretending to be incel . you 'll have people here saying that they get tons of fucking matches in asia , but they will still claim they are incel . there was some faggot here by the name of itsover who ascended in thailand and then got a russian gf because he was n't a fakecel mentacel anymore . there have been people here who have made a mockery of the recent mass shooting in germany and were either mad because the shooter did not succeed or said the whole thing is the hoax . this is something that the alt - right and /pol / fags like to do so often . ' oh someone committed mass murder in a mosque and killed children ! must be a hoax ' . ' oh assad gassed an entire village but rt says al qaeda did it so i guess it must be a hoax ' . ' oh there are refugees from iraq and syria who want to go to europe , must be a white genocide going on ' . many so - called incels here just see inceldom as something cool that is controversial , they just like to be part of the ' evil misogynist movement of racist white men ' . news flash fakecels , inceldom can occur to anyone and that person does n't need to be a big fan of hitler and promote the white genocide meme . i saw some slav here say that hamudi 's inceldom is n't valid because he is a syrian refugee and has no right to complain . 
mods here are the same and that is why they allow fascist and blatant nazis . reminds me of the gay ass group called the attomwaffen division . most so - called incels who are part of the ( ( ( incel ) ) ) community are larpers who just hate holes and are deep into their extremist ideologies of different flavours , whether it be fascism , nazism , radical islam and/or what not . view attachment 158733 most hilarious part of this forum are the people with their reversed racepill . statistics clearly show that white people are the most sought after race and can easily get into relationships . for you bluepilled retards , go to the blackpill tab on this forum and you will see that indians + pakis , middle - easterners and asians are the least attractive , in that order . scientific blackpill - incel wiki incels.wiki yet some of you fucking dumbass retarded larpers tell me ' nah the statistics are bullshit ' . just lol at some of the delusional fucktards that believe white women are lusting over muslims and/or arabs . i 've been in countless situations where old women literally try to get away from me because i look like a stereotypical jihadi while i am an ex - muslim . got the terrorist and isis label thrown at me by young women and of course by the same types . you 're telling me that 95 % of fucking holes excluding ethnics is in the ethnic 's favor ? the only reason why some are so deluded is because they can not get a 10/10 blonde blue eyed stacy , or perhaps they are genuinely fucking retarded . tldr : most of you are /pol / tard ethnic - hating extremists who are larping as incels , while denying the most blatant of blackpills , the racepill . i am done with this forum , it is not truly blackpilled and many of you are genuinely racist . many of you are pretending to be incels , while you are not . many of you blame inceldom on blatant bullshit such as wrist size and frame . i 'm just so done with this nonsensical forum . 
i 'll just ldar with my video games while being a blackpilled soggy knee at heart , unlike most of you . anyway i 'm probably getting a 50 % warning or perma ban for this post alone so screw you guys , i 'm going to play some mw2 and get a couple nukes now . 200iq based post . hate to see you go but mods will likely rape you for this .","{'verbs_subj': ['like'], 'verbs_obj': [], 'adjs': []}"


In [29]:
# Look at actions
# NOTE: relies on `gp` already being set (it is assigned in the field lookup cell above).

pd.set_option('display.max_colwidth', None)
action_term = 'want'


def _actions_of(entry):
    """Return the action verbs stored in one actions_attributes cell.

    Handles both layouts that appear in this notebook: a single 'actions' list,
    or separate 'verbs_subj' / 'verbs_obj' lists. Non-dict cells yield no actions.
    """
    if not isinstance(entry, dict):
        return []
    if 'actions' in entry:
        return entry['actions']
    return entry.get('verbs_subj', []) + entry.get('verbs_obj', [])


crit = (exploded.identity_group == gp) & (exploded.actions_attributes.map(lambda x: action_term in _actions_of(x)))
action_matches = exploded.loc[crit, ['identity_group', 'content', 'actions_attributes']]
# Show up to 5 random examples; cap at the number of matches so rare terms do not raise
action_matches.sample(min(5, len(action_matches)))

Unnamed: 0,identity_group,content,actions_attributes
1780989,incels,"the way i see it , in regards to this characteristic , there 's two kinds of incel : one who 's given up on life but for whatever reason does n't want to or wo n't rope , and has thus condemned themselves to a life of ldaring aimlessly until they die ; and incels that know it 's over but still have goals / want to have a drive to live beyond foids . i ca n't understand being the first . i do n't want to let them win , and i want to maximize my own life where i can - perhaps achieve my own happiness without them . i understand the urge to rot , but i do n't understand not having personal motivations to have certain goals in life i 've been doing lots of thinking and i 'm going to throw my all into it one last time , i 'm going to try to improve my situation despite my history of my effort not mattering . i 'm going to focus on my goals and work towards a life of happiness without people .","{'actions': ['want'], 'attributes': []}"
411439,incels,"crypticel77 said : i never understood why some incels want to quit porn . no girl is gon na fuck us so what other option do we have ? if you quit you re still going to need sexual release at some point . if you stop for good , you 'll get wet dreams after a while . it 's messy to deal with but they 're fun and sometimes very realistic . porn is degenerated and sinful and to make things worse i 'm a giga sexual pervert who can only really coom to foids being degraded and abused . softcore does make me hard and eager and all but it 's tough to reach climax to it . the other day i tried fapping to some tasteful softcore stuff after a 6 days nofap and could n't coom . i literally fapped for almost 3 hours . i do n't want to look at it anymore . it reminds me of how sick foids are and how they love being abused and treated like shit . foids are disturbing . i know being sadistic is perverted but being masochistic is way more disturbing tbh .","{'actions': ['want'], 'attributes': []}"
2817645,incels,i do n't think most incels even want sex . we want acceptance and love .,"{'actions': ['want'], 'attributes': ['most']}"
1158904,incels,"those prices are way too low , but you 're also not factoring in your time . to entertain a bitch in between fucking and it 's not even fucking close . course let 's not act like even if things were comparable it would change anything ... incels are paying for a reason . also keep in mind taking a toilet out on a date does not mean sex happens . not the case with hookers . hooker skips the middle man , saves time and is guaranteed . it 's not just the sex incels want , but you also take what you can get .","{'actions': ['want'], 'attributes': []}"
411446,incels,crypticel77 said : i never understood why some incels want to quit porn . no girl is gon na fuck us,"{'actions': ['want'], 'attributes': []}"
