In [93]:
import os
import json
from collections import defaultdict, Counter
import csv
from helpers import get_sr_cats
from nltk.corpus import reuters, brown
import pandas as pd
import networkx as nx

In [2]:
ROOT = '/mnt/data0/lucy/manosphere/'
DATA = ROOT + 'data/'
LOGS = ROOT + 'logs/'

In [3]:
with open(LOGS + 'overall_word_births.json', 'r') as infile: 
    word_births = json.load(infile)

In [4]:
categories = get_sr_cats()
forums = ['FORUM_the_attraction', 'FORUM_pua_forum', 'FORUM_incels', 
          'FORUM_red_pill_talk', 'FORUM_avfm', 'FORUM_rooshv', 'FORUM_red_pill_talk']
mano_communities = set(categories.keys()) | set(forums)
print(mano_communities)

{'philosophyofrape', 'pickup', 'blackpillscience', 'suicidewatch', 'seduction', 'mensrightslinks', 'truefemcels', 'thanktrp', 'mensrights', 'FORUM_pua_forum', 'redpillparenting', 'mgtowbooks', 'femaledatingstrategy', 'FORUM_rooshv', 'incelspurgatory', 'askfds', 'thebluepill', '1ncels', 'mrref', 'malecels', 'foreveralonelondon', 'braincels', 'geotrp', 'supportcel', 'becomeaman', 'incelselfies', 'theglowup', 'pinkpillfeminism', 'pornfreerelationships', 'foreveraloneteens', 'lonelynonviolentmen', 'mgtowmusic', 'fpua', 'alttrp', 'rsd', 'askanincel', 'intactivists', 'inceltears', 'asktrufemcels', 'truecels', 'trufemcels', 'asktrp', 'masculism', 'FORUM_incels', 'foreveralone', 'trpofftopic', 'mgtow', 'FORUM_the_attraction', 'puascience', 'gold_digger', 'ladymras', 'fdssuperfans', 'marriedredpill', 'redpillwomen', 'mensrightslaw', 'gymcels', 'depression', 'gaycel', 'redpillwives', 'foreverunwanted', 'FORUM_red_pill_talk', 'pussypass', 'mractivism', 'inceldense', 'foreveralonedating', 'FORUM_a

# Get lexical innovation vocabulary

First, we find a suitable cutoff for words that we can ensure they are actually new (and not just new because they are part of general language and some communities are earlier in the dataset than others. 

We remove bigrams whose second token also appears in our vocabulary, since those tend to be innovative phrases less often. This removes terms such as "most girls" yet retains many innovative phrase such as "carousel riders". 

We use the reuters corpus (created in 2000) to filter out non-innovative words. 

In [5]:
reuters_words = set([w.lower() for w in reuters.words()])
brown_words = set([w.lower() for w in brown.words()])
print(list(reuters_words)[:10])

['electricians', 'repairs', 'bonus', 'kassenobligation', 'unavoidable', 'black', 'ipla', 'contemplated', 'friedhelm', 'vlissingen']


In [6]:
common_sources = Counter()
yearly_innovations = defaultdict(list) # {year: [list of innovations]}
for w in word_births: 
    if w in reuters_words or w in brown_words: continue # not an innovation
    if w.endswith('s'): 
        if w[:-1] in reuters_words or w[:-1] in brown_words: continue # not an innovation
    if ' ' in w: # bigram
        toks = w.split(' ')
        if toks[-1] in word_births or toks[-1] in reuters_words or toks[-1] in brown_words: continue
    content = word_births[w]
    date = content[0]
    year = date.split('-')[0]
    sources = content[1]
    # uncomment below if you want to find words that only manosphere communities pioneeered
    #if set(sources) & mano_communities: 
    yearly_innovations[year].append(w)
    common_sources.update(sources)

In [7]:
common_sources.most_common()

[('FORUM_the_attraction', 577),
 ('reddit.com', 343),
 ('FORUM_red_pill_talk', 39),
 ('politics', 12),
 ('askreddit', 11),
 ('mensrights', 11),
 ('incels', 10),
 ('programming', 9),
 ('FORUM_pua_forum', 8),
 ('funny', 5),
 ('pics', 4),
 ('thebluepill', 4),
 ('science', 4),
 ('wtf', 4),
 ('drama', 4),
 ('shitredditsays', 4),
 ('the_donald', 3),
 ('atheism', 3),
 ('seduction', 3),
 ('entertainment', 3),
 ('theredpill', 3),
 ('business', 3),
 ('news', 2),
 ('FORUM_rooshv', 2),
 ('4chan4trump', 2),
 ('mgtow', 2),
 ('purplepilldebate', 2),
 ('askmen', 2),
 ('videos', 2),
 ('worldnews', 2),
 ('antipozi', 1),
 ('kotakuinaction', 1),
 ('puahate', 1),
 ('waluigi', 1),
 ('rupaulsdragrace', 1),
 ('wikipedia', 1),
 ('oney', 1),
 ('features', 1),
 ('badhistory', 1),
 ('netsec', 1),
 ('gadgets', 1),
 ('gatekeeping', 1),
 ('anarchism', 1),
 ('askwomen', 1),
 ('cringeanarchy', 1),
 ('bestof', 1),
 ('justneckbeardthings', 1),
 ('vegan', 1),
 ('penis', 1),
 ('bacon', 1),
 ('equality', 1),
 ('egalitarian

In [8]:
for year in range(2005, 2010): 
    print(year, yearly_innovations[str(year)])
    print()

2005 ['gf', 'assholes', 'eachother', 'chodes', 'roomate', 'amogs', 'hypnotist', 'goofball', 'geek', 'cunt', 'fuckers', 'cunts', 'hippies', 'masseuse', 'enablers', 'oddball', 'newb', 'toastmasters', 'joker', 'narcissist', 'chubby chasers', 'freelancers', 'shaman', 'wackos', 'boyz', 'infj', 'server', 'keyboard jockeys', 'bloggers', 'noobie', 'prude', 'rapist', 'exes', 'shorty', 'gamer', 'brat', 'perverts', 'womanizer', 'dudes', 'girlfriends', 'smartass', 'hb2', 'stalker', 'stalkers', 'sluts', 'douche', 'noob', 'hb10', 'lifeguard', 'boyfriends', 'nerd', 'hotties', 'dumbass', 'weirdos', 'jerkoff', 'mods', 'moron', 'niceguy', 'barmaid', 'morons', 'jedi', 'wanker', 'selector', 'hb4', 'girlfriend', 'dude', 'slut', 'surfers', 'druggies', 'caveman', 'pornstar', 'hb7', 'bouncers', 'baristas', 'airhead', 'hb5', 'hb6', 'puas', 'misfits', 'hacker', 'toddler', 'paedophile', 'filmmaker', 'commenters', 'pua', 'hb9', 'wingmen', 'dork', 'nerds', 'doofus', 'cheerleader', 'pervert', 'amog', 'babysitter', 

In [9]:
innov_vocab = set()
for year in range(2007, 2020): 
    print(year, yearly_innovations[str(year)])
    innov_vocab.update(yearly_innovations[str(year)])
    print()

2007 ['millennials', 'normies', 'hikikomori', 'aspies', 'sahm', 'autists', 'wimmenz', 'beckys', 'wymyn', 'fucktoy', 'randos', 'larpers', 'neurotypicals', 'stacie', 'landwhale', 'hambeasts', 'weeaboo', 'womyns', 'people irl', 'incel', 'fembot', 'camwhore', 'sex bots', 'cosplayers', 'cum dumpsters', 'mgtow', 'globalists', 'old hags', 'masculist', 'equalist', 'sex bot', 'dravidian', 'friendo', 'radfems', 'terf', 'foid', 'camgirl', 'femnazis', 'xw', 'infiltrator', 'wageslave', 'cuckolds', 'femnazi', 'intactivists', 'homebody', 'antifa', 'aspie', 'hambeast', 'female supremacist', 'batterers', 'cum dumpster', 'mra', 'transpeople', 'filipinas', 'stepdaughter', 'assaulter', 'transwoman', 'sloot', 'sexbot', 'sexbots', 'baby daddies', 'manlet', 'influencer', 'larper', 'feminsts', 'mras', 'prepper', 'honey badgers', 'tyrones', 'transwomen', 'cuckold', 'womenz', 'batterer', 'camgirls', 'masculinist', 'cumdumpster', 'femails', 'faer']

2008 ['women irl', 'weebs', 'suffragists', 'rape apologists', '

## Analyze source and destinations of lexical innovations

In [10]:
print(len(innov_vocab))
with open(LOGS + 'lexical_innovations.txt', 'w') as outfile: 
    for w in innov_vocab: 
        outfile.write(w + '\n')

254


In [53]:
common_sources = Counter()
mano_source_words = set()
outside_source_words = set()
for w in word_births: 
    if w not in innov_vocab: continue
    content = word_births[w]
    date = content[0]
    year = date.split('-')[0]
    if set(sources) & mano_communities: 
        mano_source_words.add(w)
    else: 
        outside_source_words.add(w)
    sources = content[1]
    common_sources.update(sources)

In [54]:
common_sources.most_common(30)

[('reddit.com', 65),
 ('FORUM_red_pill_talk', 39),
 ('FORUM_the_attraction', 16),
 ('politics', 12),
 ('askreddit', 11),
 ('mensrights', 11),
 ('incels', 10),
 ('FORUM_pua_forum', 7),
 ('funny', 5),
 ('pics', 4),
 ('thebluepill', 4),
 ('science', 4),
 ('wtf', 4),
 ('drama', 4),
 ('programming', 4),
 ('shitredditsays', 4),
 ('the_donald', 3),
 ('atheism', 3),
 ('seduction', 3),
 ('entertainment', 3),
 ('theredpill', 3),
 ('business', 3),
 ('news', 2),
 ('FORUM_rooshv', 2),
 ('4chan4trump', 2),
 ('mgtow', 2),
 ('purplepilldebate', 2),
 ('askmen', 2),
 ('videos', 2),
 ('worldnews', 2)]

In [15]:
top_n_subreddits = set()
with open(DATA + 'all_reddit_post_counts/top_subreddits.txt', 'r') as infile: 
    line_count = 0
    for line in infile: 
        top_n_subreddits.add(line.strip().split(' ')[0])
        line_count += 1
        if line_count == 500: break
print(len(top_n_subreddits))
for source in common_sources: 
    if source.startswith('FORUM_'): continue
    if source not in top_n_subreddits and source not in mano_communities: 
        print(source)

500
antipozi
kotakuinaction
puahate
waluigi
wikipedia
oney
4chan4trump
badhistory
netsec
gatekeeping
anarchism
purplepilldebate
justneckbeardthings
drama
bacon
equality
egalitarian
cooking
computebazaar
buildapcsales
shitredditsays
transgender
subredditdrama
diablo
tumblrinaction
cuckold
dirtykik
feminisms
soccerspirits
nofapjuly
pfjerk
4chan
celebrities
meetup
gay
sidehugs
gaybros
fitnesscirclejerk
pets
cortexcommand
theoryofreddit


In [14]:
print(mano_source_words)
print()
print(outside_source_words)

{'normie scum', 'hambeast', 'transwomen', 'blackcel', 'faer', 'oldcel', 'foid', 'escortcels', 'brigaders', 'influencer', 'gymcel', 'chadpreet', 'fuckboy', 'poorcel', 'volcel', 'redpiller', 'looksmatches', 'larpers', 'chadcel', 'norwood reaper', 'roasties', 'cuckqueers', 'sexbot', 'chaddam', 'feminsts', 'suffragists', 'red piller', 'chadlet', 'locationcel', 'ricecels', 'itcels', 'femails', 'oldcels', 'mentalcel', 'cuckservatives', 'tradcuck', 'sub8s', 'beta bux', 'ethnicel', 'camwhore', 'heightcel', 'fakecels', 'intactivist', 'manlet', 'chadlite', 'wymyn', 'friendo', 'rapefugees', 'stepdaughter', 'manlets', 'richcel', 'turbomanlet', 'bluepiller', 'mentalcels', 'aspies', 'landwhale', 'manginas', 'edgelord', 'trper', 'looksmatch', 'srsers', 'sluthaters', 'cuckcel', 'femcel', 'youngcels', 'wahmyn', 'gigachads', 'youngcel', 'khhv', 'numales', 'landwhales', 'shitlord', 'manchildren', 'wristcel', 'noodlewhore', 'wahmen', 'weebs', 'terfs', 'sex bot', 'intactivists', 'sedditors', 'women irl', '

In [55]:
plural = set()
singular = set()
for w in innov_vocab: 
    if w.endswith('s'): 
        plural.add(w)
    else: 
        singular.add(w)
print(plural)
print()
print(singular)

{'camgirls', 'fhos', 'tyrones', 'shitskins', 'mgtowers', 'moids', 'antifeminists', 'randos', 'foids', 'cucktears', 'escortcels', 'brigaders', 'redpillers', 'bpers', 'mgtows', 'femnazis', 'looksmatches', 'stacies', 'larpers', 'womyns', 'normies', 'beckys', 'rvfers', 'jailbaits', 'roasties', 'mras', 'fuckboys', 'currycels', 'femoids', 'cuckqueers', 'feminsts', 'suffragists', 'ethniks', 'soyboys', 'female protagonists', 'tradthots', 'wagecucks', 'filipinas', 'ethnicels', 'ricecels', 'chadlites', 'itcels', 'blue pillers', 'besties', 'femails', 'oldcels', 'dark triads', 'shitlords', 'sub8s', 'cuckservatives', 'faers', 'cucks', 'noodlewhores', 'gymcels', 'female supremacists', 'fakecels', 'autists', 'incels', 'rapefugees', 'normtards', 'manlets', 'mentalcels', 'punjabis', 'aspies', 'femcels', 'rape apologists', 'manginas', 'neurotypicals', 'bronies', 'honey badgers', 'cosplayers', 'baby daddies', 'old hags', 'srsers', 'sluthaters', 'hambeasts', 'sex bots', 'youngcels', 'gigachads', 'trpers',

### Sustained periods in mainstream

Run `mainstream_sustained_periods()` in `gram_counting.py`.

In [83]:
with open(LOGS + 'sustained_mainstream.json', 'r') as infile: 
    mainstream_sp = json.load(infile)

In [84]:
print(len(mainstream_sp))

107


Run `manosphere_sustained_periods()` in `gram_counting.py`.

In [85]:
with open(LOGS + 'sustained_manosphere.json', 'r') as infile: 
    mano_sp = json.load(infile)

In [100]:
network_edges = [] # [(source, destination)]
manosphere_lead_words = []
manosphere_com = set()
mainstream_com = set()
for w in mainstream_sp: 
    min_month = 'z'
    for sr in mainstream_sp[w]: 
        mainstream_com.add(sr)
        start = mainstream_sp[w][sr][0]
        min_month = min(start, min_month)
    if w not in mano_sp: continue
    print(w, len(mainstream_sp[w]), list(mainstream_sp[w].keys()))
    print('MAINSTREAM:', min_month)
    main_min_month = min_month
    min_month = 'z'
    for sr in mano_sp[w]: 
        manosphere_com.add(sr)
        start = mano_sp[w][sr][0]
        min_month = min(start, min_month)
    print(w, len(mano_sp[w]), list(mano_sp[w].keys()))
    print("MANOSPHERE:", min_month)
    mano_min_month = min_month
    if mano_min_month < main_min_month: 
        print("****MANOSPHERE LEAD")
        manosphere_lead_words.append(w)
        leads = []
        for sr in mano_sp[w]: 
            start = mano_sp[w][sr][0]
            if start == mano_min_month: 
                leads.append(sr)
        dest = mainstream_sp[w]
        for l in leads: 
            for d in dest: 
                network_edges.append((l, d))
    print()

millennials 92 ['Bitcoin', 'PoliticalHumor', 'baseball', 'relationships', 'politics', 'dankmemes', 'im14andthisisdeep', 'technology', 'atheism', 'AskMen', 'pics', 'ukpolitics', 'AdviceAnimals', 'Entrepreneur', 'gaming', 'CFB', 'RoastMe', 'facepalm', 'Music', 'PewdiepieSubmissions', 'Philippines', 'canada', 'aww', 'personalfinance', 'exmormon', 'europe', 'nba', 'OldSchoolCool', 'mildlyinteresting', 'gifs', 'australia', 'OutOfTheLoop', 'tifu', 'funny', 'FortNiteBR', 'HistoryMemes', 'news', 'Christianity', 'videos', 'worldpolitics', 'AskWomen', 'The_Donald', 'nottheonion', 'books', 'modernwarfare', 'CryptoCurrency', 'science', 'DestinyTheGame', 'Libertarian', 'cars', 'Conservative', 'trashy', 'AskReddit', 'me_irl', 'ComedyCemetery', 'Showerthoughts', 'POLITIC', 'BlackPeopleTwitter', 'LifeProTips', 'Economics', 'movies', 'worldnews', 'SandersForPresident', 'mildlyinfuriating', 'cringepics', 'unitedkingdom', 'wallstreetbets', 'india', 'wholesomememes', 'television', 'rupaulsdragrace', 'nfl'

In [89]:
print(len(manosphere_lead_words))
print(manosphere_lead_words)

26
['cuck', 'incels', 'cucks', 'incel', 'mgtow', 'femoid', 'femoids', 'femcel', 'mra', 'mras', 'autists', 'normie scum', 'transwomen', 'menz', 'manlets', 'transwoman', 'cuckolds', 'landwhale', 'sexbot', 'neurotypicals', 'radfems', 'aspies', 'volcel', 'volcels', 'femcels', 'mgtows']


In [110]:
network_weights = Counter(network_edges)
G = nx.DiGraph()
edge_names = {'FORUM_red_pill_talk': 'Red Pill Talk', 'FORUM_mgtow': 'MGTOW'}
for edge in network_weights: 
    start = edge[0]
    if start.startswith('FORUM_'): 
        start = edge_names[start]
    G.add_edge(start, edge[1], weight=network_weights[edge])
for n in G.nodes: 
    if n in set(edge_names.values()): 
        G.nodes[n]['dataset'] = 'forum'
    elif n in manosphere_com: 
        G.nodes[n]['dataset'] = 'mano'
    else: 
        G.nodes[n]['dataset'] = 'main'
nx.write_gexf(G, LOGS + "word_spread.gexf")

In [111]:
leads = Counter()
dest = Counter()
for tup in network_weights: 
    leads[tup[0]] += 1
    dest[tup[1]] += 1
print(leads.most_common(10))
print(dest.most_common(10))

[('FORUM_red_pill_talk', 133), ('theredpill', 115), ('mensrights', 47), ('incels', 3), ('braincels', 2), ('FORUM_mgtow', 2), ('mgtow', 1)]
[('teenagersnew', 4), ('dankmemes', 4), ('The_Donald', 4), ('AskReddit', 4), ('ChapoTrapHouse', 4), ('unpopularopinion', 4), ('politics', 3), ('AskMen', 3), ('pics', 3), ('gaming', 3)]


# FDS & Femcels

First, we look at common words in each of these communities. 

FDS only occurs in 2019, while Femcels occur between 2018 and 2019. 

In [16]:
with open(LOGS + 'gram_counts/combined_catyear_word_count.json', 'r') as infile: 
    catyear_word_count = json.load(infile)

In [27]:
for catyear in catyear_word_count: 
    if catyear.startswith('FDS'): 
        print(catyear)
        this_counts = Counter(catyear_word_count[catyear])
        print(this_counts.most_common(100))
        print()
    elif catyear.startswith('Femcels'): 
        print(catyear)
        this_counts = Counter(catyear_word_count[catyear])
        print(this_counts.most_common(100))
        print()

Femcels_2018
[('women', 6664), ('she', 4157), ('guys', 2899), ('woman', 2786), ('girl', 2429), ('girls', 2353), ('men', 2070), ('incel', 1478), ('someone', 1426), ('femcels', 1370), ('female', 1145), ('guy', 1067), ('man', 955), ('friend', 932), ('person', 881), ('others', 796), ('family', 701), ('femcel', 659), ('kids', 625), ('friends', 568), ('dude', 552), ('stacies', 513), ('partner', 511), ('group', 485), ('parents', 456), ('mom', 428), ('child', 383), ('sis', 377), ('bitch', 350), ('chads', 344), ('ugly women', 333), ('you guys', 332), ('stacy', 328), ('most people', 326), ('normie', 306), ('dad', 303), ('black women', 297), ('males', 287), ('boys', 284), ('kid', 274), ('troll', 269), ('dudes', 258), ('most men', 250), ('mgtow', 236), ('boy', 231), ('someone else', 225), ('normies', 222), ('stacey', 219), ('baby', 218), ('becky', 211), ('most women', 197), ('attractive women', 195), ('partners', 193), ('adult', 174), ('father', 173), ('other people', 171), ('virgin', 162), ('bf',

In [31]:
femcel_fds_df = pd.read_csv(LOGS + 'semantics_mano/femcel_fds_exp.csv', index_col=0)
femcel_fds_df

Unnamed: 0,axis,word,community,score
0,absolute.a.01,women,Femcels,0.060758
1,absolute.a.01,men,Femcels,0.110514
2,absolute.a.01,female,Femcels,0.102758
3,absolute.a.01,male,Femcels,0.155545
4,absolute.a.01,women,Incels,0.068412
...,...,...,...,...
8187,worthy.a.01,male,FDS,0.151995
8188,worthy.a.01,women,TRP,0.141234
8189,worthy.a.01,men,TRP,0.130761
8190,worthy.a.01,female,TRP,0.145246


In [33]:
femcel_df = femcel_fds_df[femcel_fds_df['community'] == 'Femcels'].rename(columns={"score": "femcel_score"})
incel_df = femcel_fds_df[femcel_fds_df['community'] == 'Incels'].rename(columns={"score": "incel_score"})
fds_df = femcel_fds_df[femcel_fds_df['community'] == 'FDS'].rename(columns={"score": "fds_score"})
trp_df = femcel_fds_df[femcel_fds_df['community'] == 'TRP'].rename(columns={"score": "trp_score"})

In [39]:
incel_femcel_df = pd.merge(femcel_df, incel_df,  how='outer', on=['axis', 'word'])
incel_femcel_df['raw_diff'] = incel_femcel_df['femcel_score'] - incel_femcel_df['incel_score']
incel_femcel_df['abs_diff'] = abs(incel_femcel_df['raw_diff'])
incel_femcel_df

Unnamed: 0,axis,word,community_x,femcel_score,community_y,incel_score,raw_diff,abs_diff
0,absolute.a.01,women,Femcels,0.060758,Incels,0.068412,-0.007654,0.007654
1,absolute.a.01,men,Femcels,0.110514,Incels,0.103083,0.007431,0.007431
2,absolute.a.01,female,Femcels,0.102758,Incels,0.106544,-0.003786,0.003786
3,absolute.a.01,male,Femcels,0.155545,Incels,0.149159,0.006386,0.006386
4,abstemious.a.01,women,Femcels,0.010938,Incels,0.021679,-0.010741,0.010741
...,...,...,...,...,...,...,...,...
2043,worldly.a.01,male,Femcels,-0.083667,Incels,-0.079809,-0.003858,0.003858
2044,worthy.a.01,women,Femcels,0.149462,Incels,0.143995,0.005467,0.005467
2045,worthy.a.01,men,Femcels,0.134662,Incels,0.140710,-0.006048,0.006048
2046,worthy.a.01,female,Femcels,0.148622,Incels,0.140059,0.008563,0.008563


In [49]:
axis_df = incel_femcel_df[incel_femcel_df['axis'] == 'beautiful.a.01']
axis_df

Unnamed: 0,axis,word,community_x,femcel_score,community_y,incel_score,raw_diff,abs_diff
152,beautiful.a.01,women,Femcels,-0.031545,Incels,-0.035566,0.00402,0.00402
153,beautiful.a.01,men,Femcels,-0.013514,Incels,-0.005568,-0.007946,0.007946
154,beautiful.a.01,female,Femcels,0.021769,Incels,0.018666,0.003103,0.003103
155,beautiful.a.01,male,Femcels,0.034883,Incels,0.035578,-0.000695,0.000695


In [45]:
for w in ['women', 'men', 'female', 'male']: 
    word_df = incel_femcel_df[incel_femcel_df['word'] == w]
    idx = word_df['abs_diff'].idxmax()
    print(word_df.ix[idx])

axis            awake.a.01
word                 women
community_x        Femcels
femcel_score     0.0282544
community_y         Incels
incel_score     0.00682351
raw_diff         0.0214309
abs_diff         0.0214309
Name: 136, dtype: object
axis            exterior.a.01
word                      men
community_x           Femcels
femcel_score        0.0313608
community_y            Incels
incel_score         0.0539819
raw_diff           -0.0226211
abs_diff            0.0226211
Name: 693, dtype: object
axis            concerned.a.01
word                    female
community_x            Femcels
femcel_score         0.0901957
community_y             Incels
incel_score          0.0749193
raw_diff             0.0152764
abs_diff             0.0152764
Name: 362, dtype: object
axis            regular.a.01
word                    male
community_x          Femcels
femcel_score      0.00330827
community_y           Incels
incel_score        -0.015772
raw_diff           0.0190802
abs_diff          

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.


In [51]:
fds_trp_df = pd.merge(fds_df, trp_df,  how='outer', on=['axis', 'word'])
fds_trp_df['raw_diff'] = fds_trp_df['fds_score'] - fds_trp_df['trp_score']
fds_trp_df['abs_diff'] = abs(fds_trp_df['raw_diff'])
fds_trp_df

Unnamed: 0,axis,word,community_x,fds_score,community_y,trp_score,raw_diff,abs_diff
0,absolute.a.01,women,FDS,0.061165,TRP,0.067281,-0.006116,0.006116
1,absolute.a.01,men,FDS,0.107137,TRP,0.102403,0.004734,0.004734
2,absolute.a.01,female,FDS,0.080855,TRP,0.104319,-0.023464,0.023464
3,absolute.a.01,male,FDS,0.145136,TRP,0.147706,-0.002570,0.002570
4,abstemious.a.01,women,FDS,0.012801,TRP,0.020298,-0.007496,0.007496
...,...,...,...,...,...,...,...,...
2043,worldly.a.01,male,FDS,-0.076996,TRP,-0.075622,-0.001374,0.001374
2044,worthy.a.01,women,FDS,0.149915,TRP,0.141234,0.008680,0.008680
2045,worthy.a.01,men,FDS,0.135024,TRP,0.130761,0.004263,0.004263
2046,worthy.a.01,female,FDS,0.141976,TRP,0.145246,-0.003270,0.003270


In [52]:
for w in ['women', 'men', 'female', 'male']: 
    word_df = fds_trp_df[incel_femcel_df['word'] == w]
    idx = word_df['abs_diff'].idxmax()
    print(word_df.ix[idx])

axis            rich.a.01
word                women
community_x           FDS
fds_score        0.020947
community_y           TRP
trp_score     -0.00140336
raw_diff        0.0223503
abs_diff        0.0223503
Name: 1592, dtype: object
axis           prejudiced.a.02
word                       men
community_x                FDS
fds_score            -0.255107
community_y                TRP
trp_score            -0.225362
raw_diff            -0.0297455
abs_diff             0.0297455
Name: 1429, dtype: object
axis           womanly.a.01
word                 female
community_x             FDS
fds_score         -0.469929
community_y             TRP
trp_score         -0.555332
raw_diff          0.0854025
abs_diff          0.0854025
Name: 2038, dtype: object
axis           high.a.04
word                male
community_x          FDS
fds_score     -0.0263715
community_y          TRP
trp_score      0.0463511
raw_diff      -0.0727226
abs_diff       0.0727226
Name: 903, dtype: object


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated
  after removing the cwd from sys.path.
