
# Comparison of Original and Expanded Lexicons on the Cohort Analysis Task

Subreddit: Social Anxiety

In [15]:
from pathlib import Path
import pandas as pd

from config import global_config
from lexicons2 import Values, ValuesExpanded, Liwc2015, Liwc2015Expanded
from spaces import WordEmbeddings, LabelEmbeddings

In [16]:
# Key of the cohort subreddit; used below to look up its corpus and to name its model and rank files.
COHORT_NAME = 'socialanxiety'

In [17]:
# Shared lookups: submission corpora keyed by subreddit, and the base path for models.
submissions_by_subreddit = global_config.reddit.submissions
models_root = global_config.paths.models

# Cohort under study.
cohort_corpus_path = submissions_by_subreddit[COHORT_NAME]
cohort_models_path = f'{models_root}/{COHORT_NAME}'

# Control group: the IAMA subreddit.
control_corpus_path = submissions_by_subreddit['IAMA']
control_models_path = f'{models_root}/IAMA'

In [18]:
# Every rank table lives under the same base path; only the file name differs.
ranks_root = global_config.paths.ranks

# Label-distance tables for the cohort (original / expanded lexicons).
cohort_ranks_csv = f'{ranks_root}/{COHORT_NAME}_ranks.csv'
expanded_cohort_ranks_csv = f'{ranks_root}/{COHORT_NAME}_ranks_expanded.csv'

# Label-distance tables for the IAMA control (original / expanded lexicons).
control_ranks_csv = f'{ranks_root}/IAMA_ranks.csv'
expanded_control_ranks_csv = f'{ranks_root}/IAMA_ranks_expanded.csv'

# Cohort-versus-control rank comparison tables (original / expanded lexicons).
relative_ranks_csv = f'{ranks_root}/{COHORT_NAME}_relative-ranks.csv'
expanded_relative_ranks_csv = f'{ranks_root}/{COHORT_NAME}_relative-ranks_expanded.csv'

Build Control Spaces

In [19]:
# Word-embedding space for the control (IAMA) corpus.
control_word_space = WordEmbeddings(
	corpus_path=control_corpus_path,
	model_path=control_models_path,
).build()

In [20]:
# Control label space built from the original (non-expanded) lexicons.
org_control_label_space = LabelEmbeddings(
	lexicons=[Liwc2015(), Values()],
	word_embeddings=control_word_space,
).build()

In [21]:
# Control label space built from the expanded lexicons, over the same control word space.
expanded_control_label_space = LabelEmbeddings(
	lexicons=[Liwc2015Expanded(), ValuesExpanded()],
	word_embeddings=control_word_space,
).build()

Build Cohort Spaces

In [22]:
# Word-embedding space for the cohort corpus.
cohort_word_space = WordEmbeddings(
	corpus_path=cohort_corpus_path,
	model_path=cohort_models_path,
).build()

In [23]:
# Cohort label space built from the original (non-expanded) lexicons.
org_cohort_label_space = LabelEmbeddings(
	lexicons=[Liwc2015(), Values()],
	word_embeddings=cohort_word_space,
).build()

In [24]:
# Cohort label space built from the expanded lexicons, over the same cohort word space.
expanded_cohort_label_space = LabelEmbeddings(
	lexicons=[Liwc2015Expanded(), ValuesExpanded()],
	word_embeddings=cohort_word_space,
).build()

Save and view results

In [25]:
def save_if_not_save_and_return(space: LabelEmbeddings, path: str):
	"""Return the label-distance table stored at ``path``, writing it first when absent.

	When nothing exists at ``path``, ``space.save_distances_to_csv(path)`` is
	called to produce the file. An existing file is reused as-is and is not
	refreshed, even if ``space`` has changed since it was written.

	Args:
		space: Label space whose distances are saved when the CSV is missing.
		path: Location of the CSV file.

	Returns:
		A DataFrame read from ``path`` with the columns ``label_one``,
		``label_two`` and ``distance``. The file is read as header-less, since
		the column names are supplied explicitly.
	"""
	distance_columns = ['label_one', 'label_two', 'distance']
	already_saved = Path(path).exists()
	if not already_saved:
		space.save_distances_to_csv(path)
	distances = pd.read_csv(path, names=distance_columns)
	return distances

# Control subreddit: original lexicons first, then the expanded ones.
control_ranks = save_if_not_save_and_return(
	space=org_control_label_space,
	path=control_ranks_csv,
)
expanded_control_ranks = save_if_not_save_and_return(
	space=expanded_control_label_space,
	path=expanded_control_ranks_csv,
)

# Cohort subreddit: same pair of tables.
cohort_ranks = save_if_not_save_and_return(
	space=org_cohort_label_space,
	path=cohort_ranks_csv,
)
expanded_cohort_ranks = save_if_not_save_and_return(
	space=expanded_cohort_label_space,
	path=expanded_cohort_ranks_csv,
)

In [26]:
# Column layout shared by both relative-rank CSVs (read as header-less files).
relative_rank_columns = [
	'label_one', 'label_two',
	'current_rank', 'control_rank',
	'current_distance', 'control_distance',
	'rank_delta', 'distance_delta',
]

# Original lexicons: compare the cohort against the control, then read the CSV back.
org_cohort_label_space.compute_rank_deltas(org_control_label_space, relative_ranks_csv)
relative_ranks = pd.read_csv(relative_ranks_csv, names=relative_rank_columns)

# Expanded lexicons: same comparison, written to and read from its own CSV.
expanded_cohort_label_space.compute_rank_deltas(expanded_control_label_space, expanded_relative_ranks_csv)
expanded_relative_ranks = pd.read_csv(expanded_relative_ranks_csv, names=relative_rank_columns)

View relative ranks (cohort vs. control)

In [27]:
# Display the cohort-vs-control rank deltas computed with the original lexicons.
relative_ranks

Unnamed: 0,label_one,label_two,current_rank,control_rank,current_distance,control_distance,rank_delta,distance_delta
0,liwc2015:function,values:feeling-good,76,43,0.595167,0.499940,33,0.095227
1,liwc2015:function,values:truth,78,55,0.673343,0.713501,23,-0.040158
2,liwc2015:function,values:children,72,50,0.303095,0.680796,22,-0.377701
3,liwc2015:function,values:animals,81,64,0.957955,0.794112,17,0.163843
4,liwc2015:function,liwc2015:posemo,47,32,0.026139,0.259349,15,-0.233209
...,...,...,...,...,...,...,...,...
6967,values:animals,liwc2015:home,3,68,0.904813,0.975827,-65,-0.071013
6968,values:animals,values:truth,2,71,0.877259,0.982374,-69,-0.105116
6969,values:animals,liwc2015:male,12,82,0.934944,1.117406,-70,-0.182462
6970,values:animals,liwc2015:body,5,77,0.921115,1.040876,-72,-0.119761


Unnamed: 0,label_one,label_two,current_rank,control_rank,current_distance,control_distance,rank_delta,distance_delta
0,liwc2015:function,values:feeling-good,76,43,0.595167,0.499940,33,0.095227
1,liwc2015:function,values:truth,78,55,0.673343,0.713501,23,-0.040158
2,liwc2015:function,values:children,72,50,0.303095,0.680796,22,-0.377701
3,liwc2015:function,values:animals,81,64,0.957955,0.794112,17,0.163843
4,liwc2015:function,liwc2015:posemo,47,32,0.026139,0.259349,15,-0.233209
...,...,...,...,...,...,...,...,...
6967,values:animals,liwc2015:home,3,68,0.904813,0.975827,-65,-0.071013
6968,values:animals,values:truth,2,71,0.877259,0.982374,-69,-0.105116
6969,values:animals,liwc2015:male,12,82,0.934944,1.117406,-70,-0.182462
6970,values:animals,liwc2015:body,5,77,0.921115,1.040876,-72,-0.119761


In [28]:
# Display the cohort-vs-control rank deltas computed with the expanded lexicons.
expanded_relative_ranks

Unnamed: 0,label_one,label_two,current_rank,control_rank,current_distance,control_distance,rank_delta,distance_delta
0,liwc2015:function,liwc2015:posemo,53,34,0.026139,0.259349,19,-0.233209
1,liwc2015:function,liwc2015:nonflu,79,65,0.327410,0.754531,14,-0.427121
2,liwc2015:function,liwc2015:money,72,58,0.119109,0.681783,14,-0.562674
3,liwc2015:function,liwc2015:pconcern,67,54,0.099536,0.624154,13,-0.524618
4,liwc2015:function,liwc2015:sad,70,57,0.110813,0.666393,13,-0.555579
...,...,...,...,...,...,...,...,...
7135,liwc2015:death,liwc2015:discrep,11,61,0.757361,0.980766,-50,-0.223406
7136,liwc2015:death,liwc2015:pconcern,3,57,0.744971,0.974575,-54,-0.229603
7137,liwc2015:death,liwc2015:informal,1,65,0.740628,1.004945,-64,-0.264316
7138,liwc2015:death,liwc2015:nonflu,6,77,0.746120,1.086080,-71,-0.339960


Unnamed: 0,label_one,label_two,current_rank,control_rank,current_distance,control_distance,rank_delta,distance_delta
0,liwc2015:function,liwc2015:posemo,53,34,0.026139,0.259349,19,-0.233209
1,liwc2015:function,liwc2015:nonflu,79,65,0.327410,0.754531,14,-0.427121
2,liwc2015:function,liwc2015:money,72,58,0.119109,0.681783,14,-0.562674
3,liwc2015:function,liwc2015:pconcern,67,54,0.099536,0.624154,13,-0.524618
4,liwc2015:function,liwc2015:sad,70,57,0.110813,0.666393,13,-0.555579
...,...,...,...,...,...,...,...,...
7135,liwc2015:death,liwc2015:discrep,11,61,0.757361,0.980766,-50,-0.223406
7136,liwc2015:death,liwc2015:pconcern,3,57,0.744971,0.974575,-54,-0.229603
7137,liwc2015:death,liwc2015:informal,1,65,0.740628,1.004945,-64,-0.264316
7138,liwc2015:death,liwc2015:nonflu,6,77,0.746120,1.086080,-71,-0.339960
