In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from IPython.core.display import display, HTML
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import random
from string import Template
import json
import sys 
sys.path.append('..')
# coEgoNetworks
import src.coegonets as cnets
import src.notebook as nb

In [None]:
# Load the sigma.js core library and its plugins into the notebook page so that
# the graph markup produced later (nb.sigmaJSGraph) can find them.
# Script paths are relative to the notebook's own directory (./js/).
HTML('''<script src="./js/sigma.min.js"></script>
<script src="./js/sigma.layout.forceAtlas2.min.js"></script>
<script src="./js/sigma.renderers.snapshot.min.js"></script>
<script src="./js/sigma.exporters.svg.min.js"></script>
<script src="./js/sigma.renderers.parallelEdges.min.js"></script>
<script src="./js/sigma.plugins.dragNodes.min.js"></script>
<script src="./js/sigma.plugins.filter.min.js"></script>
<script src="./js/sigma.renderers.edgeLabels.min.js"></script>''')

In [None]:
# --- Plot configuration shared by every figure in this notebook ---
STYLE = 'bmh'
PLOT_WIDTH = 14
PLOT_HEIGHT = 7

# Apply the base style sheet first, then override individual rc settings on top.
plt.style.use(STYLE)

font = {
    'family': 'sans-serif',
    'size': 12,
}
mpl.rc('font', **font)
mpl.rc('legend', fontsize=12)
# mpl.rc accepts a tuple of groups, so both tick axes are set in one call.
mpl.rc(('xtick', 'ytick'), labelsize=12)
mpl.rc('axes', facecolor="white", labelsize=12)
# LaTeX text rendering is left disabled:
# mpl.rc('text', usetex=True)

In [None]:
def run_all(data, target_word, filter_all, top_k, year="all"):
    """Run the whole pipeline for one target word and return its graph as HTML.

    Steps: read the data, build the co-occurrence matrix, draw a bar chart of
    the ``top_k`` entries returned by ``nb.top_k``, draw a bar chart of the
    ``top_k`` co-occurrences of ``target_word`` returned by
    ``nb.top_k_cooc``, then build the sigma.js graph of those co-occurring
    words.

    Parameters
    ----------
    data
        Passed unchanged to ``cnets.read_data`` (the notebook passes a path
        string).
    target_word
        Word whose co-occurrences are ranked and that is handed to
        ``nb.graph``.
    filter_all
        Forwarded as the ``filter_all`` keyword of ``cnets.read_data``.
    top_k
        Number of entries requested from ``nb.top_k`` and ``nb.top_k_cooc``.
    year
        Label shown in the figure titles only; it does not affect the data.

    Returns
    -------
    IPython.display.HTML
        Wrapper around the markup produced by ``nb.sigmaJSGraph``.
    """
    T = cnets.read_data(data, filter_all=filter_all)
    # len(T) is handed to nb.top_k_cooc as its normalizer.
    cooc_normalizer = len(T)

    # Build the co-occurrence matrix.
    k, v = cnets.build_COOM(T)
    C = cnets.getDF(k, v)

    # Top-k occurrences.
    oc = nb.top_k(C, top_k)
    freq_fig, freq_ax = plt.subplots(1, 1, figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    oc.plot(ax=freq_ax, kind="bar", rot=45)
    freq_fig.suptitle("Top {} Hashtag Frequency ({})".format(top_k, year))

    # Top-k co-occurrences of the target word. Uses the top_k parameter rather
    # than a notebook-global K, so the function works on a fresh kernel.
    cooc = nb.top_k_cooc(C, target_word, top_k, normalizer=cooc_normalizer)
    cooc_fig, cooc_ax = plt.subplots(1, 1, figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    cooc.plot(ax=cooc_ax, kind="bar", rot=45)
    cooc_fig.suptitle(
        "{}: top {} coocurrences ({})".format(target_word, top_k, year)
    )
    plt.show()

    # Graph of the top co-occurring words around the target word.
    top_words = list(cooc.keys())
    graph = nb.graph(C, top_words, target_word, top_k_edges=None, style=STYLE)
    graph_markup = nb.sigmaJSGraph(graph)
    return HTML(graph_markup)

In [None]:
# Run the full pipeline for #wellness with the top 10 entries.
# run_all takes (data, target_word, filter_all, top_k): the target word is
# passed on its own, and again inside the filter_all list that run_all forwards
# to cnets.read_data.
DATA_PATH="../DATA/txt/wellness_all"
target_word="#wellness"
run_all(DATA_PATH, target_word, filter_all=[target_word], top_k=10)

## Manual Analysis

In [None]:
# Read the data, passing both hashtags as the ``filter_all`` argument.
# NOTE(review): presumably read_data keeps only records matching these
# hashtags — confirm against src/coegonets.
DATA_PATH="../DATA/txt/wellness_all"
target_word="#wellness"
secondary_target="#fitness"

T = cnets.read_data(DATA_PATH, filter_all=[target_word,secondary_target])
# len(T) is reused below as the ``normalizer`` argument of nb.top_k_cooc.
cooc_normalizer = len(T)

In [None]:
# Build the co-occurrence matrix from T and wrap the result with cnets.getDF.
# NOTE(review): v is later sliced to pick words for the graph, so it presumably
# holds the word labels and k the matrix values — confirm in src/coegonets.
k, v = cnets.build_COOM(T)
C = cnets.getDF(k, v)


In [None]:
# Bar chart of the top K co-occurrences of target_word.
# K is assigned here so the cell runs on a fresh kernel; 10 is the same count
# the run_all call earlier in the notebook uses.
K = 10
cooc = nb.top_k_cooc(C, target_word, K, normalizer=cooc_normalizer)
f, ax = plt.subplots(1, 1, figsize=(PLOT_WIDTH, PLOT_HEIGHT))
cooc.plot(ax=ax, kind="bar", rot=45)
f.suptitle("{}: top {} coocurrences".format(target_word, K))
# Show the figure explicitly so the cell does not end on the Text repr
# returned by suptitle.
plt.show()

In [None]:
# Build the graph around target_word from a slice of v and render it with sigma.js.
n=100
# NOTE(review): this slice skips v[0] and yields at most n-1 entries, whereas
# run_all takes its words from list(cooc.keys()) — confirm which is intended.
top_words = v[1:n]
graph = nb.graph(C, top_words, target_word, top_k_edges=None, style=STYLE)
g = nb.sigmaJSGraph(graph) 
# Last expression of the cell: shows the generated markup as HTML output.
HTML(g) 