In [31]:
import gzip
import pickle
from MulticoreTSNE import MulticoreTSNE as TSNE
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
from sklearn.metrics.pairwise import cosine_similarity


def similar_vectors(board, vectors_key, k=5, skip=None):
    """Return the names of the ``k`` entries most similar to ``board``.

    Every entry of ``vectors[vectors_key]`` (the module-level ``vectors``
    mapping) that is not excluded is scored by cosine similarity against the
    vector stored for ``board``.

    Parameters
    ----------
    board
        Key of the reference entry inside ``vectors[vectors_key]``.
    vectors_key
        Selects which collection of the global ``vectors`` mapping to search.
    k : int, optional
        Maximum number of names to return (default 5).
    skip : sized container, optional
        Names to leave out of the ranking. When omitted or empty, only
        ``board`` itself is excluded. A non-empty ``skip`` is used exactly as
        given, so ``board`` is ranked as well unless the caller lists it.

    Returns
    -------
    list
        Up to ``k`` keys of ``vectors[vectors_key]``, most similar first.

    Raises
    ------
    KeyError
        If ``vectors_key`` or ``board`` cannot be looked up (for dict-backed
        collections).
    """
    collection = vectors[vectors_key]

    # A ``None`` default avoids sharing one mutable list between calls.
    if skip is None or len(skip) == 0:
        skip = [board]

    # The reference row is loop-invariant, so build it once.
    reference = collection[board].reshape(1, -1)

    similarity = {}
    for subreddit, vector in collection.items():
        if subreddit in skip:
            continue
        # cosine_similarity returns a 1x1 matrix; keep only the scalar so the
        # sort below compares plain floats instead of arrays.
        score = cosine_similarity(reference, vector.reshape(1, -1))
        similarity[subreddit] = float(score[0, 0])

    return sorted(similarity, key=similarity.get, reverse=True)[:k]

def plot(points, labels, filename='basic-scatter'):
    """Render a labelled scatter plot with plotly and display it inline.

    Parameters
    ----------
    points
        Two-dimensional array indexed as ``points[:, 0]`` (x coordinates) and
        ``points[:, 1]`` (y coordinates).
    labels
        Text shown next to the markers; passed to the trace as ``text``.
    filename : str, optional
        Name handed to ``py.iplot``. Defaults to ``'basic-scatter'``, the
        value that used to be hard-coded.
        NOTE(review): repeated calls with the same name presumably target the
        same plotly file -- pass distinct names to keep several figures.
    """
    trace = go.Scatter(
        x=points[:, 0],
        y=points[:, 1],
        mode='markers+text',
        marker={
            'size': 5,
            'colorscale': 'Viridis',
            'opacity': 1,
        },
        text=labels,
        textposition="top center",
    )

    # Plot and embed in the IPython notebook.
    figure_widget = py.iplot([trace], filename=filename)
    display(figure_widget)

def transform_and_plot(vector_key, boards):
    """Project the vectors of ``boards`` with t-SNE and plot the result.

    Parameters
    ----------
    vector_key
        Key into the module-level ``vectors`` mapping that selects which
        vector collection is embedded. (Previously this argument was ignored
        and ``'politics'`` was always used.)
    boards
        Names to look up in ``vectors[vector_key]``; they are also used as
        the point labels, in the same order.

    Raises
    ------
    KeyError
        If ``vector_key`` or one of ``boards`` cannot be looked up (for
        dict-backed collections).
    """
    collection = vectors[vector_key]
    stacked = np.array([collection[board] for board in boards])

    # NOTE(review): n_jobs=72 is hard-coded; presumably sized for the
    # machine the notebook was originally run on -- confirm.
    tsne = TSNE(n_jobs=72)
    points = tsne.fit_transform(stacked)

    plot(points, boards)

In [2]:
def _load_pickle_gz(path):
    """Unpickle the gzip-compressed file at ``path`` and close it afterwards.

    NOTE(security): ``pickle.load`` can execute arbitrary code; only use this
    on files from a trusted source.
    """
    with gzip.open(path, "rb") as handle:
        return pickle.load(handle)


# Vector collections, keyed by a short name for the result file they came from.
vectors = {}
vectors['small'] = _load_pickle_gz('../Results/0to500.pickle.gz')
# NOTE(review): 'mediam' looks like a typo for 'medium' (cf. 'medium_alt');
# the key is kept unchanged in case other cells look it up under this name.
vectors['mediam'] = _load_pickle_gz('../Results/0to2500.pickle.gz')
vectors['medium_alt'] = _load_pickle_gz('../Results/2500to5000.pickle.gz')
vectors['politics'] = _load_pickle_gz('../Results/politics.pickle.gz')
vectors['politics_alt'] = _load_pickle_gz('../Results/politics_alt.pickle.gz')
vectors['sanders_The_Donald'] = _load_pickle_gz('../Results/sanders_The_Donald.pickle.gz')
# vectors['large'] = _load_pickle_gz('../Results/0to25k_active_members.pickle.gz')
# vectors['xlarge'] = _load_pickle_gz('../Results/0to50k_active_members.pickle.gz')

In [10]:
# NOTE(security): pickle.load can execute arbitrary code; only load trusted files.
# The ``with`` block closes the gzip handle once the data has been read.
with gzip.open('../Results/political_share.pickle.gz', "rb") as share_file:
    political_share = pickle.load(share_file)

# The 50 entries with the largest 'users' value, biggest first.
political_boards = sorted(
    political_share,
    key=lambda board: political_share[board]['users'],
    reverse=True,
)[:50]

In [25]:
# Explicit list of boards to plot; assigning it replaces any earlier value of
# ``political_boards``. Line breaks inside the brackets need no backslashes.
political_boards = [
    'politics', 'The_Donald', 'hillaryclinton', 'SandersForPresident',
    'Conservative', 'progressive', 'NeutralPolitics', 'neutralnews',
    'worldnews', 'news', 'Le_Pen', 'The_Farage',
    'democrats', 'Republican', 'altright', 'RightwingLGBT',
    'AskThe_Donald', 'WhiteRights', 'moderatepolitics', 'HillaryForAmerica',
    'BlueMidterm2018', 'esist', 'GrassrootsSelect', 'BannedFromThe_Donald',
    'BernTheConvention', 'ShitPoliticsSays', 'ShitThe_DonaldSays',
    'AskTrumpSupporters', 'OurPresident', 'AskBernieSupporters',
    'StillSandersForPres', 'The_Europe', 'askhillarysupporters', 'DNCleaks',
    'TrumpForPrison', 'HillaryForPrison', 'Mr_Trump', 'EnoughHillHate',
    'justicedemocrats', 'ShitRConservativeSays', 'enoughsandersspam',
    'Enough_Sanders_Spam',
]

In [32]:
# t-SNE scatter of the selected boards; 'politics' is passed as the vector-set key.
transform_and_plot(vector_key='politics', boards=political_boards)

In [33]:
# t-SNE scatter of the selected boards; 'politics_alt' is passed as the vector-set key.
transform_and_plot(vector_key='politics_alt', boards=political_boards)

In [36]:
# t-SNE scatter of the selected boards; 'sanders_The_Donald' is passed as the vector-set key.
transform_and_plot(vector_key='sanders_The_Donald', boards=political_boards)