In [1]:
import numpy as np
import pandas as pd
import math
import logging
import pickle
import gensim
from gensim import corpora
from tqdm import tqdm
from wordcloud import WordCloud
import matplotlib
import matplotlib.pylab as plt
import pyLDAvis
import pyLDAvis.gensim
# Put pyLDAvis into notebook mode.
# NOTE(review): presumably this makes prepared visualisations render inline
# in the notebook -- confirm against the pyLDAvis documentation.
pyLDAvis.enable_notebook()

In [2]:
# Load the abstracts table, discard every row that lacks an abstract or a
# title, and renumber the remaining rows from zero.
# reset_index() is called without drop=True on purpose: the pre-filter row
# labels are kept in a new 'index' column, which later cells read.
df = (
    pd.read_csv('../data/AmNat_allAbstracts.csv')
    .dropna(subset=['Abstract', 'Title'])
    .reset_index()
)

In [3]:
# Restore the trained LDA model saved by an earlier step.
lda_model = gensim.models.ldamodel.LdaModel.load('lda_t17.model')

# Restore the pickled corpus the model is applied to below.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load 'corpus.pkl' when it comes from a trusted source.
with open('corpus.pkl', 'rb') as corpus_file:
    corpus = pickle.load(corpus_file)

In [9]:
# Build one topic vector per document.
# lda_model[bow] yields a list of (topic_id, weight) pairs for a document;
# those pairs are scattered into a dense (n_documents x num_topics) matrix.
# Topics absent from a document's list keep the initial value 0.
num_topics = lda_model.num_topics
topics = [lda_model[bow] for bow in corpus]

dense = np.zeros((len(topics), num_topics), dtype=float)
for doc_idx, doc_topics in enumerate(topics):
    if not doc_topics:
        # Nothing reported for this document: leave its row at zero.
        continue
    topic_ids, weights = zip(*doc_topics)
    dense[doc_idx, list(topic_ids)] = weights

In [23]:
# Rank documents within each topic.
# After transposing, every row of `transpose` holds one topic's weights across
# all documents. argsort sorts ascending, so the result is reversed to get the
# highest-weighted documents first, then cut to the first ten.
# (The slice was previously [0:9], which kept only nine documents per topic.)
# If there are fewer than ten documents, all of them are returned.
transpose = dense.transpose()
top10 = [np.argsort(column)[::-1][:10] for column in transpose]  # top 10 documents for each topic


In [79]:
# Titles of the top-ranked documents for a hand-picked subset of topics,
# one column per topic ('Topic 0', 'Topic 2', ...), in the order listed here.
docs_by_topics = pd.DataFrame({
    'Topic %d' % topic_id: df['Title'][top10[topic_id]].tolist()
    for topic_id in (0, 2, 3, 6, 11, 16)
})


In [82]:
# Write the table transposed (one row per topic, one column per rank) to CSV.
docs_by_topics.transpose().to_csv('docs_by_topics.csv')

In [66]:
from scipy.spatial import distance

# Square matrix of distances between every pair of document topic vectors
# (pdist is called with its default metric); entry [i, j] is the distance
# between documents i and j.
pairwise = distance.squareform(distance.pdist(dense))

# A document's distance to itself sits on the diagonal and would always win an
# argmin. Raise the diagonal above the largest real distance so that a
# nearest-neighbour lookup can never return the document itself.
largest = pairwise.max()
np.fill_diagonal(pairwise, largest + 1)
    
def closest_to(doc_id):
    """Return the position of the document nearest to document ``doc_id``.

    Looks up row ``doc_id`` of the module-level ``pairwise`` distance matrix
    and returns the column position holding the smallest distance. On ties,
    the first (lowest) position is returned.
    """
    distances_from_doc = pairwise[doc_id]
    return np.argmin(distances_from_doc)



In [67]:
# Pair each of the first ten documents with its nearest neighbour in topic
# space, and lay target and match side by side for manual inspection.
targets = df.index[range(10)]
match = [closest_to(target) for target in targets]

# The 'index' column holds each row's label from before the reset in the
# loading cell. Target columns are Series (they supply the table's row index);
# matching columns are plain lists, filled in positionally.
# NOTE(review): 'Traget_Title' looks like a misspelling of 'Target_Title'; it is
# kept unchanged because it is the column name that appears in the saved output.
comparison = pd.DataFrame({
    'Target_Index': df.loc[targets, 'index'],
    'Traget_Title': df.loc[targets, 'Title'],
    'Target_Abstract': df.loc[targets, 'Abstract'],
    'Matching_Index': df.loc[match, 'index'].tolist(),
    'Matching_Title': df.loc[match, 'Title'].tolist(),
    'Matching_Abstract': df.loc[match, 'Abstract'].tolist()
})
# comparison.to_csv('comparison.csv')

In [68]:
# Display the target / nearest-match table as the cell's output.
comparison

Unnamed: 0,Target_Index,Traget_Title,Target_Abstract,Matching_Index,Matching_Title,Matching_Abstract
0,0,Experimental Hybridization Studies Suggest Tha...,The alleles used for adaptation can pleiotropi...,1810,"Selection, Epistasis, and Parent-of-Origin Eff...",Understanding the nature of selection against ...
1,1,The Ecology of Individual Differences Empirica...,Movement provides a link between individual be...,4361,THE TIME-SCALE PROBLEM IN EXPLOITER-VICTIM MOD...,Standard exploiter-victim models assume an ins...
2,3,Floral Trait Evolution of Angiosperms on Pacif...,Animals frequently evolve unique suites of tra...,8008,AN ANALYTICAL STUDY OF THE GEOGRAPHIC DISTRIBU...,"Where moisture is a limiting factor, xerophyti..."
3,4,Looking for Mimicry in a Snake Assemblage Usin...,Batesian mimicry is a canonical example of evo...,2234,Predator-dependent species-area relationships,In addition to having a positive effect on spe...
4,5,Correlated Evolution of Sex Allocation and Mat...,In accordance with predictions of the size-adv...,5736,BALANCED SEX-RATIOS IN DIMORPHIC ALTRICIAL BIR...,Sex ratio theory holds that the population sex...
5,6,Climate Change and Thermoregulatory Consequenc...,Activity times structure the thermal environme...,6190,GEOGRAPHIC ANALYSIS OF THERMAL EQUILIBRIA - A ...,Adult body size and fecundity of several speci...
6,7,Optimal Network Architectures for Spatially St...,The motivation of this article is to derive ne...,1256,Beyond the Competition-Colonization Trade-Off:...,Disturbances' role in shaping communities is w...
7,8,Dispersal Predicts Hybrid Zone Widths across A...,Hybrid zones occur as range boundaries for man...,5690,THE INTRODUCED HAWAIIAN AVIFAUNA - BIOGEOGRAPH...,We discuss the patterns of introduction and ex...
8,9,"Where Is Natural History in Ecological, Evolut...",Natural history is the careful observation of ...,2149,Predation-competition interactions for seasona...,We investigate the interacting effects of pred...
9,10,Species Differences in Phenology Shape Coexist...,Ecological theory produces opposing prediction...,1360,Successional Dynamics in the Seasonally Forced...,Plankton seasonal succession is a classic exam...
