# CS511 - Red or Blue Classifier
### Classify a Twitter user as Republican or Democrat based on the users that they are following

### TA Dataset

In [1]:
# Read data
def read_data_file(file_name):
    data = []
    
    for line in open(file_name, 'r').read().splitlines():
        if line != '':
            data.append(line.rstrip())
            
    return data

In [2]:
# Load data
republican_data_file = 'data_files/RawTrain_#MAGA.txt'
democrat_data_file = 'data_files/RawTrain_#trumprussia.txt'

republican_data = read_data_file(republican_data_file)
democrat_data = read_data_file(democrat_data_file)

In [14]:
# Example
print(republican_data[0])

4215476655 954143712560517123 93069110 477803115 3002112321 18018355 353112820 551519702 4571209935 869024599941500929 120214980 703334588 15957417 9004352 2334193741 69396552 47334379 122998273 71339449 36454495 32871086 133869377 27020616 106768069 402372756 456795048 780436424 760567303629049856 2310330376 49878013 1681973533 2427419786 256789748 66170398 283752845 42053830 16117029 143063931 318162125 384630162 870653857327702016 210822651 3864154937 738378148715364354 16944022 375019316 280682278 555424329 50077786 24578794 803694179079458816 169514470 836293347945897985 299802277 757303975 869247852282826752 22771961 21619519 856401836 39344374 31464977 2908170952 1007893561 267287345 590262014 857249000 20733972 620571475 190401318 421314274 123308459 2742806678 463167098 108617810 18020081 245963716 19035510 20776147 823693635564802048 25203361 2467791 2173228424 3439849403 807095 2306885139 2735562711 63796828 813553259411554304 3065981667 497474968 1269656353 245013395 229865

In [3]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

class TaggedDocumentList:
    def __init__(self, data, tag):
        self.data = data
        self.tag = tag
        
    def __iter__(self):
        for line in self.data:
            yield TaggedDocument(words=line.split(), tags=[self.tag])



In [18]:
import random

republican_corpus = list(TaggedDocumentList(republican_data, "republican"))
democrat_corpus = list(TaggedDocumentList(democrat_data, "democrat"))

random.shuffle(republican_corpus)
random.shuffle(democrat_corpus)

corpus_train = republican_corpus[len(republican_corpus)//10:] + democrat_corpus[len(democrat_corpus)//10:]
corpus_test = republican_corpus[:len(republican_corpus)//10] + democrat_corpus[:len(democrat_corpus)//10]
print(len(corpus_train))
print(len(corpus_test))

432
48


In [55]:
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
model = Doc2Vec(corpus_train, min_count=1, workers=8, size=200, iter=50)
model.save('data_files/red2blue_model')

2018-04-29 14:02:41,032 : INFO : collecting all words and their counts
2018-04-29 14:02:41,043 : INFO : PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
2018-04-29 14:02:41,411 : INFO : collected 122173 word types and 2 unique tags from a corpus of 432 examples and 240614 words
2018-04-29 14:02:41,415 : INFO : Loading a fresh vocabulary
2018-04-29 14:02:42,132 : INFO : min_count=1 retains 122173 unique words (100% of original 122173, drops 0)
2018-04-29 14:02:42,133 : INFO : min_count=1 leaves 240614 word corpus (100% of original 240614, drops 0)
2018-04-29 14:02:42,840 : INFO : deleting the raw counts dictionary of 122173 items
2018-04-29 14:02:42,861 : INFO : sample=0.001 downsamples 0 most-common words
2018-04-29 14:02:42,863 : INFO : downsampling leaves estimated 240614 word corpus (100.0% of prior 240614)
2018-04-29 14:02:42,865 : INFO : estimated required memory for 122173 words and 200 dimensions: 256565300 bytes
2018-04-29 14:02:43,425 : INFO : resetting l

In [24]:
count = 0
accurate = 0

for user in corpus_test:
    infer_vector = model.infer_vector(user[0])
    label = model.docvecs.most_similar([infer_vector], topn= 2)
    #print(user[1][0] + "," + str(label[0][0]) + "," + str(label[0][1]) + "," + str(label[1][0]) + "," + str(label[1][1]))
    
    if user[1][0] == label[0][0]:
        accurate += 1
    count += 1

print("Testing accuracy: " + str(accurate/count))

2018-04-29 13:13:18,730 : INFO : precomputing L2-norms of doc weight vectors


Testing accuracy: 0.75


In [25]:
accurate = 0
count = 0

for user in democrat_corpus:
    infer_vector = model.infer_vector(user[0])
    similar_doc = model.docvecs.most_similar([infer_vector], topn= 1)
    if similar_doc[0][0] == user[1][0]:
        accurate += 1
    count += 1

print("Democrat accuracy: " + str(accurate/count))

accurate = 0
count = 0

for user in republican_corpus:
    infer_vector = model.infer_vector(user[0])
    similar_doc = model.docvecs.most_similar([infer_vector], topn= 1)
    if similar_doc[0][0] == user[1][0]:
        accurate += 1
    count += 1

print("Republican accuracy: " + str(accurate/count))

Democrat accuracy: 0.9791666666666666
Republican accuracy: 0.9791666666666666


In [26]:
for user in corpus_test:
    infer_vector = model.infer_vector(user[0])
    label = model.docvecs.most_similar([infer_vector], topn= 2)
    print(user[1][0] + "," + str(label[0][0]) + "," + str(label[0][1]) + "," + str(label[1][0]) + "," + str(label[1][1]))

republican,republican,0.9109641909599304,democrat,0.6422760486602783
republican,democrat,0.9223817586898804,republican,0.7742854952812195
republican,republican,0.9362626075744629,democrat,0.7363926768302917
republican,republican,0.8163026571273804,democrat,0.7249524593353271
republican,republican,0.857017457485199,democrat,0.6562468409538269
republican,democrat,0.8631985187530518,republican,0.786471426486969
republican,democrat,0.9350166320800781,republican,0.7461199164390564
republican,republican,0.9636876583099365,democrat,0.66451096534729
republican,republican,0.8965323567390442,democrat,0.5489869117736816
republican,republican,0.8988766670227051,democrat,0.6323325037956238
republican,republican,0.9472075700759888,democrat,0.6873716115951538
republican,democrat,0.8141393065452576,republican,0.8072285652160645
republican,republican,0.8750300407409668,democrat,0.786867618560791
republican,republican,0.9052923321723938,democrat,0.7342959046363831
republican,republican,0.940398454666137

### Politician Dataset

In [27]:
# Connect to Twitter API
import tweepy

auth = tweepy.OAuthHandler(XXXXXXX)
auth.set_access_token(XXXXXXX)

api = tweepy.API(auth)

In [28]:
# Get Twitter Friends
def get_friends(user):
    friends = []

    cursor = tweepy.Cursor(api.friends_ids, id=user)

    for page in cursor.pages():
        for friend in page:
            friends.append(str(friend))
            
        time.sleep(70)

    return friends

In [58]:
# Get Twitter Followers
def get_followers(user):
    followers = []

    cursor = tweepy.Cursor(api.followers_ids, id=user)

    count = 10
    for page in cursor.pages():
        for follower in page:
            followers.append(str(follower))
            
        #time.sleep(70)
        count -= 1
        
        if count == 0:
            break

    return followers

In [30]:
# Read in seed data
import csv

senators = []
with open('data_files/us-senate.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        senators.append([row[49], row[15]])
        
representatives = []
with open('data_files/us-house.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        representatives.append([row[51], row[17]])
        
governors = []
with open('data_files/us-governors.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        governors.append([row[40], row[6]])
        
seed_politicians = senators[1:] + representatives[1:] + governors[1:]

In [31]:
print(senators[1])
print(len(senators))
print(representatives[1])
print(len(representatives))
print(governors[1])
print(len(governors))
print(seed_politicians[0])
print(len(seed_politicians))

['SenDanSullivan', 'republican']
101
['repdonyoung', 'republican']
436
['LtGovIvey', 'republican']
51
['SenDanSullivan', 'republican']
585


In [8]:
# Get followers and friends
import time
#f = open("data_files/data.txt", "w+")

for politician in seed_politicians:
    print(politician)
    #friends = get_friends(politician[0])
    #print(len(friends))
    try:
        followers = get_followers(politician[0])
        print("Users added!")
    except:
        print("User skipped!")
        
    print(len(followers))
    
    #f.write(" ".join(followers) + '\n')
    #twitter_users = friends + followers
    #seed_data.append((twitter_users, politician[1]))


['SenDanSullivan', 'republican']
Users added!
25265
['lisamurkowski', 'republican']
Users added!
50000
['sendougjones', 'democrat']
Users added!
10229
['SenShelby', 'republican']
Users added!
50000
['JohnBoozman', 'republican']
Users added!
50000
['sentomcotton', 'republican']
Users added!
50000
['SenJeffFlake', 'republican']
Users added!
794
['SenJohnMcCain', 'republican']
Users added!
50000
['SenFeinstein', 'democrat']
Users added!
50000
['KamalaHarris', 'democrat']
Users added!
50000
['sencorygardner', 'republican']
Users added!
50000
['SenBennetCO', 'democrat']
Users added!
50000
['ChrisMurphyCT', 'democrat']
Users added!
50000
['SenBlumenthal', 'democrat']
Users added!
50000
['ChrisCoons', 'democrat']
Users added!
50000
['SenatorCarper', 'democrat']
Users added!
50000
['SenBillNelson', 'democrat']
Users added!
50000
['marcorubio', 'republican']
Users added!
50000
['sendavidperdue', 'republican']
Users added!
48987
['SenatorIsakson', 'republican']
Users added!
50000
['brianschatz',

Users added!
10
['DarrellIssa', 'republican']
Users added!
50000
['Rep_Hunter', 'republican']
Users added!
14592
['RepJuanVargas', 'democrat']
Users added!
17537
['RepScottPeters', 'democrat']
Users added!
21803
['RepSusanDavis', 'democrat']
Users added!
17270
['RepDianaDeGette', 'democrat']
Users added!
40450
['jaredpolis', 'democrat']
Users added!
48273
['RepTipton', 'republican']
Users added!
18605
['RepKenBuck', 'republican']
Users added!
14615
['RepDLamborn', 'republican']
Users added!
15593
['RepMikeCoffman', 'republican']
Users added!
36720
['RepPerlmutter', 'democrat']
Users added!
28686
['RepJohnLarson', 'democrat']
Users added!
22607
['RepJoeCourtney', 'democrat']
Users added!
22158
['rosadelauro', 'democrat']
Users added!
34807
['jahimes', 'democrat']
Users added!
50000
['RepEsty', 'democrat']
Users added!
22428
['LisaBRochester', 'democrat']
Users added!
3104
['mattgaetz', 'republican']
Users added!
30856
['drnealdunnfl2', 'republican']
Users added!
4145
['RepTedYoho', 'rep

Users added!
6182
['RepRichHudson', 'republican']
Users added!
14691
['', 'republican']
Users added!
10
['PatrickMcHenry', 'republican']
Users added!
23343
['RepMarkMeadows', 'republican']
Users added!
50000
['RepAdams', 'democrat']
Users added!
10463
['RepHolding', 'republican']
Users added!
13223
['RepKevinCramer', 'republican']
Users added!
11872
['JeffFortenberry', 'republican']
Users added!
18403
['DonJBacon', 'republican']
Users added!
3085
['RepAdrianSmith', 'republican']
Users added!
10183
['RepSheaPorter', 'democrat']
Users added!
6427
['RepAnnieKuster', 'democrat']
Users added!
18584
['DonaldNorcross', 'democrat']
Users added!
8982
['RepLoBiondo', 'republican']
Users added!
15754
['RepTomMacArthur', 'republican']
Users added!
14692
['RepChrisSmith', 'republican']
Users added!
20420
['JoshGottheimer', 'democrat']
Users added!
4498
['FrankPallone', 'democrat']
Users added!
27497
['RepLanceNJ7', 'republican']
Users added!
14651
['RepSires', 'democrat']
Users added!
16851
['BillP

Users added!
14709
['RepDonBeyer', 'democrat']
Users added!
50000
['RepMGriffith', 'republican']
Users added!
23699
['RepComstock', 'republican']
Users added!
15629
['GerryConnolly', 'democrat']
Users added!
42749
['PeterWelch', 'democrat']
Users added!
11247
['RepDelBene', 'democrat']
Users added!
23148
['RepRickLarsen', 'democrat']
Users added!
16872
['HerreraBeutler', 'republican']
Users added!
17562
['RepNewhouse', 'republican']
Users added!
9334
['cathymcmorris', 'republican']
Users added!
45024
['RepDerekKilmer', 'democrat']
Users added!
23652
['pramilajayapal', 'democrat']
Users added!
45949
['davereichert', 'republican']
Users added!
20330
['RepAdamSmith', 'democrat']
Users added!
27748
['RepDennyHeck', 'democrat']
Users added!
20849
['SpeakerRyan', 'republican']
Users added!
50000
['repmarkpocan', 'democrat']
Users added!
50000
['RepRonKind', 'democrat']
Users added!
12905
['RepGwenMoore', 'democrat']
Users added!
30809
['JimPressOffice', 'republican']
Users added!
6231
['RepG

In [32]:
# Remove governers
temp = seed_politicians[:len(seed_politicians)-50]
print(temp[len(temp) - 1][0])    
print(seed_politicians[len(seed_politicians)-51][0])

CynthiaLummis
CynthiaLummis


In [33]:
# Remove people without twitter
#temp.remove(['RepDuckworth', 'democrat']) 
#temp.remove(['RepTrentFranks', 'republican'])
#temp.remove(['USRepDarrenSoto', 'democrat'])
#temp.remove(['CaptClayHiggins', 'republican'])
#temp.remove(['RepTimMurphy', 'republican'])
#temp.remove(['farenthold', 'republican'])

print(len(temp))

535


In [34]:
# Load data
data_file = 'data_files/data.txt'

data = read_data_file(data_file)

data = data[:len(data) - 50]

print(len(data))

535


In [35]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

class TaggedDocumentList:
    def __init__(self, data, politicians):
        self.data = data
        self.politicians = politicians
        
    def __iter__(self):
        for i in range(len(self.data)):
            yield TaggedDocument(words=self.data[i].split(), tags=[self.politicians[i][1]])

In [37]:
corpus = list(TaggedDocumentList(data, temp))

random.shuffle(corpus)

corpus_train_2 = corpus[len(corpus)//10:]
corpus_test_2 = corpus[:len(corpus)//10]
print(len(corpus_train_2))
print(len(corpus_test_2))

482
53


In [38]:
import logging

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
model_2 = Doc2Vec(corpus_train_2, min_count=5, workers=8, size=1000, iter=50)
model_2.save('data_files/red2blue_model_2')

2018-04-29 13:16:30,671 : INFO : collecting all words and their counts
2018-04-29 13:16:30,672 : INFO : PROGRESS: at example #0, processed 0 words (0/s), 0 word types, 0 tags
2018-04-29 13:16:36,518 : INFO : collected 4001244 word types and 3 unique tags from a corpus of 482 examples and 12482921 words
2018-04-29 13:16:36,519 : INFO : Loading a fresh vocabulary
2018-04-29 13:16:40,905 : INFO : min_count=5 retains 509951 unique words (12% of original 4001244, drops 3491293)
2018-04-29 13:16:40,906 : INFO : min_count=5 leaves 7353371 word corpus (58% of original 12482921, drops 5129550)
2018-04-29 13:16:43,347 : INFO : deleting the raw counts dictionary of 4001244 items
2018-04-29 13:16:43,417 : INFO : sample=0.001 downsamples 0 most-common words
2018-04-29 13:16:43,418 : INFO : downsampling leaves estimated 7353371 word corpus (100.0% of prior 7353371)
2018-04-29 13:16:43,419 : INFO : estimated required memory for 509951 words and 1000 dimensions: 4334596100 bytes
2018-04-29 13:16:45,56

2018-04-29 13:20:41,954 : INFO : PROGRESS: at 3.28% examples, 30711 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:20:42,982 : INFO : PROGRESS: at 3.41% examples, 31659 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:20:44,025 : INFO : PROGRESS: at 3.54% examples, 32739 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:20:45,046 : INFO : PROGRESS: at 3.67% examples, 33782 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:20:46,080 : INFO : PROGRESS: at 3.80% examples, 34778 words/s, in_qsize 16, out_qsize 2
2018-04-29 13:20:47,091 : INFO : PROGRESS: at 3.93% examples, 35805 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:20:48,092 : INFO : PROGRESS: at 4.06% examples, 36877 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:20:49,133 : INFO : PROGRESS: at 4.20% examples, 37933 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:20:50,204 : INFO : PROGRESS: at 4.33% examples, 38948 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:20:51,224 : INFO : PROGRESS: at 4.47% examples, 40042 words/s, in_qs

2018-04-29 13:22:05,774 : INFO : PROGRESS: at 13.98% examples, 94216 words/s, in_qsize 16, out_qsize 1
2018-04-29 13:22:06,839 : INFO : PROGRESS: at 14.11% examples, 94751 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:22:07,854 : INFO : PROGRESS: at 14.23% examples, 95237 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:22:08,927 : INFO : PROGRESS: at 14.36% examples, 95751 words/s, in_qsize 16, out_qsize 2
2018-04-29 13:22:09,929 : INFO : PROGRESS: at 14.49% examples, 96333 words/s, in_qsize 16, out_qsize 1
2018-04-29 13:22:10,972 : INFO : PROGRESS: at 14.63% examples, 96952 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:22:11,982 : INFO : PROGRESS: at 14.76% examples, 97462 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:22:12,983 : INFO : PROGRESS: at 14.88% examples, 97956 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:22:13,987 : INFO : PROGRESS: at 15.02% examples, 98529 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:22:15,031 : INFO : PROGRESS: at 15.16% examples, 99106 word

2018-04-29 13:23:27,573 : INFO : PROGRESS: at 25.16% examples, 133273 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:23:28,596 : INFO : PROGRESS: at 25.29% examples, 133631 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:23:29,658 : INFO : PROGRESS: at 25.45% examples, 134053 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:23:30,763 : INFO : PROGRESS: at 25.60% examples, 134488 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:23:31,774 : INFO : PROGRESS: at 25.75% examples, 134925 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:23:32,774 : INFO : PROGRESS: at 25.89% examples, 135302 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:23:33,787 : INFO : PROGRESS: at 26.02% examples, 135642 words/s, in_qsize 15, out_qsize 1
2018-04-29 13:23:34,846 : INFO : PROGRESS: at 26.17% examples, 136051 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:23:35,890 : INFO : PROGRESS: at 26.32% examples, 136450 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:23:36,929 : INFO : PROGRESS: at 26.46% examples, 1

2018-04-29 13:24:49,101 : INFO : PROGRESS: at 36.80% examples, 160758 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:24:50,101 : INFO : PROGRESS: at 36.97% examples, 161068 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:24:51,107 : INFO : PROGRESS: at 37.10% examples, 161320 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:24:52,129 : INFO : PROGRESS: at 37.25% examples, 161629 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:24:53,168 : INFO : PROGRESS: at 37.41% examples, 161918 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:24:54,169 : INFO : PROGRESS: at 37.56% examples, 162246 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:24:55,174 : INFO : PROGRESS: at 37.70% examples, 162497 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:24:56,210 : INFO : PROGRESS: at 37.86% examples, 162804 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:24:57,257 : INFO : PROGRESS: at 38.01% examples, 163116 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:24:58,257 : INFO : PROGRESS: at 38.17% examples, 1

2018-04-29 13:26:10,237 : INFO : PROGRESS: at 48.79% examples, 181377 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:26:11,255 : INFO : PROGRESS: at 48.96% examples, 181633 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:26:12,311 : INFO : PROGRESS: at 49.10% examples, 181821 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:26:13,340 : INFO : PROGRESS: at 49.26% examples, 182053 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:26:14,348 : INFO : PROGRESS: at 49.43% examples, 182308 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:26:15,373 : INFO : PROGRESS: at 49.56% examples, 182511 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:26:16,436 : INFO : PROGRESS: at 49.72% examples, 182736 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:26:17,456 : INFO : PROGRESS: at 49.88% examples, 182980 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:26:18,480 : INFO : PROGRESS: at 50.04% examples, 183226 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:26:19,499 : INFO : PROGRESS: at 50.20% examples, 1

2018-04-29 13:27:31,482 : INFO : PROGRESS: at 61.03% examples, 197395 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:27:32,544 : INFO : PROGRESS: at 61.18% examples, 197561 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:27:33,567 : INFO : PROGRESS: at 61.33% examples, 197705 words/s, in_qsize 14, out_qsize 2
2018-04-29 13:27:34,577 : INFO : PROGRESS: at 61.49% examples, 197896 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:27:35,600 : INFO : PROGRESS: at 61.62% examples, 198046 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:27:36,664 : INFO : PROGRESS: at 61.78% examples, 198203 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:27:37,682 : INFO : PROGRESS: at 61.95% examples, 198403 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:27:38,685 : INFO : PROGRESS: at 62.10% examples, 198594 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:27:39,719 : INFO : PROGRESS: at 62.25% examples, 198759 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:27:40,760 : INFO : PROGRESS: at 62.41% examples, 1

2018-04-29 13:28:52,518 : INFO : PROGRESS: at 73.23% examples, 209736 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:28:53,538 : INFO : PROGRESS: at 73.39% examples, 209883 words/s, in_qsize 16, out_qsize 2
2018-04-29 13:28:54,540 : INFO : PROGRESS: at 73.53% examples, 209995 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:28:55,602 : INFO : PROGRESS: at 73.68% examples, 210131 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:28:56,613 : INFO : PROGRESS: at 73.85% examples, 210296 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:28:57,633 : INFO : PROGRESS: at 74.00% examples, 210427 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:28:58,642 : INFO : PROGRESS: at 74.16% examples, 210574 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:28:59,650 : INFO : PROGRESS: at 74.31% examples, 210702 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:29:00,685 : INFO : PROGRESS: at 74.46% examples, 210842 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:29:01,701 : INFO : PROGRESS: at 74.61% examples, 2

2018-04-29 13:30:13,678 : INFO : PROGRESS: at 85.37% examples, 219317 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:30:14,731 : INFO : PROGRESS: at 85.52% examples, 219447 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:30:15,738 : INFO : PROGRESS: at 85.66% examples, 219524 words/s, in_qsize 15, out_qsize 2
2018-04-29 13:30:16,773 : INFO : PROGRESS: at 85.83% examples, 219638 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:30:17,790 : INFO : PROGRESS: at 85.98% examples, 219745 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:30:18,811 : INFO : PROGRESS: at 86.13% examples, 219862 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:30:19,825 : INFO : PROGRESS: at 86.27% examples, 219955 words/s, in_qsize 16, out_qsize 1
2018-04-29 13:30:20,847 : INFO : PROGRESS: at 86.44% examples, 220091 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:30:21,870 : INFO : PROGRESS: at 86.59% examples, 220196 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:30:22,912 : INFO : PROGRESS: at 86.73% examples, 2

2018-04-29 13:31:34,966 : INFO : PROGRESS: at 97.22% examples, 226448 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:31:35,978 : INFO : PROGRESS: at 97.37% examples, 226509 words/s, in_qsize 14, out_qsize 1
2018-04-29 13:31:37,004 : INFO : PROGRESS: at 97.51% examples, 226602 words/s, in_qsize 16, out_qsize 1
2018-04-29 13:31:38,036 : INFO : PROGRESS: at 97.66% examples, 226669 words/s, in_qsize 16, out_qsize 0
2018-04-29 13:31:39,070 : INFO : PROGRESS: at 97.81% examples, 226750 words/s, in_qsize 16, out_qsize 1
2018-04-29 13:31:40,119 : INFO : PROGRESS: at 97.95% examples, 226798 words/s, in_qsize 15, out_qsize 1
2018-04-29 13:31:41,135 : INFO : PROGRESS: at 98.11% examples, 226902 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:31:42,185 : INFO : PROGRESS: at 98.26% examples, 226986 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:31:43,225 : INFO : PROGRESS: at 98.41% examples, 227078 words/s, in_qsize 15, out_qsize 0
2018-04-29 13:31:44,285 : INFO : PROGRESS: at 98.56% examples, 2

In [46]:
count = 0
accurate = 0

for user in corpus_test_2:
    infer_vector = model_2.infer_vector(user[0])
    label = model_2.docvecs.most_similar([infer_vector], topn= 2)
    #print(user[1][0] + "," + str(label[0][0]) + "," + str(label[0][1]) + "," + str(label[1][0]) + "," + str(label[1][1]))
    
    if user[1][0] == label[0][0]:
        accurate += 1
    count += 1

print("Testing accuracy: " + str(accurate/count))

Testing accuracy: 0.9622641509433962


In [47]:
for user in corpus_test_2:
    infer_vector = model_2.infer_vector(user[0])
    label = model_2.docvecs.most_similar([infer_vector], topn= 2)
    print(user[1][0] + "," + str(label[0][0]) + "," + str(label[0][1]) + "," + str(label[1][0]) + "," + str(label[1][1]))

republican,republican,0.9882102608680725,democrat,0.9777097105979919
democrat,democrat,0.9909756183624268,republican,0.9779123663902283
republican,republican,0.9905202984809875,democrat,0.979185163974762
republican,republican,0.9798581004142761,democrat,0.9733731150627136
republican,republican,0.9902716279029846,democrat,0.9889044761657715
republican,republican,0.9858303070068359,democrat,0.9777659177780151
republican,republican,0.9929103851318359,democrat,0.9846211671829224
republican,republican,0.9896551370620728,democrat,0.9839009046554565
republican,republican,0.8295853734016418,democrat,0.8022875785827637
democrat,democrat,0.993126630783081,republican,0.9833117723464966
republican,republican,0.992561399936676,democrat,0.9880997538566589
republican,republican,0.9892390966415405,democrat,0.9820978045463562
republican,republican,0.9902167320251465,democrat,0.9820261597633362
republican,republican,0.9930213093757629,democrat,0.9825872182846069
republican,republican,0.9907503128051758,

In [59]:
# Tom Cruise
tc_followers = get_followers('TomCruise')
infer_vector = model_2.infer_vector(tc_followers)
label = model_2.docvecs.most_similar([infer_vector], topn= 2)

print(label[0][0])

democrat


In [61]:
# Dwayne Johnson
tc_followers = get_followers('TheRock')
infer_vector = model_2.infer_vector(tc_followers)
label = model_2.docvecs.most_similar([infer_vector], topn= 2)

print(label[0][0])

republican


In [62]:
# Arnold Schwarzenegger
tc_followers = get_followers('Schwarzenegger')
infer_vector = model_2.infer_vector(tc_followers)
label = model_2.docvecs.most_similar([infer_vector], topn= 2)

print(label[0][0])

republican


### Second layer followers

In [5]:
print(republican_data[0])

4215476655 954143712560517123 93069110 477803115 3002112321 18018355 353112820 551519702 4571209935 869024599941500929 120214980 703334588 15957417 9004352 2334193741 69396552 47334379 122998273 71339449 36454495 32871086 133869377 27020616 106768069 402372756 456795048 780436424 760567303629049856 2310330376 49878013 1681973533 2427419786 256789748 66170398 283752845 42053830 16117029 143063931 318162125 384630162 870653857327702016 210822651 3864154937 738378148715364354 16944022 375019316 280682278 555424329 50077786 24578794 803694179079458816 169514470 836293347945897985 299802277 757303975 869247852282826752 22771961 21619519 856401836 39344374 31464977 2908170952 1007893561 267287345 590262014 857249000 20733972 620571475 190401318 421314274 123308459 2742806678 463167098 108617810 18020081 245963716 19035510 20776147 823693635564802048 25203361 2467791 2173228424 3439849403 807095 2306885139 2735562711 63796828 813553259411554304 3065981667 497474968 1269656353 245013395 229865

In [11]:
import time
f = open("data_files/republican_2.txt", "w+")

for followers in republican_data:
    comp_followers = []
    count = 10
    for follower in followers.split():
        print("Count: " + str(count))
        print(follower)
        try:
            followers_2 = get_followers(follower)
            s = " ".join(followers_2)
            comp_followers.append(s)
            print("Users added!")
        except:
            print("User skipped!")
        count -= 1
        if count == 0:
            break
            
    #f.write(" ".join(comp_followers) + '\n')

Count: 10
4215476655
Users added!
Count: 9
954143712560517123
Users added!
Count: 8
93069110
Users added!
Count: 7
477803115
Users added!
Count: 6
3002112321
Users added!
Count: 5
18018355
Users added!
Count: 4
353112820
Users added!
Count: 3
551519702
Users added!
Count: 2
4571209935
Users added!
Count: 1
869024599941500929
Users added!
Count: 10
987846722
Users added!
Count: 9
955584668941905920
User skipped!
Count: 8
780574247357812737
Users added!
Count: 7
3433778997
Users added!
Count: 6
785959633240350720
Users added!
Count: 5
702705429017706497
Users added!
Count: 4
844137239592472576
User skipped!
Count: 3
2313099380
Users added!
Count: 2
1364478211
Users added!
Count: 1
1930551558
Users added!
Count: 10
910688113894346752
Users added!
Count: 9
1393297200
Users added!
Count: 8
4091551984
Users added!
Count: 7
843112550
Users added!
Count: 6
918346674
Users added!
Count: 5
1322319715
Users added!
Count: 4
1305940272
Users added!
Count: 3
881483948341243905
Users added!
Count: 2


Users added!
Count: 2
2827371006
Users added!
Count: 1
61050338
Users added!
Count: 10
3148592893
Users added!
Count: 9
877192350388412417
Users added!
Count: 8
36715753
Users added!
Count: 7
823905
Users added!
Count: 6
480541588
Users added!
Count: 5
795423419219120128
Users added!
Count: 4
14344823
Users added!
Count: 3
19947268
Users added!
Count: 2
2273576767
Users added!
Count: 1
711393889731657732
Users added!
Count: 10
4886380750
Users added!
Count: 9
822462665910063105
Users added!
Count: 8
899552364
Users added!
Count: 7
908016484261048320
Users added!
Count: 6
343041182
Users added!
Count: 5
342887079
Users added!
Count: 4
63525003
Users added!
Count: 3
54996176
Users added!
Count: 2
922691386196893696
Users added!
Count: 1
838840485133365249
Users added!
Count: 10
802707141895344129
Users added!
Count: 9
1583865109
Users added!
Count: 8
923440869734277120
Users added!
Count: 7
73381015
Users added!
Count: 6
3075562766
Users added!
Count: 5
911039547206758400
Users added!
Co

User skipped!
Count: 5
3160599212
User skipped!
Count: 4
114345786
User skipped!
Count: 3
824142768603807745
User skipped!
Count: 2
737732358
User skipped!
Count: 1
839099917121511425
User skipped!
Count: 10
870336037326925828
User skipped!
Count: 9
953991390
User skipped!
Count: 8
18247062
User skipped!
Count: 7
531586866
User skipped!
Count: 6
703755811399585793
User skipped!
Count: 5
2204186305
User skipped!
Count: 4
927199977436340224
User skipped!
Count: 3
748290437639737344
User skipped!
Count: 2
377228272
User skipped!
Count: 1
21347710
User skipped!
Count: 10
4027124119
User skipped!
Count: 9
76184375
User skipped!
Count: 8
322879013
User skipped!
Count: 7
1179779472
User skipped!
Count: 6
923768725051985920
User skipped!
Count: 5
2927284436
User skipped!
Count: 4
25686547
User skipped!
Count: 3
3009028587
User skipped!
Count: 2
757078971590651904
User skipped!
Count: 1
880018682
User skipped!
Count: 10
260639575
User skipped!
Count: 9
850036892
User skipped!
Count: 8
16989178


User skipped!
Count: 6
336116660
User skipped!
Count: 5
2259809232
User skipped!
Count: 4
77395878
User skipped!
Count: 3
100045891
User skipped!
Count: 2
43295228
User skipped!
Count: 1
28177043
User skipped!
Count: 10
737458424041066496
User skipped!
Count: 9
22677397
User skipped!
Count: 8
4136936597
User skipped!
Count: 7
39076082
User skipped!
Count: 6
975217231
User skipped!
Count: 5
36715753
User skipped!
Count: 4
3406938525
User skipped!
Count: 3
1561077758
User skipped!
Count: 2
15757578
User skipped!
Count: 1
58348452
User skipped!
Count: 10
891168598550495232
User skipped!
Count: 9
889909780868988928
User skipped!
Count: 8
734049427388669952
User skipped!
Count: 7
3966442700
User skipped!
Count: 6
860200224
User skipped!
Count: 5
20118767
User skipped!
Count: 4
3247993823
Users added!
Count: 3
795538097119305728
Users added!
Count: 2
151633598
User skipped!
Count: 1
27278966
Users added!
Count: 10
802551737655574528
Users added!
Count: 9
17918808
Users added!
Count: 8
350394

User skipped!
Count: 4
942655817127403520
User skipped!
Count: 3
2874359694
User skipped!
Count: 2
25975551
User skipped!
Count: 1
953608878385754112
User skipped!
Count: 10
1042673966
User skipped!
Count: 9
279037545
User skipped!
Count: 8
55044457
User skipped!
Count: 7
19453047
User skipped!
Count: 6
828400245797220352
User skipped!
Count: 5
894694902361198592
User skipped!
Count: 4
474351727
User skipped!
Count: 3
823280797897486337
User skipped!
Count: 2
48497926
User skipped!
Count: 1
295988417
User skipped!
Count: 10
875484826517864452
User skipped!
Count: 9
247723476
User skipped!
Count: 8
4136936597
User skipped!
Count: 7
948198491876143104
User skipped!
Count: 6
947578402457964545
User skipped!
Count: 5
2794432004
User skipped!
Count: 4
1965826896
User skipped!
Count: 3
2790361662
User skipped!
Count: 2
1280607440
User skipped!
Count: 1
2469799574
User skipped!
Count: 10
232074836
User skipped!
Count: 9
732027915894824960
User skipped!
Count: 8
531710110
User skipped!
Count: 

User skipped!
Count: 2
1552100222
User skipped!
Count: 1
846302320283566080
User skipped!
Count: 10
831752260921200641
User skipped!
Count: 9
281373704
User skipped!
Count: 8
828056756
User skipped!
Count: 7
624944269
User skipped!
Count: 6
98956941
User skipped!
Count: 5
127941629
User skipped!
Count: 4
2906962900
User skipped!
Count: 3
803330505391411200
User skipped!
Count: 2
1225439204
User skipped!
Count: 1
72276309
User skipped!
Count: 10
2610659786
User skipped!
Count: 9
22703645
User skipped!
Count: 8
216444984
User skipped!
Count: 7
471672239
User skipped!
Count: 6
2417586104
User skipped!
Count: 5
2359926157
User skipped!
Count: 4
17629860
User skipped!
Count: 3
15647676
User skipped!
Count: 2
830128820447539208
User skipped!
Count: 1
828613457020870657
User skipped!
Count: 10
891761821383667712
User skipped!
Count: 9
826187221904535553
User skipped!
Count: 8
16187637
User skipped!
Count: 7
3048730646
User skipped!
Count: 6
879016370016579584
User skipped!
Count: 5
7899514683

User skipped!
Count: 2
2964664061
User skipped!
Count: 1
2843872435
User skipped!
Count: 10
918919628016967681
User skipped!
Count: 9
26401490
User skipped!
Count: 8
801923601230241792
User skipped!
Count: 7
28265064
User skipped!
Count: 6
472132412
User skipped!
Count: 5
1107507061
User skipped!
Count: 4
827391980778819584
User skipped!
Count: 3
601535938
User skipped!
Count: 2
2908170952
User skipped!
Count: 1
4884762153
User skipped!
Count: 10
842203135754047488
User skipped!
Count: 9
821042327573712896
User skipped!
Count: 8
15757578
User skipped!
Count: 7
897924242028756994
User skipped!
Count: 6
821105815373418497
User skipped!
Count: 5
1870107482
User skipped!
Count: 4
743113457000534016
User skipped!
Count: 3
372372537
User skipped!
Count: 2
941040015118884864
User skipped!
Count: 1
1315836344
User skipped!
Count: 10
131286917
User skipped!
Count: 9
20583291
Users added!
Count: 8
928163087127064576
Users added!
Count: 7
828997388920619008
Users added!
Count: 6
282695161
Users a

Users added!
Count: 9
2868389650
Users added!
Count: 8
1345822466
Users added!
Count: 7
899460920329478144
Users added!
Count: 6
834018591766933507
Users added!
Count: 5
64229716
Users added!
Count: 4
2985335882
Users added!
Count: 3
2382749012
Users added!
Count: 2
4182380774
Users added!
Count: 1
232108392
Users added!
Count: 10
564568759
Users added!
Count: 9
2751419379
Users added!
Count: 8
92524719
Users added!
Count: 7
709533350889795584
Users added!
Count: 6
44966627
User skipped!
Count: 5
821157370340392962
Users added!
Count: 4
90114840
Users added!
Count: 3
1904739704
Users added!
Count: 2
858130549994598400
Users added!
Count: 1
70447484
Users added!
Count: 10
3353724259
User skipped!
Count: 9
21904231
Users added!
Count: 8
175988779
Users added!
Count: 7
532781117
Users added!
Count: 6
4818277768
Users added!
Count: 5
736079298
Users added!
Count: 4
275138287
Users added!
Count: 3
954474656827957248
Users added!
Count: 2
3937397053
Users added!
Count: 1
2490434600
Users add

Users added!
Count: 5
955253530486591488
Users added!
Count: 4
93269181
Users added!
Count: 3
21526388
Users added!
Count: 2
343785942
Users added!
Count: 1
40716483
Users added!
Count: 10
16047039
Users added!
Count: 9
26819424
Users added!
Count: 8
946174553100902402
Users added!
Count: 7
909448512
Users added!
Count: 6
918534858871377920
Users added!
Count: 5
924673662732652545
Users added!
Count: 4
177976646
Users added!
Count: 3
328325681
Users added!
Count: 2
731986727414300672
User skipped!
Count: 1
926802115
Users added!
Count: 10
827149924261306368
Users added!
Count: 9
767087054064844800
Users added!
Count: 8
836275012877287426
User skipped!
Count: 7
739536307295158272
Users added!
Count: 6
4668690253
Users added!
Count: 5
3075562766
Users added!
Count: 4
938611707689226245
Users added!
Count: 3
944713825684721667
User skipped!
Count: 2
38803142
Users added!
Count: 1
754845211176554496
Users added!
Count: 10
290426542
Users added!
Count: 9
34793128
Users added!
Count: 8
36715

User skipped!
Count: 5
9624742
User skipped!
Count: 4
52136185
User skipped!
Count: 3
73181712
User skipped!
Count: 2
232901331
User skipped!
Count: 1
20459404
User skipped!
Count: 10
4837510348
User skipped!
Count: 9
73238146
User skipped!
Count: 8
50055701
User skipped!
Count: 7
19397785
User skipped!
Count: 6
52551600
User skipped!
Count: 5
50374439
User skipped!
Count: 4
3223426134
User skipped!
Count: 3
132339474
User skipped!
Count: 2
42995067
User skipped!
Count: 1
893405045508763648
User skipped!
Count: 10
551083978
User skipped!
Count: 9
2916305152
User skipped!
Count: 8
3141213592
User skipped!
Count: 7
102843654
User skipped!
Count: 6
2336050548
User skipped!
Count: 5
2847221717
User skipped!
Count: 4
25123197
User skipped!
Count: 3
171632862
User skipped!
Count: 2
418264878
User skipped!
Count: 1
73119923
User skipped!
Count: 10
618222518
User skipped!
Count: 9
869584484416446464
User skipped!
Count: 8
943394783753654274
User skipped!
Count: 7
940056602354135043
User skippe

In [12]:
import time
f = open("data_files/democrat_2.txt", "w+")

for followers in democrat_data:
    comp_followers = []
    count = 10
    for follower in followers.split():
        print("Count: " + str(count))
        print(follower)
        try:
            followers_2 = get_followers(follower)
            s = " ".join(followers_2)
            comp_followers.append(s)
            print("Users added!")
        except:
            print("User skipped!")
        count -= 1
        if count == 0:
            break
            
    #f.write(" ".join(comp_followers) + '\n')

Count: 10
944226398205595648
Users added!
Count: 9
881174262983254016
Users added!
Count: 8
1965420800
Users added!
Count: 7
104271823
Users added!
Count: 6
702643881054089216
Users added!
Count: 5
884578187392544769
Users added!
Count: 4
2863996955
Users added!
Count: 3
510464011
Users added!
Count: 2
372536101
Users added!
Count: 1
909200191626121216
Users added!
Count: 10
47814072
Users added!
Count: 9
948946378939609089
Users added!
Count: 8
4156558678
Users added!
Count: 7
16160352
Users added!
Count: 6
173999134
Users added!
Count: 5
771388593130483712
Users added!
Count: 4
19983050
Users added!
Count: 3
224910936
Users added!
Count: 2
234163619
Users added!
Count: 1
2180371
Users added!
Count: 10
3035561968
Users added!
Count: 9
292083207
Users added!
Count: 8
170735298
Users added!
Count: 7
861229975566942209
Users added!
Count: 6
798254257187291136
Users added!
Count: 5
751283435382067202
Users added!
Count: 4
810619093749559296
Users added!
Count: 3
46197784
Users added!
Coun

Users added!
Count: 3
47730523
Users added!
Count: 2
915542143061696512
User skipped!
Count: 1
29450962
Users added!
Count: 10
786457943759687680
Users added!
Count: 9
357606935
Users added!
Count: 8
2334193741
Users added!
Count: 7
731132367058968576
Users added!
Count: 6
2400284491
Users added!
Count: 5
296527153
Users added!
Count: 4
3924002953
Users added!
Count: 3
903273119137320960
User skipped!
Count: 2
19877186
Users added!
Count: 1
171968009
Users added!
Count: 10
2409451987
Users added!
Count: 9
816400050247135234
Users added!
Count: 8
5509762
Users added!
Count: 7
2167340796
Users added!
Count: 6
890112351952920576
Users added!
Count: 5
2176999581
Users added!
Count: 4
100277835
Users added!
Count: 3
825717050538618880
Users added!
Count: 2
951349253628203009
Users added!
Count: 1
155640818
Users added!
Count: 10
1397477960
Users added!
Count: 9
915117280136241152
User skipped!
Count: 8
755098969190072320
Users added!
Count: 7
200846651
Users added!
Count: 6
284241195
Users 

Users added!
Count: 6
525925642
Users added!
Count: 5
39279821
Users added!
Count: 4
1055907624
Users added!
Count: 3
794889975041060865
Users added!
Count: 2
17394696
Users added!
Count: 1
26642006
Users added!
Count: 10
589390524
Users added!
Count: 9
249314086
Users added!
Count: 8
2706430603
Users added!
Count: 7
536904693
Users added!
Count: 6
2223157392
Users added!
Count: 5
36686040
Users added!
Count: 4
1648370773
Users added!
Count: 3
24268066
Users added!
Count: 2
16870421
Users added!
Count: 1
2728657237
Users added!
Count: 10
4581936622
Users added!
Count: 9
713839291210792960
Users added!
Count: 8
14247236
Users added!
Count: 7
32917190
Users added!
Count: 6
43445412
User skipped!
Count: 5
17403718
Users added!
Count: 4
1853310463
Users added!
Count: 3
3316388106
Users added!
Count: 2
954077019041562625
Users added!
Count: 1
95092020
Users added!
Count: 10
851812979549106176
Users added!
Count: 9
803043124419248129
Users added!
Count: 8
1903263620
Users added!
Count: 7
755

Users added!
Count: 8
944831800739860480
Users added!
Count: 7
1301923506
Users added!
Count: 6
2915096942
User skipped!
Count: 5
1688
Users added!
Count: 4
963480595
Users added!
Count: 3
918302785011093504
Users added!
Count: 2
4874478547
Users added!
Count: 1
828302764371542016
Users added!
Count: 10
800184262536241152
Users added!
Count: 9
441596194
Users added!
Count: 8
937442792959340546
Users added!
Count: 7
566806945
Users added!
Count: 6
16548855
Users added!
Count: 5
725741909822562304
Users added!
Count: 4
392212735
Users added!
Count: 3
3193455540
Users added!
Count: 2
29112110
Users added!
Count: 1
4580002464
User skipped!
Count: 10
2867978738
Users added!
Count: 9
909428575270350859
Users added!
Count: 8
933098376723230722
Users added!
Count: 7
875744359160872960
Users added!
Count: 6
924315535004880898
Users added!
Count: 5
2934566176
User skipped!
Count: 4
902950290042834946
Users added!
Count: 3
1131887528
Users added!
Count: 2
769197392679804928
Users added!
Count: 1


Users added!
Count: 5
792442371963293697
Users added!
Count: 4
827654988935610368
Users added!
Count: 3
25601183
Users added!
Count: 2
164346642
Users added!
Count: 1
403450037
Users added!
Count: 10
2796475822
Users added!
Count: 9
920630504906547203
Users added!
Count: 8
821420676766662656
Users added!
Count: 7
820891701564899328
Users added!
Count: 6
955162461421916165
Users added!
Count: 5
112532110
Users added!
Count: 4
3223426134
Users added!
Count: 3
616737225
Users added!
Count: 2
878747000
Users added!
Count: 1
4753817673
Users added!
Count: 10
277094043
Users added!
Count: 9
548384458
Users added!
Count: 8
764187611116208128
Users added!
Count: 7
890592151335190528
Users added!
Count: 6
766447044890333185
Users added!
Count: 5
21757965
Users added!
Count: 4
216270430
Users added!
Count: 3
1345772725
Users added!
Count: 2
850955642592309248
Users added!
Count: 1
802649176299466752
Users added!
Count: 10
3265222782
Users added!
Count: 9
825895510724771843
Users added!
Count: 8


Users added!
Count: 5
41689584
Users added!
Count: 4
548384458
Users added!
Count: 3
74790676
Users added!
Count: 2
60087571
Users added!
Count: 1
843884005569302533
Users added!
Count: 10
25695193
Users added!
Count: 9
794358888074199046
User skipped!
Count: 8
2487526194
User skipped!
Count: 7
38029205
Users added!
Count: 6
21222599
Users added!
Count: 5
2335899655
Users added!
Count: 4
3167508269
Users added!
Count: 3
842550390818201600
Users added!
Count: 2
830994691
Users added!
Count: 1
101491032
Users added!
Count: 10
24393751
Users added!
Count: 9
824359737055735813
Users added!
Count: 8
855240223432769538
Users added!
Count: 7
461697741
Users added!
Count: 6
205392468
Users added!
Count: 5
16076032
Users added!
Count: 4
78116670
Users added!
Count: 3
17006157
Users added!
Count: 2
43412697
Users added!
Count: 1
3342812001
Users added!
Count: 10
2374247370
Users added!
Count: 9
45055696
Users added!
Count: 8
289548939
Users added!
Count: 7
757303975
Users added!
Count: 6
1306498

User skipped!
Count: 9
892566309761818625
User skipped!
Count: 8
1611685532
User skipped!
Count: 7
829977013888782338
User skipped!
Count: 6
1607408977
User skipped!
Count: 5
813178646366142465
User skipped!
Count: 4
344747525
User skipped!
Count: 3
18948541
User skipped!
Count: 2
28344432
User skipped!
Count: 1
1612067526
User skipped!
Count: 10
257691095
User skipped!
Count: 9
753057467496558592
User skipped!
Count: 8
842025891093991425
User skipped!
Count: 7
27980159
User skipped!
Count: 6
12354832
User skipped!
Count: 5
41697270
User skipped!
Count: 4
332638587
User skipped!
Count: 3
771778156424491009
User skipped!
Count: 2
63514682
User skipped!
Count: 1
799650593048526848
User skipped!
Count: 10
2416354051
User skipped!
Count: 9
104016988
User skipped!
Count: 8
132496568
User skipped!
Count: 7
30354991
User skipped!
Count: 6
165552004
User skipped!
Count: 5
16051471
User skipped!
Count: 4
292083207
User skipped!
Count: 3
34097113
User skipped!
Count: 2
786309892990574592
User sk

Users added!
Count: 5
186681343
Users added!
Count: 4
2480936977
Users added!
Count: 3
293338829
Users added!
Count: 2
268982264
Users added!
Count: 1
2853322953
Users added!
Count: 10
410245106
Users added!
Count: 9
47098558
Users added!
Count: 8
17487795
Users added!
Count: 7
142849883
Users added!
Count: 6
25535595
Users added!
Count: 5
14750983
Users added!
Count: 4
80612021
Users added!
Count: 3
36948268
Users added!
Count: 2
89820928
Users added!
Count: 1
1428659995
Users added!
Count: 10
42089546
Users added!
Count: 9
916517756257865728
Users added!
Count: 8
2256901579
Users added!
Count: 7
95956928
Users added!
Count: 6
19558732
Users added!
Count: 5
14529929
Users added!
Count: 4
506420950
Users added!
Count: 3
247560825
Users added!
Count: 2
19671129
Users added!
Count: 1
2918161040
Users added!
Count: 10
3837622278
Users added!
Count: 9
717149686176550912
Users added!
Count: 8
1933042237
Users added!
Count: 7
15472373
Users added!
Count: 6
825524430738694146
Users added!
Cou

Users added!
Count: 3
812101377057030149
Users added!
Count: 2
947956236804337664
Users added!
Count: 1
65497475
Users added!
Count: 10
1164221954
Users added!
Count: 9
593083543
Users added!
Count: 8
232650394
Users added!
Count: 7
15735939
Users added!
Count: 6
799047255378391040
Users added!
Count: 5
347618514
Users added!
Count: 4
47379354
Users added!
Count: 3
15464697
Users added!
Count: 2
4506499473
Users added!
Count: 1
856596564516651008
Users added!
Count: 10
90723356
Users added!
Count: 9
222642262
Users added!
Count: 8
254873869
Users added!
Count: 7
303898942
Users added!
Count: 6
903990151147208708
Users added!
Count: 5
87227050
Users added!
Count: 4
23150370
Users added!
Count: 3
15066876
Users added!
Count: 2
177661775
Users added!
Count: 1
36412963
Users added!
Count: 10
4715952374
Users added!
Count: 9
212657985
Users added!
Count: 8
2771345762
Users added!
Count: 7
17072707
Users added!
Count: 6
31182139
Users added!
Count: 5
16700555
Users added!
Count: 4
8526178847

Users added!
Count: 5
929752815559462914
Users added!
Count: 4
2303447282
Users added!
Count: 3
937689794200719360
Users added!
Count: 2
3330193385
Users added!
Count: 1
585636409
Users added!
Count: 10
195138591
Users added!
Count: 9
175085755
Users added!
Count: 8
952219372730318848
Users added!
Count: 7
2314105303
Users added!
Count: 6
23013131
Users added!
Count: 5
2442456596
Users added!
Count: 4
18166778
Users added!
Count: 3
36715753
Users added!
Count: 2
835627637049450497
Users added!
Count: 1
24822993
Users added!
Count: 10
715934174
Users added!
Count: 9
778427250492313600
Users added!
Count: 8
85124669
Users added!
Count: 7
106021590
Users added!
Count: 6
2557564489
Users added!
Count: 5
289548939
Users added!
Count: 4
949895262964563968
Users added!
Count: 3
131810343
Users added!
Count: 2
89887215
Users added!
Count: 1
234186830
Users added!
Count: 10
149086569
Users added!
Count: 9
2496335227
Users added!
Count: 8
104051410
Users added!
Count: 7
825518870421180416
Users 

Users added!
Count: 1
363659029
Users added!
Count: 10
227490812
Users added!
Count: 9
826251192967823361
Users added!
Count: 8
27433580
Users added!
Count: 7
38936142
Users added!
Count: 6
2960441091
Users added!
Count: 5
274440617
Users added!
Count: 4
701725963
Users added!
Count: 3
476256944
Users added!
Count: 2
924744084476059648
Users added!
Count: 1
136550204
Users added!
Count: 10
820380004643061760
Users added!
Count: 9
22254470
Users added!
Count: 8
776173080846622720
Users added!
Count: 7
1622049547
Users added!
Count: 6
2439032786
Users added!
Count: 5
737154821086543873
Users added!
Count: 4
411446545
Users added!
Count: 3
1579303014
Users added!
Count: 2
51139453
Users added!
Count: 1
820733745149792257
Users added!
Count: 10
2413314535
Users added!
Count: 9
953350592210046976
Users added!
Count: 8
825518870421180416
Users added!
Count: 7
132339474
Users added!
Count: 6
133938408
Users added!
Count: 5
95956928
Users added!
Count: 4
3924002953
Users added!
Count: 3
233589

In [54]:
# Load data
republican_data_file_2 = 'data_files/republican_2.txt'
democrat_data_file_2 = 'data_files/democrat_2.txt'

republican_data_2 = read_data_file(republican_data_file_2)
democrat_data_2 = read_data_file(democrat_data_file_2)

print(len(republican_data_2))
print(len(democrat_data_2))

119
218


In [49]:
# Example
print(republican_data_2[0])

897912869907562498 298602588 716248216996560897 969785446543872001 385663346 477803115 2988631277 2244845930 978006911990796289 975682208668274689 713706813762498560 927924718681907200 825804900441415680 451136428 974279815703552002 18500852 956324596441075719 789657362114105344 959828625150357506 3331840661 841268641601585153 25785311 293221626 880827295530975232 78968880 70879844 787141837286875137 916783567090454528 399961663 3713508623 954579633957167105 975891993732796418 134604426 951392574333423616 948989136756772864 974730984699678721 976494947368079361 28842043 736490974340349952 940881677827665921 808753301135171584 173661475 950752853261303808 956325473885200384 938149362789691393 975447254483062786 871142323920527360 1318523322 953802494332809216 777618850460344321 182917963 3545293697 959987559978774528 449473836 797248422562697216 554783472 839543825609736202 969542637857501184 28175561 860833064 3842592924 538370634 4862357050 123510096 3390446901 967927974006808576 1644

In [51]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

class TaggedDocumentList:
    def __init__(self, data, politicians):
        self.data = data
        self.politicians = politicians
        
    def __iter__(self):
        for i in range(len(self.data)):
            yield TaggedDocument(words=self.data[i].split(), tags=[self.politicians[i][1]])

In [53]:
import random

republican_corpus_2 = list(TaggedDocumentList(republican_data_2, "republican"))
democrat_corpus_2 = list(TaggedDocumentList(democrat_data_2, "democrat"))

random.shuffle(republican_corpus_2)
random.shuffle(democrat_corpus_2)

corpus_train_3 = republican_corpus_2[len(republican_corpus_2)//10:] + democrat_corpus_2[len(democrat_corpus_2)//10:]
corpus_test_3 = republican_corpus_2[:len(republican_corpus_2)//10] + democrat_corpus_2[:len(democrat_corpus_2)//10]
print(len(corpus_train_3))
print(len(corpus_test_3))

IndexError: string index out of range

In [42]:
import numpy as np
from sklearn.decomposition import PCA
from matplotlib import pyplot
#get model, we use w2v only

words_np = []
#a list of labels (words)
words_label = []
for word in model.wv.vocab.keys():
    words_np.append(model.wv[word])
    words_label.append(word)
print('Added %s words. Shape %s'%(len(words_np),np.shape(words_np)))

X = model[model.wv.vocab]

pca = PCA(n_components=2)
result = pca.fit_transform(X)

pyplot.scatter(result[:,0], result[:,1])
words = list(model.wv.vocab
            )
for i,word in enumerate(words):
        pyplot.annotate(word, xy=(result[i,0], result[i,1]))
pyplot.show()

Added 122173 words. Shape (122173, 200)


KeyboardInterrupt: 