In [76]:

import pandas as pd
import re
import numpy as np
import pandas as pd
from pprint import pprint
from collections import Counter

#nltk stopwords
from nltk.corpus import stopwords


# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel

# spacy for lemmatization
import spacy

#TF-IDF
from sklearn.feature_extraction.text import TfidfTransformer,CountVectorizer

# Enable logging for gensim - optional
import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.ERROR)

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)

### Read Posts

In [77]:
name = "LatinosForTrump"
posts1 = pd.read_csv(name+"1.csv")
posts2 = pd.read_csv(name+"2.csv")
candidate = pd.concat([posts1,posts2],axis=0)
candidate.head(2)

Unnamed: 0,Group Name,User Name,Facebook Id,Likes at Posting,Created,Type,Likes,Comments,Shares,Love,...,Message,Link,Final Link,Image Text,Link Text,Description,Sponsor Id,Sponsor Name,Total Interactions,Total Interactions (weighted — Likes 1x Shares 1x Comments 1x Love 1x Wow 1x Haha 1x Sad 1x Angry 1x Care 1x )
0,#WalkAway Campaign,,1945356878817544,473251.0,2020-09-20 10:14:06 EDT,Photo,17315,21844,2248,9780,...,Update: We've been overwhelmed with love and s...,https://www.facebook.com/photo.php?fbid=101592...,,,,,,,87280,87280
1,#WalkAway Campaign,,1945356878817544,403749.0,2020-08-28 19:09:31 EDT,Photo,14404,2267,1666,7915,...,My husband was in California this week on busi...,https://www.facebook.com/photo.php?fbid=101021...,,,,,,,26518,26518


In [78]:
candidate.columns

Index(['Group Name', 'User Name', 'Facebook Id', 'Likes at Posting', 'Created',
       'Type', 'Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad',
       'Angry', 'Care', 'Video Share Status', 'Post Views', 'Total Views',
       'Total Views For All Crossposts', 'Video Length', 'URL', 'Message',
       'Link', 'Final Link', 'Image Text', 'Link Text', 'Description',
       'Sponsor Id', 'Sponsor Name', 'Total Interactions',
       'Total Interactions (weighted  —  Likes 1x Shares 1x Comments 1x Love 1x Wow 1x Haha 1x Sad 1x Angry 1x Care 1x )'],
      dtype='object')

## Filter the most relevant columns

In [79]:
content = candidate[['Message','Description','Link','Group Name','Likes', 'Comments', 'Shares', 'Love', 'Wow', 'Haha', 'Sad',
       'Angry', 'Care','Total Interactions']].copy()
content.shape

(17007, 14)

### Add column to concat Message and Description

In [80]:
concat_message = content['Message'].fillna('') + (' ' + content['Description']).fillna('') + (' ' + content['Link']).fillna('')
content['MessageDescr'] = concat_message

### Just keep those groups that have Trump or Republican in their name

In [81]:
groups_candidate = content['Group Name'].str.lower().reset_index()
final_groups = groups_candidate[groups_candidate['Group Name'].str.contains(terms)].copy()
final_groups = final_groups['Group Name'].unique()
final_groups = pd.DataFrame(final_groups)
final_groups.columns = ['Group Name']
final_groups.to_csv(name+"_groups.csv")
final_groups.shape

(1394, 1)

In [82]:
content['Group Name'] = content['Group Name'].str.lower()
content.shape

(17007, 15)

### Keep the posts from the preselected groups

In [83]:
content = content[content['Group Name'].isin(final_groups['Group Name'])].copy()
content.shape

(6525, 15)

## Tokenize words and Clean-up text

In [84]:
content['cleanMessage'] = content['MessageDescr'].apply(lambda x: re.split('https:\/\/.*', str(x))[0])
content['cleanMessage'].shape

(6525,)

In [85]:
# Convert to list
data = content.cleanMessage.tolist()

In [86]:
# Remove new line characters
data = [re.sub('\s+', ' ', sent) for sent in data]

# Remove distracting single quotes
data = [re.sub("\'", "", sent) for sent in data]

In [87]:
def sent_to_words(sentences):
    for sentence in sentences:
        yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))  # deacc=True removes punctuations

data_words = list(sent_to_words(data))

print(data_words[:1])

[['latinos', 'for', 'trump', 'midland', 'texas']]


#### Creating Bigram and Trigram Models

In [88]:
# Build the bigram and trigram models
bigram = gensim.models.Phrases(data_words, min_count=5, threshold=100) # higher threshold fewer phrases.
trigram = gensim.models.Phrases(bigram[data_words], threshold=100)  

# Faster way to get a sentence clubbed as a trigram/bigram
bigram_mod = gensim.models.phrases.Phraser(bigram)
trigram_mod = gensim.models.phrases.Phraser(trigram)

# See trigram example
print(trigram_mod[bigram_mod[data_words[0]]])

['latinos', 'for', 'trump', 'midland', 'texas']


In [89]:
stop_words = stopwords.words('english')
stop_words.extend(stopwords.words('spanish'))
stop_words.extend(['su','sus','al','de','en','el'])
stop_words.extend(['like','would','get','many','much'])

## Remove Stopwords, Make Bigrams and Lemmatize

In [90]:
# Define functions for stopwords, bigrams, trigrams and lemmatization
def remove_stopwords(texts,stop_words_lang):
    return [[word for word in simple_preprocess(str(doc)) if word not in stop_words_lang] for doc in texts]

def make_bigrams(texts):
    return [bigram_mod[doc] for doc in texts]

def make_trigrams(texts):
    return [trigram_mod[bigram_mod[doc]] for doc in texts]

def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
    """https://spacy.io/api/annotation"""
    texts_out = []
    for sent in texts:
        doc = nlp(" ".join(sent)) 
        texts_out.append([token.lemma_ for token in doc if token.pos_ in allowed_postags])
    return texts_out

In [92]:
# Remove Stop Words
data_words_nostops = remove_stopwords(data_words,stop_words)

# Form Bigrams
data_words_bigrams = make_bigrams(data_words_nostops)

# Initialize spacy 'en' model, keeping only tagger component (for efficiency)
#!python3 -m spacy download en
nlp = spacy.load('en', disable=['parser', 'ner'])

# Do lemmatization keeping only noun, adj, vb, adv
data_lemmatized = lemmatization(data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

print(data_lemmatized[:2])

[[], ['parade', 'today']]


## Bag of Words

In [95]:
def combine_words(word1,word2,dictionary):

    if word1 in dictionary and word2 in dictionary:
        concat_name = word1+" / "+word2
        dictionary[concat_name] = dictionary[word1] + dictionary[word2]
        dictionary.pop(word1, None)
        dictionary.pop(word2, None)
    return dictionary

In [96]:
flat_list_words = [item for sublist in data_lemmatized for item in sublist]
count_words = Counter(flat_list_words)

In [97]:
final_dict = combine_words('donald','trump',count_words)
final_dict = combine_words('joe','biden',count_words)
final_dict = combine_words('kamala','harris',count_words)
final_dict = combine_words('american','america',count_words)
final_dict = combine_words('estados','unidos',count_words)
final_dict = combine_words('voters','vote',count_words)
final_dict = combine_words('mexico','mexican',count_words)
final_dict = combine_words('casa','blanca',count_words)
final_dict = combine_words('venezuela','venezuelans',count_words)
final_dict = combine_words('latino','latinos',count_words)
final_dict = combine_words('american / america','americans',count_words)
final_dict = combine_words('voters / vote','voting',count_words)
final_dict = combine_words('puerto','rico',count_words)
final_dict = combine_words('communism','communist',count_words)
final_dict = combine_words('ee','uu',count_words)
final_dict = combine_words('united','states',count_words)

In [98]:
final_dict.most_common(1000)

[('donald / trump', 7595),
 ('latino / latinos', 3164),
 ('great', 1736),
 ('support', 1662),
 ('vote', 1633),
 ('people', 1499),
 ('roundtable', 1220),
 ('live', 1191),
 ('make', 1177),
 ('woman', 1164),
 ('time', 1068),
 ('know', 1038),
 ('president', 1011),
 ('say', 991),
 ('black', 916),
 ('night', 897),
 ('american', 879),
 ('watch', 867),
 ('hold', 867),
 ('flag', 845),
 ('love', 819),
 ('event', 804),
 ('want', 784),
 ('rally', 760),
 ('man', 741),
 ('go', 720),
 ('group', 716),
 ('see', 688),
 ('election', 671),
 ('world', 666),
 ('keep', 658),
 ('get', 658),
 ('line', 656),
 ('show', 654),
 ('medium', 642),
 ('host', 622),
 ('thousand', 613),
 ('voter', 606),
 ('today', 576),
 ('pray', 568),
 ('tell', 544),
 ('joe / biden', 529),
 ('text', 521),
 ('come', 520),
 ('good', 520),
 ('last', 516),
 ('first', 516),
 ('leave', 511),
 ('community', 509),
 ('even', 504),
 ('well', 499),
 ('honest', 481),
 ('country', 471),
 ('latinosfortrump', 450),
 ('back', 437),
 ('participant', 437

## TD-IDF

In [99]:
docs = ([' '.join(ele) for ele in data_words_nostops] )

In [100]:
cv=CountVectorizer() 
word_count_vector=cv.fit_transform(docs)
word_count_vector.shape

(6525, 12581)

In [101]:
tfidf_transformer=TfidfTransformer(smooth_idf=True,use_idf=True) 
tfidf_transformer.fit(word_count_vector)

TfidfTransformer()

In [102]:
# print idf values 
df_idf = pd.DataFrame(tfidf_transformer.idf_, index=cv.get_feature_names(),columns=["idf_weights"]) 
 
# sort ascending 
df_idf.sort_values(by=['idf_weights'])

Unnamed: 0,idf_weights
trump,1.170864
latinos,1.326743
president,1.752489
donald,2.261690
florida,2.563907
...,...
landed,9.090402
lancaster,9.090402
lanai,9.090402
lauderdale,9.090402


In [103]:
# count matrix 
count_vector=cv.transform(docs) 
 
# tf-idf scores 
tf_idf_vector=tfidf_transformer.transform(count_vector)

In [104]:
feature_names = cv.get_feature_names() 
 
#get tfidf vector for first document 
first_document_vector=tf_idf_vector[0] 
 
#print the scores 
df = pd.DataFrame(first_document_vector.T.todense(), index=feature_names, columns=["tfidf"]) 
df = df[(df != 0).all(1)]
df.sort_values(by=["tfidf"],ascending=False).to_csv(name+"_tfidf.csv")

In [105]:
df

Unnamed: 0,tfidf
latinos,0.137237
midland,0.845524
texas,0.501585
trump,0.121113


In [106]:
word = df.index[0]
content["lowercase"] = content['MessageDescr'].apply(lambda x: x.lower())
word_messages = content[content['lowercase'].str.contains(word)]

word_messages = word_messages['lowercase'].value_counts(ascending=False).rename_axis('unique_messages').reset_index(name='counts')
word_messages.to_csv(word +"_"+ name+"_messages.csv")
word_messages

Unnamed: 0,unique_messages,counts
0,watch live: president donald trump holds lati...,201
1,watch live: president trump hosts latinos for...,162
2,🚨 watch this 🚨\n\nthousands of hispanics gath...,134
3,kamala harris stops by 2 florida\nrestaurants...,130
4,live: president donald trump holds latinos fo...,127
...,...,...
2251,whoa!!!!! trump 2020 🇺🇸❤🇺🇸 > trump51 - nationa...,1
2252,biden is once again spending a leisurely day a...,1
2253,#latinosfortrump will flip new mexico red for...,1
2254,latinos for trump at conservative grounds in l...,1


### Repeated messages

In [107]:
content['Total Interactions']=content['Total Interactions'].astype(str).str.replace(',', '').astype(int)
results = content.groupby(['MessageDescr']).aggregate({'MessageDescr': 'count','Likes': 'sum','Comments': 'sum', 'Shares': 'sum', 'Love': 'sum', 'Wow': 'sum', 'Haha': 'sum', 'Sad': 'sum',
       'Angry': 'sum', 'Care': 'sum','Total Interactions':'sum'})
results.rename(columns={"MessageDescr": "Counts"}, inplace=True)


results.to_csv("mostpopularLatinosFor"+name+".csv")

In [108]:
results.sort_values(by='Total Interactions',ascending=False)

Unnamed: 0_level_0,Counts,Likes,Comments,Shares,Love,Wow,Haha,Sad,Angry,Care,Total Interactions
MessageDescr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
"Kamala Harris stops by 2 Florida\nrestaurants but was shocked, she was met by Latinos for Trump who oppose her socialism.\n(Spanish news)\n#latinosfortrump \n#lexit https://www.facebook.com/RedWaveArmyUSA/videos/948812358956819/",130,4617,726,1840,1337,24,522,2,16,30,9114
"https://www.thegatewaypundit.com/2020/10/miami-pd-estimates-30000-cars-participated-anti-communist-latinos-trump-caravan-south-florida-video/?utm_source=Twitter&utm_medium=PostTopSharingButtons&utm_campaign=websitesharingbuttons Latinos in South Florida love President Trump. Latinos from Nicaragua, Venezuela and Cuba living in South Florida know the dangers of Socialism and Communism which is why they reject Joe Biden and Kamala Harris. Thousands of cars participated in Saturday’s anti-Communist, Latinos for Trump caravan... https://www.thegatewaypundit.com/2020/10/miami-pd-estimates-30000-cars-participated-anti-communist-latinos-trump-caravan-south-florida-video/?utm_source=Twitter&utm_medium=PostTopSharingButtons&utm_campaign=websitesharingbuttons",3,4022,493,2226,1684,119,10,0,1,25,8580
🚨 WATCH THIS 🚨\n\nTHOUSANDS of Hispanics gathered in Miami today for an anti-socialism caravan and concert in support of President Donald J. Trump.\n\nLatino enthusiasm for President Trump is very real—and it is growing! https://www.facebook.com/LatinosForTrump/videos/708587353085197/,134,1712,176,458,600,23,2,0,1,17,2989
"Miami PD Estimates More Than 30,000 Cars Participated in Anti-Communist, Latinos For Trump Caravan in South Florida (VIDEO) https://www.thegatewaypundit.com/2020/10/miami-pd-estimates-30000-cars-participated-anti-communist-latinos-trump-caravan-south-florida-video/?utm_source=Facebook",10,1334,236,473,662,44,2,1,0,8,2760
"Latinos in South Florida love President Trump. Latinos from Nicaragua, Venezuela and Cuba living in South Florida know the dangers of Socialism and Communism which is why they reject Joe Biden and Kamala Harris. Thousands of cars participated in Saturday’s anti-Communist, Latinos for Trump caravan... https://www.thegatewaypundit.com/2020/10/miami-pd-estimates-30000-cars-participated-anti-communist-latinos-trump-caravan-south-florida-video/",24,1170,121,578,418,23,0,0,0,6,2316
...,...,...,...,...,...,...,...,...,...,...,...
"[https://www.facebook.com/events/319950532647266/](https://www.facebook.com/events/319950532647266/) It's on: The Struggle for the Future\n\n\nWill it be fascist \n\nOr, will we act on our conscience?\n\n\nOctober 3rd We Begin\n\n\n\nNon-Violent Protests that Continue Day After Day \n\nFrom Now Through the Elections \n\nUnited by the Demand: \n\nTrump/Pence OUTNOW!\n\n\n\nWe face a rolling coup barreling to a showdown on November 3rd. The Trump/Pence regime have said and shown that they will not abide by an election they lose. \n\n\n\nIt is the regime—Trump, Pence, Barr—who have raised the specter of insurrection and sedition. It is the regime that is tearing up the rule of law. It is the regime that demonizes and brutalizes immigrants, Black people, Latinos, Native Americans, women, LGBTQ people, the media and peaceful protesters. It is the regime that opposes science, leading to a country and a planet burning and drowning, and to unconscionable unnecessary death and illness from Covid. \n\n\n\nWe face a critical crossroads. The hour is late, but not too late. Relying only on our voting will lead to disaster. The Trump/Pence regime is subverting the election now and preparing to nullify it. Waiting to act till they do so will be too late. \n\n\n\nTrump's MAGA mobs maraud around the country in water and land caravans; they murder protesters and innocent people by guns and by their cars. They spread Covid. They are inflamed with passionate reactionary intensity. All for the purpose of rousing their followers and intimidating and demoralizing all who should be standing up for justice. \n\n\nNow we must act. Beginning Saturday, October 3rd, gather and bring your family, friends, and community into the public squares of cities and towns across the country in non-violent protest unified by the demand: Trump/Pence OUTNOW! And, come back in waves, day after day. This is possible. We saw this in June and early July where people came out every day for justice for Black Lives. Now, that struggle, and every struggle, and the lives of millions of people hang in the balance. In our diversity, with our multiplicity of causes and demands we must come together and reach out to others to join us, recognizing that should the Trump/Pence regime remain in power it will be a disaster for every just cause, and most of all, for the lives of people here and around the world. \n\n\n\nWe will act on our principles, modeling the world we aspire to in contrast to the bigotry and hatred of this fascist regime. We will act together with creative joy and with the conviction and courage of standing together not only for ourselves but for all humanity. In the Name of Humanity We Refuse to Accept a Fascist America - Trump/Pence Out NOW! https://www.facebook.com/events/319950532647266/",1,0,0,0,0,0,0,0,0,0,0
Latinos for Trump in Las Vegas https://www.youtube.com/watch?v=wqWZGt2-p9E&feature=share,1,0,0,0,0,0,0,0,0,0,0
"[https://www.facebook.com/410081532814093/videos/1596602500539490/](https://www.facebook.com/410081532814093/videos/1596602500539490/?__cft__[0]=AZUOt1qqMaIpTm4MPapDkv9SeBitC8VlmW_kHI8x-osL5mqYyr5ud7kW5fUsqCW9lgc8TdPk4meeLUMcJDEpyJ57KcvQsDHFeVvMDfua7VvJPo3kYx9SZbjClCCrlSdKZA1IUzvmK8_5bqBjm7VMWBQx62U0Fy3-ws9h9_wXlFnMMoI3ztW42eSs-inMgkOGDUuOA8Lqu87J8TzVt4wJo4FA&__tn__=-UK-R) Trump /Biden debate high lights with laugh track -Simply relying on voting to oust this regime will almost certainly lead to very bad, even disastrous results. This is especially true given what this regime is already doing, and what Trump is saying, in relation to the election.\n\nThrough completely unfounded attacks on voting by mail, and preparation to intimidate and obstruct Black people and Latinos attempting to exercise their right to vote, moves are already underway by the Trump/Pence regime and its supporters to suppress the votes of those who are likely to vote against Trump. As he did in the 2016 election, Trump has already indicated his likely refusal to accept the result of the election this time if he is not the winner. And now Trump has openly “floated” the idea of “delaying” the election.\n\nGiven what Trump has already done, and what he has blatantly declared, as much as it is horrifying it is also very realistic to envision this regime stationing storm troopers, loyal to this regime, in cities all across this country—viciously moving to suppress any expression of resistance or opposition—with the approach of the election, and continuing beyond that.\n\nThe ongoing pandemic, or executive orders to quell “civil disorder” (i.e., protests) in many parts of the country, could also serve as pretexts to “postpone” the election, perhaps indefinitely.\n\nAnd it is certainly not unthinkable that Trump would move to create a “national emergency”—for example, by carrying out acts of war, against Iran or possibly even China—in order to further institute even more extreme repressive conditions, with even greater numbers of para-military storm troopers occupying cities, in order either to cancel (or indefinitely “delay”) the election, or to control the voting and the results of the election if it is held.\n\nIt is of critical importance to continue to build resistance, right now and in an increasingly powerful way, against any and all repressive moves by Trump, including by building mass opposition to this regime’s attempts at voter suppression and through mass mobilization in support and defense of those who are targets of such suppression.\n\nWith the full awareness of what is represented by this fascist regime, and what it means that Trump is not only seeking to suppress the votes of people who will vote against him but is also preparing to utilize forceful, violent repression to remain in office if he is not declared the winner in the election, it is of critical and urgent importance to build now truly massive and sustained mobilization around the unifying demand that this regime must be OUT NOW!—with an orientation of being prepared to continue this even past the election, if the situation requires it.\n\nFrom the first days of the Trump/Pence regime, Refuse Fascism has been calling out the fascism of this regime and calling for the mass mobilization to drive out this regime that is now, all the more urgently, required. It would have been very good—it could have made a real difference—if all those who hate this regime but failed, or refused, to recognize its actual fascist nature and the great danger to humanity it poses as such, had much earlier responded to and actively taken up this call by Refuse Fascism. Now, finally, there is a growing recognition, and increasing discussion, about the “authoritarian” nature of this regime, and even the use of the term “fascism” to describe it. (As I have pointed out, on the part of many, this is a case of “Oh, now they’re saying” this is fascism, as if that has just become the case. But, with an understanding of the profound stakes involved, it is important to recognize that now is better than never.) The hour is getting late—but it is still not too late to make this mass mobilization a reality. Relying on, and confining actions within, the “norms” and “regular channels” of this system, including the upcoming election, cannot solve this profound and urgent problem, especially when dealing with a fascist regime and its fanatical followers that are determined to trample on and tear up those “norms.” https://www.facebook.com/410081532814093/videos/1596602500539490/",3,0,0,0,0,0,0,0,0,0,0
[https://therightscoop.com/massive-latinos-for-trump-caravan-takes-place-in-miami-but-almost-no-media-attention/?fbclid=IwAR1KHcbiiJB-6V2C-Sp8H7Bmo114ZxyESdQt9rwIKV5ElGua06cuHI8bJM4](https://therightscoop.com/massive-latinos-for-trump-caravan-takes-place-in-miami-but-almost-no-media-attention/?fbclid=IwAR1KHcbiiJB-6V2C-Sp8H7Bmo114ZxyESdQt9rwIKV5ElGua06cuHI8bJM4) UPDATE: The group Cubans4Trump has told TRS that they are the ones who organized the the Trump caravan yesterday. They note the group Latinos for Trump coordinated with them in Orlando only. https://therightscoop.com/massive-latinos-for-trump-caravan-takes-place-in-miami-but-almost-no-media-attention/?fbclid=IwAR1KHcbiiJB-6V2C-Sp8H7Bmo114ZxyESdQt9rwIKV5ElGua06cuHI8bJM4,1,0,0,0,0,0,0,0,0,0,0
