# Part 1: Data import and cleaning

In [1]:
#import initial libraries

import pandas as pd
import numpy as np

In [2]:
#import data (duplicate Tweets already removed using Twarc)

df = pd.read_csv("data/navalny.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# look at basic info about data

df.info()

# 85,382 Tweeets on keyword search "navalny" on Sunday of weekend of big protests in Russia 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85382 entries, 0 to 85381
Data columns (total 37 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            85382 non-null  int64  
 1   tweet_url                     85382 non-null  object 
 2   created_at                    85382 non-null  object 
 3   parsed_created_at             85382 non-null  object 
 4   user_screen_name              85382 non-null  object 
 5   text                          85382 non-null  object 
 6   tweet_type                    85382 non-null  object 
 7   coordinates                   3 non-null      object 
 8   hashtags                      21391 non-null  object 
 9   media                         7274 non-null   object 
 10  urls                          12356 non-null  object 
 11  favorite_count                85382 non-null  int64  
 12  in_reply_to_screen_name       10206 non-null  object 
 13  i

In [4]:
# count number of languages in data set

count_lang = df['lang'].unique()
print(len(count_lang), count_lang)

51 ['en' 'es' 'und' 'pt' 'ru' 'de' 'fr' 'tr' 'it' 'ca' 'ro' 'pl' 'in' 'ja'
 'ar' 'nl' 'fa' 'ht' 'hi' 'uk' 'zh' 'hu' 'et' 'cs' 'th' 'sv' 'fi' 'no'
 'lv' 'da' 'is' 'bg' 'eu' 'sl' 'el' 'sr' 'ur' 'tl' 'lt' 'ko' 'vi' 'bn'
 'mr' 'ckb' 'cy' 'iw' 'ps' 'ta' 'gu' 'ne' 'te']


In [5]:
# tweets are in 51 different languages

# I'll be working only with Tweets in English
# drop tweets in all other languages
# now working with 38884 Tweets 

df = df[df.lang == 'en']
df.shape

# after non-English removed, just 58,071 Tweets left 

(58071, 37)

In [6]:
# drop unnecessary columns 

df = df.drop(['tweet_url', 'created_at', 'media', 'urls','in_reply_to_screen_name',
       'in_reply_to_status_id', 'in_reply_to_user_id', 'retweet_or_quote_id',
       'retweet_or_quote_screen_name', 'retweet_or_quote_user_id', 'source',
       'user_created_at', 'user_name', 'user_verified', 'user_friends_count', 'user_listed_count',
       'user_statuses_count', 'user_default_profile_image', 'user_description',
       'user_favourites_count', 'user_followers_count', 'coordinates', 'lang', 'user_location', 'user_time_zone', 'user_urls', 'place'], axis=1)

In [11]:
# check end time & date of data

df.iloc[0]

# last Tweet downloaded 2021-01-25 02:28:56+00:00

id                                                  1353530261405913088
parsed_created_at                             2021-01-25 02:28:56+00:00
user_screen_name                                           TooheySpence
text                  .@newtgingrich, what the hell has happened to ...
tweet_type                                                      retweet
hashtags                                                            NaN
favorite_count                                                     4114
possibly_sensitive                                                  NaN
retweet_count                                                       637
user_id                                                      2329017175
Name: 0, dtype: object

In [10]:
# check start time & date of data 

df.iloc[-1]

# first Tweet on 2021-01-24 06:02:39+00:00

id                                                  1353221656437792768
parsed_created_at                             2021-01-24 06:02:39+00:00
user_screen_name                                            PINK1963DSW
text                  Russian police detain more than 2,000 people d...
tweet_type                                                      retweet
hashtags                                                            NaN
favorite_count                                                     5122
possibly_sensitive                                                  NaN
retweet_count                                                      2589
user_id                                                      2856133189
Name: 85381, dtype: object

# Part 2: Text processing for NLP 

In [12]:
# create variable for "text" column 

text = df['text'] 

In [13]:
# tokenize, remove stopwords, remove urls, lowercase, remove punctuation, remove numbers

# import necessary libraries: ntlk etc.

import string
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import TweetTokenizer
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer 


stop = stopwords.words('english')

punc = list(set(string.punctuation))

def tokenizer(text):
    tokenizer = TweetTokenizer()
    tokens = tokenizer.tokenize(text)
    return tokens

def remove_url(text):
    url = re.compile(r"https?://\S+|www\.\S+")
    return url.sub(r'', text)

def process_text(text):
    text = remove_url(text)
    text = tokenizer(text)
    text = [word.lower() for word in text]
    text = [re.sub('[0-9]+', '', word) for word in text]
    text = [word for word in text if word not in punc]
    text = [word for word in text if word not in stop]
    text = [each for each in text if len(each) > 1]
    text = [word for word in text if ' ' not in word]
     
    return text

In [14]:
# apply text processing functions to text

df['processed_text'] = df['text'].apply(process_text)

In [15]:
# look at some of processed text

pd.set_option('display.max_colwidth', -1)
df['processed_text'][:20]

  This is separate from the ipykernel package so we can avoid doing imports until


0     [@newtgingrich, hell, happened, offensive, navalny, fights, democracy, corrupt, despot, seeks, silence, political, opponents, trump, threatens, democracy, corrupt, despot, seeks, retribution, silence, political, opponents]                             
1     [newt, offensive, wrong, trump, equivalent, putin, navalny]                                                                                                                                                                                                
3     [@newtgingrich, hell, happened, offensive, navalny, fights, democracy, corrupt, despot, seeks, silence, political, opponents, trump, threatens, democracy, corrupt, despot, seeks, retribution, silence, political, opponents]                             
4     [@teamnavalny, @navalny, checkout, ice, rink]                                                                                                                                                                               

In [16]:
# part-of-speech tagging 

ready_for_pos = df['processed_text']

def pos_tagging(text):
    pos_tag = [pos_tag(word) for word in ready_for_pos]

df['pos_tagged'] = df.processed_text.apply(lambda x: pos_tag(x))

In [17]:
# lemmatizing

pos_tagged = df['pos_tagged']

wordnet = WordNetLemmatizer() 

lemmatized = [[wordnet.lemmatize(word[0]) for word in words] for words in pos_tagged]

In [18]:
pos_tagged = df['pos_tagged']

# lemmatizing

from nltk.stem import WordNetLemmatizer 
wordnet = WordNetLemmatizer() 

lemmatized = [[wordnet.lemmatize(word[0]) for word in words] for words in pos_tagged]

In [19]:
# look at lemmatized text

df['lemmatized'] = lemmatized
lemmatized[:20]

[['@newtgingrich',
  'hell',
  'happened',
  'offensive',
  'navalny',
  'fight',
  'democracy',
  'corrupt',
  'despot',
  'seek',
  'silence',
  'political',
  'opponent',
  'trump',
  'threatens',
  'democracy',
  'corrupt',
  'despot',
  'seek',
  'retribution',
  'silence',
  'political',
  'opponent'],
 ['newt', 'offensive', 'wrong', 'trump', 'equivalent', 'putin', 'navalny'],
 ['@newtgingrich',
  'hell',
  'happened',
  'offensive',
  'navalny',
  'fight',
  'democracy',
  'corrupt',
  'despot',
  'seek',
  'silence',
  'political',
  'opponent',
  'trump',
  'threatens',
  'democracy',
  'corrupt',
  'despot',
  'seek',
  'retribution',
  'silence',
  'political',
  'opponent'],
 ['@teamnavalny', '@navalny', 'checkout', 'ice', 'rink'],
 ['@newtgingrich',
  'fucking',
  'moron',
  'comparison',
  'involving',
  'navalny',
  'bloated',
  'mango',
  'corpse',
  'rotting',
  'mar',
  'lago',
  'would',
  'putin'],
 ['russia',
  'navalny',
  'protest',
  'kremlin',
  'hit',
  'west'

# Part 3: Topic modeling

In [20]:
# before vectorizing, cast lists of words back into strings

df['final_docs'] = df['lemmatized'].apply(lambda x: " ".join(x))
pd.set_option('display.max_colwidth', -1)
final_docs = df['final_docs']
final_docs[3000:3020]

  after removing the cwd from sys.path.


3467    newt offensive wrong trump equivalent putin navalny                                                                                                                                              
3468    @newtgingrich equating trump navalny reveals depth moral bankruptcy truly breathtaking                                                                                                           
3469    @newtgingrich fucking moron comparison involving navalny bloated mango corpse rotting mar lago would putin                                                                                       
3470    @newtgingrich fucking moron comparison involving navalny bloated mango corpse rotting mar lago would putin                                                                                       
3471    @newtgingrich comparing navalny trump vile insulting specious navalny risking countryman exact opposite trump navalny courage honor altruism one hair follicle trump entire lb body     

In [21]:
#create document term matrix with TFIDF

#import vectorizing tool (usee TFIDF)
from sklearn.feature_extraction.text import TfidfVectorizer
# set max_features to 2000 (specifies the number of most frequently occurring words for which we want to create feature vectors)
# set min_df to 5 (word must occur in at least 5 documents)
# set max_df to 0.85 (word must not occur in more than 85 percent of the documents) 

tfidfconverter = TfidfVectorizer(max_features=2000, min_df=5, max_df=0.85, ngram_range=(1, 2), stop_words='english')  
doc_term_matrix_1 = tfidfconverter.fit_transform(df['final_docs'].values.astype('U'))

In [22]:
#run NMF model 

#import NMF tool 
from sklearn.decomposition import NMF

nmf_model = NMF(n_components=6)
nmf_Z = nmf_model.fit_transform(doc_term_matrix_1)


In [24]:
# run LDA model

#import LDA tool 
from sklearn.decomposition import LatentDirichletAllocation

lda_model = LatentDirichletAllocation(n_components = 6, max_iter=10, learning_method='online', learning_decay=.7)
lda_Z = lda_model.fit_transform(doc_term_matrix_1)

In [25]:
def print_topics(model, vectorizer, top_n=10):
    for idx, topic in enumerate(model.components_):
        print("Topic %d:" % (idx))
        print([(vectorizer.get_feature_names()[i], topic[i])
                        for i in topic.argsort()[:-top_n - 1:-1]])
 
print("LDA Model:")
print_topics(lda_model, tfidfconverter )
print("=" * 30)
 
print("NMF Model:")
print_topics(nmf_model, tfidfconverter )
print("=" * 30)

LDA Model:
Topic 0:
[('protest', 1307.386433033172), ('russia', 1186.155136247537), ('putin', 1145.4338217469776), ('support', 955.6376575527966), ('russian', 927.2763382886401), ('people', 847.048493367766), ('opposition', 790.5448125640161), ('jailed', 658.8859105620048), ('alexei', 656.7037902618763), ('alexei navalny', 655.3131047349726)]
Topic 1:
[('protester', 1035.6604707927377), ('moscow', 972.4691464745188), ('police', 961.257725715466), ('navalny protester', 954.7064938831056), ('snowball', 927.0615953307649), ('police snowball', 904.4118034133913), ('snowball moscow', 885.4716942733882), ('pelting', 885.3518629188189), ('pelting police', 885.3518629188189), ('russia', 885.2501793871986)]
Topic 2:
[('trump', 411.6709622710228), ('regime', 294.8622443365178), ('democracy', 234.71843701463277), ('year', 233.97251457365644), ('putin', 231.4560406415081), ('old', 230.80009143862034), ('newtgingrich', 221.70458473351525), ('fear', 215.04277185316207), ('way', 213.7007341643761), (

In [26]:

topic_values = lda_model.transform(doc_term_matrix_1)
df['LDA1_topic'] = topic_values.argmax(axis=1)

In [33]:

topic_values = nmf_model.transform(doc_term_matrix_1)
df['NMF_topic'] = topic_values.argmax(axis=1)

In [27]:
df['text'][df.LDA1_topic ==0][:20]

4     @teamnavalny @navalny Checkout that ice rink                                                                                                                                                                                                                                                                           
8     Alexei Navalny is the most Russian Russian who ever lived. Utterly unafraid.                                                                                                                                                                                                                                           
11    If I see you defending Navalny, it's block on sight                                                                                                                                                                                                                                                                    
12    are also restricted from doing business 

In [28]:
df['text'][df.LDA1_topic ==1][:20]

10     Biden administration condemns Putin’s attack on the protesters in Russia and calls for the release of @navalny. Big change from you-know-who. Elections have consequences. Maybe even for Putin. \n#FreeNavalny. https://t.co/Gcp4hxG88S                                                                   
31     Alexei Navalny is the face of courage on the planet, as are the thousands of Russians who stand with him. Every champion of truth and freedom salutes them.                                                                                                                                                
61     @ImReadinHere Where do you people get your talking points?\n\nA secret decoder ring with keys in Rush Limbaugh ads?\n\nNavalny was born in the late 1970s in the Moscow Oblast. There’s less Nazism there than on the Temple Mount in Jerusalem.\n\nLearn the basics before you repeat big words. \n\n#MAGA
77     Alexei Navalny is the face of courage on the planet, as are the thousand

In [29]:
df['text'][df.LDA1_topic ==2][:20]

0     .@newtgingrich, what the hell has happened to you?\nThis is offensive.\n\nNavalny fights for democracy against a corrupt despot, who seeks to silence political opponents.\n\nTrump threatens democracy, is a corrupt despot, who seeks retribution on and to silence political opponents. https://t.co/z8pYuJIwot
1     Newt, this is offensive. And wrong. Trump is the equivalent of Putin. Not Navalny. https://t.co/3JyKX5i8fj                                                                                                                                                                                                        
3     .@newtgingrich, what the hell has happened to you?\nThis is offensive.\n\nNavalny fights for democracy against a corrupt despot, who seeks to silence political opponents.\n\nTrump threatens democracy, is a corrupt despot, who seeks retribution on and to silence political opponents. https://t.co/z8pYuJIwot
13    Newt, this is offensive. And wrong. Trump is the equiva

In [30]:
df['text'][df.LDA1_topic ==3][:20]

16     @newtgingrich Did Alexei Navalny try to get the VP murdered so he couldn't certify election results? Did he have insurrectionists beat police with the flag and did he have a cop murdered? Did he try to force a state Secretary of State to falsify election results? DID HE DO ANY OF THAT NEWT? https://t.co/2oT3Qu82jt
17     That disgusting supplicant, John Croyn, said that Biden should hold Russia accountable for the Navalny poisoning.  Do these liars and thieves ever hear their own hypocrisy.  Never did trump hold Russia accountable. Nor did any elected Republican. Bounties and attacks. STFU Croyn.                                   
34     @newtgingrich Trump tried to overturn a free and fair election. Navalny opposes the autocrat in Russia. This is just a stupid lie.                                                                                                                                                                                         
49     Biden should hold Russia

In [31]:
df['text'][df.LDA1_topic ==4][:20]

6      @newtgingrich You are a fucking moron.\n\nIn a comparison involving Navalny, that bloated mango corpse rotting at Mar a Lago would be Putin.                                                                                                                                                            
24     This is the Netherlands today.  A demonstration against #Lockdownnl . What if this scene was from yesterday's #Navalny Rally in Moscow,  #Russia ? \n\n https://t.co/d371FG1NhG                                                                                                                         
33     So boring. So typical. Blame the west for your failures. Did the west try to murder Navalny? Did the west steal your citizen’s money to build a Billion dollar secret palace for a “President” who plans to stay in power for decades? Maybe listen to your people for a change. https://t.co/OykQDz8IsH
40     @newtgingrich You are a fucking moron.\n\nIn a comparison involving Navalny, that

In [37]:
df['text'][df.LDA1_topic ==5][140:170]

2769    In the Siberian city of Irkutsk, which is ca. 5,200 km from Moscow, people are taking to the streets today, chanting: "We won't go!"\n\n#Navalny, Putin's most high-profile critic, called for protests after his arrest last weekend.\n\nhttps://t.co/NYqKxydNnP                                                            
2783    @revoltinghippie @brummer_brumm Navalny is a nationalist? Read this thread, if you please, and you will see that he is nothing but an opportunist and traitor. https://t.co/czBcn7WaQP                                                                                                                                       
2798    @YiSiping Well, from what I understand Navalny has a past mired in right-wing, anti-muslim views. The question begs: Who *are* these people? And what motivates them? I've yet to come across a clear explanation of this...                                                                                                 
2804    it seems to me

In [38]:
df['text'][df.NMF_topic ==0][170:200]

1567    Alexei Navalny is the face of courage on the planet, as are the thousands of Russians who stand with him. Every champion of truth and freedom salutes them.
1571    Alexei Navalny is the face of courage on the planet, as are the thousands of Russians who stand with him. Every champion of truth and freedom salutes them.
1573    Alexei Navalny is the face of courage on the planet, as are the thousands of Russians who stand with him. Every champion of truth and freedom salutes them.
1579    Alexei Navalny is the face of courage on the planet, as are the thousands of Russians who stand with him. Every champion of truth and freedom salutes them.
1581    Alexei Navalny is the face of courage on the planet, as are the thousands of Russians who stand with him. Every champion of truth and freedom salutes them.
1582    Alexei Navalny is the face of courage on the planet, as are the thousands of Russians who stand with him. Every champion of truth and freedom salutes them.
1588    Alexei N

In [39]:
df['text'][df.NMF_topic ==1][170:200]

11937    Alexei Navalny protesters bloodied in violent police clashes with 2,500 arrests https://t.co/GxnAHUTM4b https://t.co/hdCdii4xOB                                   
12007    Navalny protesters pelting police with snowballs in Moscow.\n#Russia https://t.co/gY6rceXuO5                                                                      
12074    RUSSIA - Protests in support of opponent Navalny held in jail : police pelted with snowballs in Moscow \n\nWatch this video:\nhttps://t.co/YxgPK8p2N4 via @YouTube
12098    Navalny protesters pelting police with snowballs in Moscow.\n#Russia https://t.co/gY6rceXuO5                                                                      
12112    Protesters chanting “Russia without Putin” at #Navalny rally in Moscow #Russia #CNN https://t.co/W128e5btLn                                                       
12166    Always a bummer for westerners when they first learn about Navalny's anti-immigrant xenophobia and his role in Moscow skinhead marc

In [41]:
df['text'][df.NMF_topic ==2][500:530]

33249    VIDEO: Protesters take out government surveillance cameras in #Eindhoven 🇳🇱 https://t.co/jWG8cttt1N #COVID19 #coronavirus #Merkel #Macron #BorisJohnson #DominicRaab #Navalny #lockdown #oldandyoung #GreatReset                                                                                                
33260    Video of the moment when Navalny ally/lawyer Lyubov Sobol is grabbed by riot cops while speaking in Moscow. (Video by @tvrain) https://t.co/dCY0nlLzgi                                                                                                                                                          
33269    Video of the moment when Navalny ally/lawyer Lyubov Sobol is grabbed by riot cops while speaking in Moscow. (Video by @tvrain) https://t.co/dCY0nlLzgi                                                                                                                                                          
33272    @fabnana74 @AlexKokcharov For an ethno nationalis

In [43]:
df['text'][df.NMF_topic ==3][500:530]

769    .@navalny says not to believe it if he “accidentally” dies in jail. “Just in case, I am announcing that I don’t plan to either hang myself on a window grill or cut my veins or throat open with a sharpened spoon. I use the stairs very carefully.”                                                             
773    Newt, this is offensive. And wrong. Trump is the equivalent of Putin. Not Navalny. https://t.co/3JyKX5i8fj                                                                                                                                                                                                        
774    That disgusting supplicant, John Croyn, said that Biden should hold Russia accountable for the Navalny poisoning.  Do these liars and thieves ever hear their own hypocrisy.  Never did trump hold Russia accountable. Nor did any elected Republican. Bounties and attacks. STFU Croyn.                          
775    Weird footage from #navalnyprotests in #Russia \n\n

In [45]:
df['text'][df.NMF_topic ==4][700:730]

20299    BREAKING: The World Economic Forum has invited Putin to address their “virtual Davos” in spite of his recent poisoning and arrest of Navalny. The motto of the WEF is “Improving the state of the world”. Seems like legitimizing Putin does just the opposite  https://t.co/vPHckt8zwX                                           
20300    BREAKING: The World Economic Forum has invited Putin to address their “virtual Davos” in spite of his recent poisoning and arrest of Navalny. The motto of the WEF is “Improving the state of the world”. Seems like legitimizing Putin does just the opposite  https://t.co/vPHckt8zwX                                           
20340    BREAKING: The World Economic Forum has invited Putin to address their “virtual Davos” in spite of his recent poisoning and arrest of Navalny. The motto of the WEF is “Improving the state of the world”. Seems like legitimizing Putin does just the opposite  https://t.co/vPHckt8zwX                                           
2034

In [46]:
df['text'][df.NMF_topic ==5][700:730]

10266    Bizarre, even sick comparison. There is no universe in which someone as committed to truth as Navalny - he is prepared to die for it - can be compared to a man who lied and conned his way into the presidency, doing terrible damage to America's credibility. https://t.co/3omNIrNe5R
10280    Bizarre, even sick comparison. There is no universe in which someone as committed to truth as Navalny - he is prepared to die for it - can be compared to a man who lied and conned his way into the presidency, doing terrible damage to America's credibility. https://t.co/3omNIrNe5R
10288    Bizarre, even sick comparison. There is no universe in which someone as committed to truth as Navalny - he is prepared to die for it - can be compared to a man who lied and conned his way into the presidency, doing terrible damage to America's credibility. https://t.co/3omNIrNe5R
10293    Bizarre, even sick comparison. There is no universe in which someone as committed to truth as Navalny - he is prepared to

In [49]:
df.head(30)

Unnamed: 0,id,parsed_created_at,user_screen_name,text,tweet_type,hashtags,favorite_count,possibly_sensitive,retweet_count,user_id,processed_text,pos_tagged,lemmatized,final_docs,LDA1_topic,NMF_topic
0,1353530261405913088,2021-01-25 02:28:56+00:00,TooheySpence,".@newtgingrich, what the hell has happened to you?\nThis is offensive.\n\nNavalny fights for democracy against a corrupt despot, who seeks to silence political opponents.\n\nTrump threatens democracy, is a corrupt despot, who seeks retribution on and to silence political opponents. https://t.co/z8pYuJIwot",retweet,,4114,,637,2329017175,"[@newtgingrich, hell, happened, offensive, navalny, fights, democracy, corrupt, despot, seeks, silence, political, opponents, trump, threatens, democracy, corrupt, despot, seeks, retribution, silence, political, opponents]","[(@newtgingrich, JJ), (hell, NN), (happened, VBD), (offensive, JJ), (navalny, JJ), (fights, NNS), (democracy, NN), (corrupt, VBP), (despot, NN), (seeks, VBZ), (silence, RB), (political, JJ), (opponents, NNS), (trump, VBP), (threatens, VBZ), (democracy, NN), (corrupt, JJ), (despot, NN), (seeks, VBZ), (retribution, NN), (silence, NN), (political, JJ), (opponents, NNS)]","[@newtgingrich, hell, happened, offensive, navalny, fight, democracy, corrupt, despot, seek, silence, political, opponent, trump, threatens, democracy, corrupt, despot, seek, retribution, silence, political, opponent]",@newtgingrich hell happened offensive navalny fight democracy corrupt despot seek silence political opponent trump threatens democracy corrupt despot seek retribution silence political opponent,2,3
1,1353530259946299393,2021-01-25 02:28:56+00:00,shawnpitz,"Newt, this is offensive. And wrong. Trump is the equivalent of Putin. Not Navalny. https://t.co/3JyKX5i8fj",retweet,,3162,False,468,17051121,"[newt, offensive, wrong, trump, equivalent, putin, navalny]","[(newt, RB), (offensive, JJ), (wrong, JJ), (trump, NN), (equivalent, JJ), (putin, NN), (navalny, NN)]","[newt, offensive, wrong, trump, equivalent, putin, navalny]",newt offensive wrong trump equivalent putin navalny,2,3
3,1353530252623048705,2021-01-25 02:28:54+00:00,inkonspicuo,".@newtgingrich, what the hell has happened to you?\nThis is offensive.\n\nNavalny fights for democracy against a corrupt despot, who seeks to silence political opponents.\n\nTrump threatens democracy, is a corrupt despot, who seeks retribution on and to silence political opponents. https://t.co/z8pYuJIwot",retweet,,4114,,637,206264653,"[@newtgingrich, hell, happened, offensive, navalny, fights, democracy, corrupt, despot, seeks, silence, political, opponents, trump, threatens, democracy, corrupt, despot, seeks, retribution, silence, political, opponents]","[(@newtgingrich, JJ), (hell, NN), (happened, VBD), (offensive, JJ), (navalny, JJ), (fights, NNS), (democracy, NN), (corrupt, VBP), (despot, NN), (seeks, VBZ), (silence, RB), (political, JJ), (opponents, NNS), (trump, VBP), (threatens, VBZ), (democracy, NN), (corrupt, JJ), (despot, NN), (seeks, VBZ), (retribution, NN), (silence, NN), (political, JJ), (opponents, NNS)]","[@newtgingrich, hell, happened, offensive, navalny, fight, democracy, corrupt, despot, seek, silence, political, opponent, trump, threatens, democracy, corrupt, despot, seek, retribution, silence, political, opponent]",@newtgingrich hell happened offensive navalny fight democracy corrupt despot seek silence political opponent trump threatens democracy corrupt despot seek retribution silence political opponent,2,3
4,1353530218187776000,2021-01-25 02:28:46+00:00,EthanMillenium,@teamnavalny @navalny Checkout that ice rink,reply,,0,,0,1074657406511169536,"[@teamnavalny, @navalny, checkout, ice, rink]","[(@teamnavalny, JJ), (@navalny, NN), (checkout, NN), (ice, NN), (rink, NN)]","[@teamnavalny, @navalny, checkout, ice, rink]",@teamnavalny @navalny checkout ice rink,0,3
6,1353530210122149888,2021-01-25 02:28:44+00:00,WhitnerTrinia,"@newtgingrich You are a fucking moron.\n\nIn a comparison involving Navalny, that bloated mango corpse rotting at Mar a Lago would be Putin.",retweet,,4865,,324,951966514005069825,"[@newtgingrich, fucking, moron, comparison, involving, navalny, bloated, mango, corpse, rotting, mar, lago, would, putin]","[(@newtgingrich, JJ), (fucking, VBG), (moron, NN), (comparison, NN), (involving, VBG), (navalny, RB), (bloated, VBN), (mango, NN), (corpse, NN), (rotting, VBG), (mar, NN), (lago, NN), (would, MD), (putin, VB)]","[@newtgingrich, fucking, moron, comparison, involving, navalny, bloated, mango, corpse, rotting, mar, lago, would, putin]",@newtgingrich fucking moron comparison involving navalny bloated mango corpse rotting mar lago would putin,4,5
7,1353530210067443713,2021-01-25 02:28:44+00:00,SigneBurke,Russia Navalny protests: Kremlin hits out at West as it downplays rallies https://t.co/vpm3qj9mO2,retweet,,585,False,134,515069390,"[russia, navalny, protests, kremlin, hits, west, downplays, rallies]","[(russia, NN), (navalny, JJ), (protests, NNS), (kremlin, VBP), (hits, NNS), (west, VBP), (downplays, NNS), (rallies, NNS)]","[russia, navalny, protest, kremlin, hit, west, downplays, rally]",russia navalny protest kremlin hit west downplays rally,5,3
8,1353530205130739713,2021-01-25 02:28:42+00:00,MrMikeGuy,Alexei Navalny is the most Russian Russian who ever lived. Utterly unafraid.,original,,0,,0,31865194,"[alexei, navalny, russian, russian, ever, lived, utterly, unafraid]","[(alexei, NN), (navalny, NN), (russian, JJ), (russian, JJ), (ever, RB), (lived, VBD), (utterly, JJ), (unafraid, NN)]","[alexei, navalny, russian, russian, ever, lived, utterly, unafraid]",alexei navalny russian russian ever lived utterly unafraid,0,3
10,1353530175892418560,2021-01-25 02:28:35+00:00,renee3147,Biden administration condemns Putin’s attack on the protesters in Russia and calls for the release of @navalny. Big change from you-know-who. Elections have consequences. Maybe even for Putin. \n#FreeNavalny. https://t.co/Gcp4hxG88S,retweet,FreeNavalny,4923,,1433,937022372883980290,"[biden, administration, condemns, putin, attack, protesters, russia, calls, release, @navalny, big, change, you-know-who, elections, consequences, maybe, even, putin, #freenavalny]","[(biden, JJ), (administration, NN), (condemns, NN), (putin, JJ), (attack, NN), (protesters, NNS), (russia, VBP), (calls, VBZ), (release, NN), (@navalny, NNP), (big, JJ), (change, NN), (you-know-who, JJ), (elections, NNS), (consequences, NNS), (maybe, RB), (even, RB), (putin, VBP), (#freenavalny, NNS)]","[biden, administration, condemns, putin, attack, protester, russia, call, release, @navalny, big, change, you-know-who, election, consequence, maybe, even, putin, #freenavalny]",biden administration condemns putin attack protester russia call release @navalny big change you-know-who election consequence maybe even putin #freenavalny,1,3
11,1353530174768173056,2021-01-25 02:28:35+00:00,VelourStolen,"If I see you defending Navalny, it's block on sight",retweet,,29,,2,1337697154572292098,"[see, defending, navalny, block, sight]","[(see, VB), (defending, VBG), (navalny, JJ), (block, NN), (sight, NN)]","[see, defending, navalny, block, sight]",see defending navalny block sight,0,0
12,1353530152786001922,2021-01-25 02:28:30+00:00,truthquest8,are also restricted from doing business with China.” Jan 23 Russian police arrested +3400 in protests demanding Navalny's release. Weekend January 23-24 China flies warplanes in show of force near Taiwan #NationalSecurity #USResponse @CBSNews,retweet,NationalSecurity USResponse,95,,51,825783059039547393,"[also, restricted, business, china, jan, russian, police, arrested, protests, demanding, navalny's, release, weekend, january, china, flies, warplanes, show, force, near, taiwan, #nationalsecurity, #usresponse, @cbsnews]","[(also, RB), (restricted, VBN), (business, NN), (china, NN), (jan, NN), (russian, JJ), (police, NN), (arrested, VBD), (protests, NNS), (demanding, VBG), (navalny's, JJ), (release, NN), (weekend, NN), (january, JJ), (china, NN), (flies, NNS), (warplanes, VBP), (show, NN), (force, NN), (near, IN), (taiwan, JJ), (#nationalsecurity, NN), (#usresponse, NNP), (@cbsnews, NNS)]","[also, restricted, business, china, jan, russian, police, arrested, protest, demanding, navalny's, release, weekend, january, china, fly, warplane, show, force, near, taiwan, #nationalsecurity, #usresponse, @cbsnews]",also restricted business china jan russian police arrested protest demanding navalny's release weekend january china fly warplane show force near taiwan #nationalsecurity #usresponse @cbsnews,0,3


In [50]:
# I think I'll get better results if I drop retweets... 

df_new = df[df.tweet_type != 'retweet']


In [91]:
# create variable for "text" column 
text_new = df_new['text'] 

In [92]:
# apply text processing functions to text

df_new['processed_text'] = df_new['text'].apply(process_text)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [93]:
#looks like I'm better off working with the original dataframe, just deleting the retweets

df = df[df.tweet_type != 'retweet']

In [94]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9294 entries, 4 to 85364
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   id                  9294 non-null   int64 
 1   parsed_created_at   9294 non-null   object
 2   user_screen_name    9294 non-null   object
 3   text                9294 non-null   object
 4   tweet_type          9294 non-null   object
 5   hashtags            1586 non-null   object
 6   favorite_count      9294 non-null   int64 
 7   possibly_sensitive  4033 non-null   object
 8   retweet_count       9294 non-null   int64 
 9   user_id             9294 non-null   int64 
 10  processed_text      9294 non-null   object
 11  pos_tagged          9294 non-null   object
 12  lemmatized          9294 non-null   object
 13  final_docs          9294 non-null   object
 14  LDA1_topic          9294 non-null   int64 
 15  NMF_topic           9294 non-null   int64 
dtypes: int64(6), object(10)