In [1]:
import pandas as pd
import tweepy
from tweepy import OAuthHandler
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import re

In this notebook, we will get:
- Dataframes of tweets of each president of the party. 
- Dataframes of mentions of users for each president.

### Connecting with Twitter API

In [2]:
%run ./keys.ipynb
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

# General Functions

In [3]:
def extracting_mentions(query):
    """
    This function extract a data frame with the result of a query.
    """
    mentions = [tweet for tweet in tweepy.Cursor(api.search, 
                                                q=query + ' -filter:retweets', 
                                                lang="es", 
                                                tweet_mode='extended',
                                                result_type="recent").items(2000)]
    mentions_json = [tweet._json for tweet in mentions]
    df_mentions = pd.json_normalize(mentions_json)
    
    # selecting useful columns
    columns_selected = ['user.name', 'created_at', 'id', 'full_text', 'display_text_range', 
                    'source', 'retweet_count', 'favorite_count', 'user.followers_count', 
                    'user.friends_count', 'user.statuses_count']
    
    df_mentions_filtered = df_mentions[columns_selected]
    
    # cleaning date time
    df_mentions_filtered['created_at'] = pd.to_datetime(df_mentions_filtered['created_at'])
    
    # cleaning source of the tweet
    list_sources = list(df_mentions_filtered['source'])
    df_mentions_filtered['source'] = [re.findall(r'\>(.*?)\<', s) for s in list_sources]
    
    return df_mentions_filtered

In [4]:
def adding_party_column(df, string):
    """
    Input: string = party who has tweeted.
    Output: a column with the party.
    """
    df['partido'] = string
    return df

In [5]:
def adding_person_column(df, string):
    """
    Input: string = person who has tweeted.
    Output: a column with the name of the person.
    """
    df['persona'] = string
    return df

In [6]:
def adding_type_post_column(df, string):
    """
    Input: string = publicaci√≥n or menci√≥n
    Output: a column with type of post.
    """
    df['tipo de post'] = string
    return df

In [7]:
def transforming_format_dates(column):
    return pd.to_datetime(column)

In [8]:
def extract_hashtags(column):
    """
    Input: column with all tweets. ['full_text'] in this case.
    Output: a list with used hashtags.
    """
    list_hashtags = list(column)
    hashtags = [re.findall(r"#(\w+)", tweet) for tweet in list_hashtags]
    
    return hashtags

## 1. PARTIDO POPULAR

In [14]:
pp_query = '@populares OR "partido popular" OR "el pp" OR "los del pp" -filter:retweets'

In [15]:
df_pp_mentions = extracting_mentions(pp_query)

Rate limit reached. Sleeping for: 590
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['created_at'] = pd.to_datetime(df_mentions_filtered['created_at'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['source'] = [re.findall(r'\>(.*?)\<', s) for s in list_sources]


In [19]:
df_pp_mentions = adding_party_column(df_pp_mentions, 'Partido Popular')
df_pp_mentions = adding_type_post_column(df_pp_mentions, 'menci√≥n')

In [20]:
df_pp_mentions['hashtags'] = extract_hashtags(df_pp_mentions['full_text'])

In [21]:
df_pp_mentions

Unnamed: 0,user.name,created_at,id,full_text,display_text_range,source,retweet_count,favorite_count,user.followers_count,user.friends_count,user.statuses_count,partido,tipo de post,hashtags
0,Juan Gonzalez,2021-04-24 10:07:57+00:00,1385898297181753344,Hay cosas que no entiendo el PP rompe ciertos ...,"[0, 275]",[Twitter for Android],0,0,896,1110,16466,Partido Popular,menci√≥n,[]
1,üü® Ge-Trooper,2021-04-24 10:07:34+00:00,1385898199152566273,"Las mentiras de @populares, al descubierto.\nP...","[0, 130]",[Twitter Web App],0,0,303,329,12977,Partido Popular,menci√≥n,[]
2,SurfGreenBirdüê¶,2021-04-24 10:07:27+00:00,1385898172325761027,@Savater_ @el_pais @elpais_opinion En serio üò≥ ...,"[35, 314]",[Twitter for iPhone],0,0,367,1692,2275,Partido Popular,menci√≥n,[]
3,afrvet,2021-04-24 10:07:08+00:00,1385898091816095746,"¬øIv√°n Redondo, trabaja para el PP o para el PS...","[0, 50]",[Twitter for Android],0,0,3467,2200,204707,Partido Popular,menci√≥n,[]
4,Juan Antonio Ortega,2021-04-24 10:06:47+00:00,1385898002456449025,Todos los que chillan con m√°s fuerza por la #L...,"[0, 276]",[Twitter for iPhone],0,0,3197,182,9978,Partido Popular,menci√≥n,[Libertad]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,Joaqu√≠n Iborraüîª,2021-04-24 01:20:32+00:00,1385765567936749570,"En pol√≠tica como en matem√°ticas, si A es a B, ...","[0, 195]",[Twitter for Android],0,1,2498,5000,12735,Partido Popular,menci√≥n,"[QueHableLaMayoria, democraciaofascismo]"
1996,Amilcar Barca,2021-04-24 01:18:13+00:00,1385764984593035265,@ClauIrenita99 @_Jaizman_ El PP sabe que no va...,"[26, 137]",[Twitter for Android],0,0,132,132,13752,Partido Popular,menci√≥n,[]
1997,mazija,2021-04-24 01:15:23+00:00,1385764272538587143,"@populares @eruizescudero @IdiazAyuso No, uste...","[38, 120]",[Twitter for Android],0,0,13,41,380,Partido Popular,menci√≥n,[]
1998,PorDiosEros üîª·ºúœÅœâœÇ,2021-04-24 01:14:03+00:00,1385763936314839045,"Anticomunistas eran Hitler, Franco o Mussolini...","[0, 93]",[Twitter Web App],0,0,894,1345,29717,Partido Popular,menci√≥n,[]


## 2. PSOE

In [22]:
psoe_query ='@psoe OR "partido socialista" OR "Partido Socialista Obrero Espa√±ol" OR "el psoe" OR "los del psoe" -filter:retweets'

In [23]:
df_psoe_mentions = extracting_mentions(psoe_query)

Rate limit reached. Sleeping for: 565
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['created_at'] = pd.to_datetime(df_mentions_filtered['created_at'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['source'] = [re.findall(r'\>(.*?)\<', s) for s in list_sources]


In [24]:
adding_party_column(df_psoe_mentions, 'PSOE')
adding_type_post_column(df_psoe_mentions, 'menci√≥n')
df_psoe_mentions['hashtags'] = extract_hashtags(df_psoe_mentions['full_text'])

## 3. CIUDADANOS

In [25]:
ciudadanos_query = '@CiudadanosCs OR "ciudadanos" OR "los de ciudadanos" -filter:retweets'

In [26]:
df_ciudadanos_mentions = extracting_mentions(ciudadanos_query)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['created_at'] = pd.to_datetime(df_mentions_filtered['created_at'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['source'] = [re.findall(r'\>(.*?)\<', s) for s in list_sources]


In [27]:
adding_type_post_column(df_ciudadanos_mentions, 'menci√≥n')
adding_party_column(df_ciudadanos_mentions, 'Ciudadanos')
df_ciudadanos_mentions['hashtags'] = extract_hashtags(df_ciudadanos_mentions['full_text'])

## 4. PODEMOS

In [28]:
podemos_query = '@PODEMOS OR "podemos" OR "los de podemos" OR "podemitas" OR "el psoe" OR "el de podemos" -filter:retweets'

In [29]:
df_podemos_mentions = extracting_mentions(podemos_query)

Rate limit reached. Sleeping for: 798
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['created_at'] = pd.to_datetime(df_mentions_filtered['created_at'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['source'] = [re.findall(r'\>(.*?)\<', s) for s in list_sources]


In [30]:
adding_type_post_column(df_podemos_mentions, 'menci√≥n')
adding_party_column(df_podemos_mentions, 'PODEMOS')
df_podemos_mentions['hashtags'] = extract_hashtags(df_podemos_mentions['full_text'])

## 5. VOX

In [31]:
vox_query = '@vox_es OR "vox" OR "los de vox" OR "el de VOX -filter:retweets"'

In [32]:
df_vox_mentions = extracting_mentions(vox_query)

Rate limit reached. Sleeping for: 807
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['created_at'] = pd.to_datetime(df_mentions_filtered['created_at'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mentions_filtered['source'] = [re.findall(r'\>(.*?)\<', s) for s in list_sources]


In [33]:
adding_type_post_column(df_vox_mentions, 'menci√≥n')
adding_party_column(df_vox_mentions, 'Vox')
df_vox_mentions['hashtags'] = extract_hashtags(df_vox_mentions['full_text'])

## Data frame final mentions

In [34]:
frames = [df_pp_mentions, df_psoe_mentions, df_ciudadanos_mentions, df_podemos_mentions, df_vox_mentions]
df_final_mentions = pd.concat(frames)

In [35]:
# exporting
df_final_mentions.to_csv('../data/processed/df_final_mentions_2000.csv')

In [36]:
df_final_mentions.shape

(10000, 14)

In [37]:
df_final_mentions

Unnamed: 0,user.name,created_at,id,full_text,display_text_range,source,retweet_count,favorite_count,user.followers_count,user.friends_count,user.statuses_count,partido,tipo de post,hashtags
0,Juan Gonzalez,2021-04-24 10:07:57+00:00,1385898297181753344,Hay cosas que no entiendo el PP rompe ciertos ...,"[0, 275]",[Twitter for Android],0,0,896,1110,16466,Partido Popular,menci√≥n,[]
1,üü® Ge-Trooper,2021-04-24 10:07:34+00:00,1385898199152566273,"Las mentiras de @populares, al descubierto.\nP...","[0, 130]",[Twitter Web App],0,0,303,329,12977,Partido Popular,menci√≥n,[]
2,SurfGreenBirdüê¶,2021-04-24 10:07:27+00:00,1385898172325761027,@Savater_ @el_pais @elpais_opinion En serio üò≥ ...,"[35, 314]",[Twitter for iPhone],0,0,367,1692,2275,Partido Popular,menci√≥n,[]
3,afrvet,2021-04-24 10:07:08+00:00,1385898091816095746,"¬øIv√°n Redondo, trabaja para el PP o para el PS...","[0, 50]",[Twitter for Android],0,0,3467,2200,204707,Partido Popular,menci√≥n,[]
4,Juan Antonio Ortega,2021-04-24 10:06:47+00:00,1385898002456449025,Todos los que chillan con m√°s fuerza por la #L...,"[0, 276]",[Twitter for iPhone],0,0,3197,182,9978,Partido Popular,menci√≥n,[Libertad]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,Diario ¬∂ El Cambio,2021-04-24 10:07:13+00:00,1385898114847068160,Vox es una amenaza para la democracia https://...,"[0, 61]",[Twitter for iPad],0,0,27,127,7612,Vox,menci√≥n,[]
1996,Todo Por Hacer,2021-04-24 10:07:13+00:00,1385898113890758656,Cuando se hace necesario tender una l√≠nea roja...,"[0, 278]",[Twitter Web App],1,5,13323,969,15280,Vox,menci√≥n,[]
1997,Pedro G. üá™üá∏,2021-04-24 10:07:13+00:00,1385898111659356162,@Eligorricho @Santi_ABASCAL @Macarena_Olona @O...,"[176, 413]",[Twitter for iPhone],0,1,824,842,664,Vox,menci√≥n,[]
1998,Lolo üá™üá∏,2021-04-24 10:07:12+00:00,1385898108245118976,"@GuajeSalvaje Que vox busca que? No, valles ca...","[14, 72]",[Twitter for Android],0,0,21,34,3110,Vox,menci√≥n,[]
