# Retweet/Quote Analysis

This notebook deals with the questions around how the retweet/quote is used among the politicians.

In [108]:
import pandas as pd
import numpy as np

In [109]:
# When Original Tweet was created
date_parameter_tweet_selection = pd.to_datetime('2021-08-30') # Voting: '2021-09-26', Official Start: '2021-10-26'

In [110]:
user_friendships_evaluation = pd.read_csv('../data/processed/user_friendships_evaluation.csv', sep=";", na_values="", dtype={'source_id': str, 'target_id': str})
user_list = pd.read_csv('../data/processed/user_list.csv', sep=";", na_values="", dtype={'twitter_id': str})

retweet_list = pd.read_csv('../data/processed/retweet_list.csv', sep=";", na_values="", lineterminator='\n')
quote_list = pd.read_csv('../data/processed/quote_list.csv', sep=";", na_values="", lineterminator='\n')

### 1. What is the social tie between author and retweeter/quoter?

In [121]:
#### RETWEETS ####

user_list['twitter_id'] = pd.to_numeric(user_list['twitter_id'])

# Merge follow and tie information into table
retweet_list_enriched = pd.merge(retweet_list, user_friendships_evaluation, how='left', left_on=['retweeter_twitter_handle', 'author_twitter_handle'], right_on=['source_screen_name', 'target_screen_name'])
retweet_list_enriched = pd.merge(retweet_list_enriched, user_friendships_evaluation, how='left', left_on=['retweeter_twitter_handle', 'author_twitter_handle'], right_on=['target_screen_name', 'source_screen_name'])

# Filter retweets based on creation of the orignal tweet
retweet_list_enriched['tweet_created_at'] = pd.to_datetime(retweet_list_enriched['tweet_created_at']).dt.date
retweet_list_enriched = retweet_list_enriched[retweet_list_enriched['tweet_created_at'] > date_parameter_tweet_selection]

# Merge mergeable columns 
retweet_list_enriched['tie_type_x'].update(retweet_list_enriched.pop('tie_type_y'))
retweet_list_enriched['source_follows_target_x'].update(retweet_list_enriched.pop('target_follows_source_y'))
retweet_list_enriched['target_follows_source_x'].update(retweet_list_enriched.pop('source_follows_target_y'))

# Fill info for same person or not available
retweet_list_enriched['tie_type_x'][retweet_list_enriched['retweeter_twitter_handle'] == retweet_list_enriched['author_twitter_handle']] = 'same person' 
retweet_list_enriched['tie_type_x'][retweet_list_enriched['tie_type_x'].isna()] = 'not available'

# Create data frame with the results
retweet_social_ties = pd.DataFrame(columns=['tie_type', 'retweet_count'])
retweet_social_ties.loc[len(retweet_social_ties.index)] = ['weak', retweet_list_enriched[retweet_list_enriched['tie_type_x'] == 'weak'].shape[0]]
retweet_social_ties.loc[len(retweet_social_ties.index)] = ['strong', retweet_list_enriched[retweet_list_enriched['tie_type_x'] == 'strong'].shape[0]]
retweet_social_ties.loc[len(retweet_social_ties.index)] = ['no tie', retweet_list_enriched[retweet_list_enriched['tie_type_x'] == 'no tie'].shape[0]]
retweet_social_ties.loc[len(retweet_social_ties.index)] = ['same person', retweet_list_enriched[retweet_list_enriched['tie_type_x'] == 'same person'].shape[0]]
retweet_social_ties.loc[len(retweet_social_ties.index)] = ['not available', retweet_list_enriched[retweet_list_enriched['tie_type_x'] == 'not available'].shape[0]]

# Add a calculated percentage column
retweet_social_ties['retweet_count_percentage'] = round(retweet_social_ties.retweet_count / retweet_social_ties.retweet_count.sum() * 100, 1)

display(retweet_social_ties)

  result = libops.scalar_compare(x.ravel(), y, op)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  retweet_list_enriched['tie_type_x'][retweet_list_enriched['retweeter_twitter_handle'] == retweet_list_enriched['author_twitter_handle']] = 'same person'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  retweet_list_enriched['tie_type_x'][retweet_list_enriched['tie_type_x'].isna()] = 'not available'


Unnamed: 0,tie_type,retweet_count,retweet_count_percentage
0,weak,664,15.8
1,strong,3433,81.8
2,no tie,101,2.4


In [123]:
#### QUOTES ####

# Merge follow and tie information into table
quote_list_enriched = pd.merge(quote_list, user_friendships_evaluation, how='left', left_on=['quoter_twitter_handle', 'author_twitter_handle'], right_on=['source_screen_name', 'target_screen_name'])
quote_list_enriched = pd.merge(quote_list_enriched, user_friendships_evaluation, how='left', left_on=['quoter_twitter_handle', 'author_twitter_handle'], right_on=['target_screen_name', 'source_screen_name'])

# Filter retweets based on creation of the orignal tweet
quote_list_enriched['tweet_created_at'] = pd.to_datetime(quote_list_enriched['tweet_created_at']).dt.date
quote_list_enriched = quote_list_enriched[quote_list_enriched['tweet_created_at'] > date_parameter_tweet_selection]

# Merge mergeable columns 
quote_list_enriched['tie_type_x'].update(quote_list_enriched.pop('tie_type_y'))
quote_list_enriched['source_follows_target_x'].update(quote_list_enriched.pop('target_follows_source_y'))
quote_list_enriched['target_follows_source_x'].update(quote_list_enriched.pop('source_follows_target_y'))

# Fill info for same person or not available
quote_list_enriched['tie_type_x'][quote_list_enriched['quoter_twitter_handle'] == quote_list_enriched['author_twitter_handle']] = 'same person' 
quote_list_enriched['tie_type_x'][quote_list_enriched['tie_type_x'].isna()] = 'not available'

# Create data frame with the results
quote_social_ties = pd.DataFrame(columns=['tie_type', 'quote_count'])
quote_social_ties.loc[len(quote_social_ties.index)] = ['weak', quote_list_enriched[quote_list_enriched['tie_type_x'] == 'weak'].shape[0]]
quote_social_ties.loc[len(quote_social_ties.index)] = ['strong', quote_list_enriched[quote_list_enriched['tie_type_x'] == 'strong'].shape[0]]
quote_social_ties.loc[len(quote_social_ties.index)] = ['no tie', quote_list_enriched[quote_list_enriched['tie_type_x'] == 'no tie'].shape[0]]
# quote_social_ties.loc[len(quote_social_ties.index)] = ['same person', quote_list_enriched[quote_list_enriched['tie_type_x'] == 'same person'].shape[0]]
# quote_social_ties.loc[len(quote_social_ties.index)] = ['not available', quote_list_enriched[quote_list_enriched['tie_type_x'] == 'not available'].shape[0]]

quote_social_ties['quote_count_percentage'] = round(quote_social_ties.quote_count / quote_social_ties.quote_count.sum() * 100, 1)
display(quote_social_ties)

  result = libops.scalar_compare(x.ravel(), y, op)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quote_list_enriched['tie_type_x'][quote_list_enriched['quoter_twitter_handle'] == quote_list_enriched['author_twitter_handle']] = 'same person'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quote_list_enriched['tie_type_x'][quote_list_enriched['tie_type_x'].isna()] = 'not available'


Unnamed: 0,tie_type,quote_count,quote_count_percentage
0,weak,165,22.9
1,strong,462,64.2
2,no tie,93,12.9


### 2. How do the fractions retweet from each other?

In [119]:
# Add the account information from user_list to the retweets
retweet_list_user_list = pd.merge(retweet_list_enriched, user_list, left_on='retweeter_twitter_handle', right_on='twitter_handle')
retweet_list_user_list = pd.merge(retweet_list_user_list, user_list, left_on='author_twitter_handle', right_on='twitter_handle')
display(retweet_list_user_list)

Unnamed: 0,retweeter_twitter_handle,retweet_tweet_id,retweet_text,retweeter_twitter_id,retweet_created_at,tweet_referenced_tweet_type_x,author_twitter_handle,tweet_id,tweet_text,author_twitter_id,...,created_at_y,description_y,location_y,protected_y,followers_count_y,following_count_y,tweet_count_y,listed_count_y,verified_y,api_call_y
0,ulle_schauws,1528023751287349255,RT @Ricarda_Lang: Weil alle Frauen das Recht a...,17363943,2022-05-21 14:43:46+00:00,retweeted,Ricarda_Lang,1528023578515480576,Weil alle Frauen das Recht auf körperliche Sel...,1405886484,...,2013-05-05 19:29:13+00:00,Bundesvorsitzende von @die_Gruenen. Abgeordnet...,Berlin,False,103578.0,5260.0,14546.0,467.0,True,2022-05-31 19:40:59.026524
1,ulle_schauws,1437131139857231873,RT @Ricarda_Lang: Laschet und Scholz streiten....,17363943,2021-09-12 19:08:59+00:00,retweeted,Ricarda_Lang,1437123361168150528,Laschet und Scholz streiten. @ABaerbock redet ...,1405886484,...,2013-05-05 19:29:13+00:00,Bundesvorsitzende von @die_Gruenen. Abgeordnet...,Berlin,False,103578.0,5260.0,14546.0,467.0,True,2022-05-31 19:40:59.026524
2,emiliafester,1528349566268997632,RT @Ricarda_Lang: Weil alle Frauen das Recht a...,956609533312462850,2022-05-22 12:18:26+00:00,retweeted,Ricarda_Lang,1528023578515480576,Weil alle Frauen das Recht auf körperliche Sel...,1405886484,...,2013-05-05 19:29:13+00:00,Bundesvorsitzende von @die_Gruenen. Abgeordnet...,Berlin,False,103578.0,5260.0,14546.0,467.0,True,2022-05-31 19:40:59.026524
3,ekindeligoez,1528099016394915841,RT @Ricarda_Lang: Weil alle Frauen das Recht a...,22125770,2022-05-21 19:42:50+00:00,retweeted,Ricarda_Lang,1528023578515480576,Weil alle Frauen das Recht auf körperliche Sel...,1405886484,...,2013-05-05 19:29:13+00:00,Bundesvorsitzende von @die_Gruenen. Abgeordnet...,Berlin,False,103578.0,5260.0,14546.0,467.0,True,2022-05-31 19:40:59.026524
4,max_lucks,1470318862717566977,RT @Ricarda_Lang: Wir haben uns nicht weniger ...,481428317,2021-12-13 09:05:08+00:00,retweeted,Ricarda_Lang,1470287911392362496,Wir haben uns nicht weniger vorgenommen als un...,1405886484,...,2013-05-05 19:29:13+00:00,Bundesvorsitzende von @die_Gruenen. Abgeordnet...,Berlin,False,103578.0,5260.0,14546.0,467.0,True,2022-05-31 19:40:59.026524
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4320,DrChristinaBaum,1527239408088453122,RT @ChrWirthMdB: Ganze 138 Verdachts- oder tat...,955899834325700609,2022-05-19 10:47:04+00:00,retweeted,ChrWirthMdB,1527208883026022400,Ganze 138 Verdachts- oder tatsächliche Fälle v...,920193967937019904,...,2017-10-17 07:45:13+00:00,"Dr. Christian Wirth, für das #Saarland und die...","Saarbrücken, Deutschland",False,2775.0,364.0,669.0,81.0,False,2022-05-31 19:40:32.845917
4321,DrChristinaBaum,1476092502788747269,"RT @ChrWirthMdB: ""Nicht jeder „Corona-Patient“...",955899834325700609,2021-12-29 07:27:31+00:00,retweeted,ChrWirthMdB,1475767808004874240,"""Nicht jeder „Corona-Patient“ im Saarland lieg...",920193967937019904,...,2017-10-17 07:45:13+00:00,"Dr. Christian Wirth, für das #Saarland und die...","Saarbrücken, Deutschland",False,2775.0,364.0,669.0,81.0,False,2022-05-31 19:40:32.845917
4322,Johann_Saathoff,1468681225351176207,"RT @oezdemir_spd: Wow, was für ein Tag! Freue ...",159217491,2021-12-08 20:37:45+00:00,retweeted,oezdemir_spd,1468625624671604736,"Wow, was für ein Tag! Freue mich riesig auf di...",1399764944,...,2013-05-03 13:36:09+00:00,Duisburger und Sozialdemokrat aus Leidenschaft...,"Duisburg, Deutschland",False,3117.0,455.0,396.0,194.0,True,2022-05-31 19:25:54.889935
4323,NZippelius,1525914506391601153,RT @YannickBury: Kühnert beim Interpretieren v...,1560104953,2022-05-15 19:02:23+00:00,retweeted,YannickBury,1525912535634935808,Kühnert beim Interpretieren von Wahlergebnisse...,33738797,...,2009-04-21 00:39:30+00:00,Ökonom | Bundestagsabgeordneter für den WK Emm...,"Malterdingen, Deutschland",False,779.0,366.0,1137.0,37.0,False,2022-05-31 19:40:42.057771
