# Friendships Analysis

This notebook deals with the questions around who follows who and how these friendships can be described.

In [71]:
import pandas as pd
import numpy as np
import seaborn as sns

In [17]:
user_friendships_evaluation = pd.read_csv('../data/processed/user_friendships_evaluation.csv', sep=";", na_values="", dtype={'source_id': str, 'target_id': str})
user_list = pd.read_csv('../data/processed/user_list.csv', sep=";", na_values="", dtype={'twitter_id': str})

### 1. How differ strong/weak ties among the parties? (Correlate strong tie = same party?)

In [70]:
# Add the account information from user_list to the user_friendships
user_friendships_evaluation_with_user_list = pd.merge(user_friendships_evaluation, user_list, left_on='source_screen_name', right_on='twitter_handle')
user_friendships_evaluation_with_user_list = pd.merge(user_friendships_evaluation_with_user_list, user_list, left_on='target_screen_name', right_on='twitter_handle')

# Create new data frame with only the relevant columns
tie_evaluation_fraction = user_friendships_evaluation_with_user_list[['source_screen_name', 'target_screen_name', 'tie_type', 'fraktion_x', 'fraktion_y']]

# Create dataframe only containing rows of two users that are in the same fraction/party
same_fraction = tie_evaluation_fraction[tie_evaluation_fraction['fraktion_x'] == tie_evaluation_fraction['fraktion_y']]

# Create pivot counting how often a tie_type appears in a party/fraction
same_fraction_pivot = pd.pivot_table(same_fraction, index=['tie_type'], columns=['fraktion_y'], values=['fraktion_x'], aggfunc='count')
display(same_fraction_pivot)

Unnamed: 0_level_0,fraktion_x,fraktion_x,fraktion_x,fraktion_x,fraktion_x,fraktion_x,fraktion_x
fraktion_y,Alternative für Deutschland,Fraktion BÜNDNIS 90/DIE GRÜNEN,Fraktion DIE LINKE.,Fraktion der Christlich Demokratischen Union/Christlich - Sozialen Union,Fraktion der Freien Demokratischen Partei,Fraktion der Sozialdemokratischen Partei Deutschlands,Fraktionslos
tie_type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
no tie,362.0,655.0,57.0,3425.0,385.0,3974.0,1.0
strong,1200.0,4065.0,394.0,2973.0,2607.0,4049.0,
weak,716.0,1721.0,179.0,2248.0,924.0,3152.0,


### 2. Can difference between the twitter follower counts predict if they are weak/strong ties?

In [106]:
user_friendships_evaluation_with_user_list_and_ties = user_friendships_evaluation_with_user_list[user_friendships_evaluation_with_user_list['tie_type'] != 'no tie']

# Calulcate the absolute follower difference between two accounts
user_friendships_evaluation_with_user_list_and_ties["abs_follower_difference"] = abs(user_friendships_evaluation_with_user_list['followers_count_x'] - user_friendships_evaluation_with_user_list['followers_count_y'])

print("Mean follower difference between strong ties:", user_friendships_evaluation_with_user_list_and_ties[user_friendships_evaluation_with_user_list_and_ties['tie_type'] == 'strong']["abs_follower_difference"].mean())
print("Mean follower difference between weak ties:", user_friendships_evaluation_with_user_list_and_ties[user_friendships_evaluation_with_user_list_and_ties['tie_type'] == 'weak']["abs_follower_difference"].mean())

print("Median follower difference between strong ties:", user_friendships_evaluation_with_user_list_and_ties[user_friendships_evaluation_with_user_list_and_ties['tie_type'] == 'strong']["abs_follower_difference"].median())
print("Median follower difference between weak ties:", user_friendships_evaluation_with_user_list_and_ties[user_friendships_evaluation_with_user_list_and_ties['tie_type'] == 'weak']["abs_follower_difference"].median())


# Create data frame that only contains weak or strong ties
# user_friendships_evaluation_with_user_list_and_ties = user_friendships_evaluation_with_user_list[user_friendships_evaluation_with_user_list['tie_type'] != 'no tie']
# print(user_friendships_evaluation_with_user_list_and_ties["followers_count_y"].dtype)

# p1 = sns.relplot(x="followers_count_x", y="followers_count_y", hue="tie_type", data=user_friendships_evaluation_with_user_list_and_ties)
# p1.map_dataframe(sns.lineplot, 'x', 'y_line', color='g')
# plt.ticklabel_format(style='plain', axis='y')

# sns.scatterplot(data=user_friendships_evaluation_with_user_list_and_ties, x="followers_count_x", y="followers_count_y", hue="tie_type")

Mean follower difference between strong ties: 40678.901015228425
Mean follower difference between weak ties: 94542.86441563872
Median follower difference between strong ties: 5942.0
Median follower difference between weak ties: 11317.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_friendships_evaluation_with_user_list_and_ties["abs_follower_difference"] = abs(user_friendships_evaluation_with_user_list['followers_count_x'] - user_friendships_evaluation_with_user_list['followers_count_y'])


### 3. Do accounts with many followers have more weak ties?

### 4. If all edges of one party are summed together and divided by the number of party-member-accounts, which parties are the most closest connected?

### 5. Do strong ties interact more with each other (retweet/reply to/mentions)?

### 6. Are there accounts were connections and party do not align? (Users that are in the wrong “Bubble” - nearest neighbor?) 