### By years 2020 and 2021

Let us create 2 social network graphs corresponding to the years 2020 and 2021

In [11]:
import pandas as pd
import networkx as nx

In [41]:
# Load the complete co-sponsorship table. keep_default_na=False stops pandas
# from converting empty / "NA"-like cells into NaN while parsing.
df = pd.read_csv('./db/ep_cosponsorship_dataset.csv', header=0, keep_default_na=False)

# Yearly subsets: keep the rows whose 'Date' string mentions the given year.
df2020 = df.loc[df['Date'].str.contains("2020").eq(True)]
df2021 = df.loc[df['Date'].str.contains("2021").eq(True)]

# Distinct official MEP ids seen in each year, in order of first appearance.
list_meps_2020 = df2020["OfficialMEPID"].unique()
list_meps_2021 = df2021["OfficialMEPID"].unique()

# Alias for the unfiltered table; read as a global by Pandas_from_Centralities.
Entire_Dataset = df

In [24]:
def ListMEPs(Dataset):
    """
    :input Dataset: A subset of the cosponsorship table in pandas df (needs a 'MEPName' column)
    :return: The distinct MEP names of the subset, in order of first appearance
    """
    # MEPs are identified by 'MEPName' here, not by 'OfficialMEPID'.
    name_column = Dataset['MEPName']
    return name_column.unique()

def CreateNXGraph(Dataset):
    """
    Build the amendment/MEP graph: every row of the table becomes one edge
    between its 'AmendmentID' and its 'MEPName'.

    :input Dataset: A subset of the cosponsorship table in pandas df
    :return: The nx bipartite graph of the MEPs and amendments
    """
    edge_columns = {"source": 'AmendmentID', "target": 'MEPName'}
    return nx.from_pandas_edgelist(Dataset, **edge_columns)

def ConnectFromBipartite(myGraph, Gtype, MEP_lista):
    """
    Project the bipartite amendment/MEP graph onto the MEP nodes.

    :input myGraph: The nx bipartite graph of MEPs and amendments (e.g. the output of CreateNXGraph)
    :input Gtype: Type of projection to use from {"classic", "collab_weight"}
    :input MEP_lista: List of relevant MEPs (the nodes to project onto)
    :return: The projected nx graph whose nodes are the MEPs in MEP_lista
    :raises ValueError: If Gtype is not one of the supported projection types
    """
    if Gtype == "classic":
        # Unweighted projection built by nx.algorithms.bipartite.projected_graph.
        G = nx.algorithms.bipartite.projected_graph(myGraph, MEP_lista)
    elif Gtype == "collab_weight":
        # Weighted projection built by collaboration_weighted_projected_graph.
        G = nx.algorithms.bipartite.collaboration_weighted_projected_graph(myGraph, MEP_lista)
    else:
        # Previously an unknown type fell through to `return G` with G unbound,
        # which surfaced as a confusing UnboundLocalError.
        raise ValueError(
            f"Unknown projection type {Gtype!r}; expected 'classic' or 'collab_weight'"
        )
    return G

# one example: 
#ConnectFromBipartite(CreateNXGraph(Dataset), "collab_weight", ListMEPs(Dataset))

In [25]:
#mep_social_networks = ConnectFromBipartite(CreateNXGraph(Dataset), "collab_weight", ListMEPs(Dataset))

Consider some famous and important centrality measures: Degree centrality, betweenness centrality, eigenvector centrality 

For 2020:

In [30]:
def Degree_Centralities(Graph):
    """
    Degree centrality of every node of the graph, as computed by nx.degree_centrality.

    :input Graph: Connection graph of MEPs
    :return: Mapping from each node (MEP) to its degree centrality
    """
    centrality_by_mep = nx.degree_centrality(Graph)
    return centrality_by_mep


def Eigenvector_Centralities(Graph):
    """
    Eigenvector centrality of every node of the graph, as computed by nx.eigenvector_centrality.

    :input Graph: Connection graph of MEPs
    :return: Mapping from each node (MEP) to its eigenvector centrality
    """
    # Solver settings: up to 1000 iterations, tolerance 1e-06, no custom
    # start vector and no edge-weight attribute (edges count equally).
    solver_options = {
        "max_iter": 1000,
        "tol": 1e-06,
        "nstart": None,
        "weight": None,
    }
    return nx.eigenvector_centrality(Graph, **solver_options)


def Betweenness_Centralities(Graph):
    """
    Betweenness centrality of every node of the graph, as computed by nx.betweenness_centrality.

    :input Graph: Connection graph of MEPs
    :return: Mapping from each node (MEP) to its betweenness centrality
    """
    # k=None: no node sampling is requested; values are normalized, computed
    # without an edge-weight attribute and without counting path endpoints.
    options = {
        "k": None,
        "normalized": True,
        "weight": None,
        "endpoints": False,
        "seed": None,
    }
    return nx.betweenness_centrality(Graph, **options)

In [31]:
def acquireSplitData(folder="./db/split", count=None):
    """
    Read the consecutively numbered split files ``0.csv``, ``1.csv``, ... of a folder.

    :input folder: Directory holding the numbered csv files (default: "./db/split")
    :input count: Number of files to read; when None it is detected automatically
                  by counting the consecutive files that exist starting from 0.csv
    :return: List of pandas data frames, element i being the content of ``<folder>/i.csv``
    :raises FileNotFoundError: If not even ``0.csv`` exists in the folder, or if
                               an explicitly requested file is missing
    """
    import os  # local import: the notebook's import cell does not provide os

    if count is None:
        # Replaces the former hard-coded 39: walk 0.csv, 1.csv, ... until the
        # first gap, so the number of intervals follows the folder content.
        count = 0
        while os.path.isfile(os.path.join(folder, f"{count}.csv")):
            count += 1
        if count == 0:
            # Keep failing loudly (as reading a missing 0.csv would) instead
            # of silently returning an empty list.
            raise FileNotFoundError(f"No numbered csv files found in {folder!r}")

    return [pd.read_csv(os.path.join(folder, f"{i}.csv")) for i in range(count)]

Data_lista = acquireSplitData()


def _project_every_split(Gtype):
    """Build one projected MEP graph per data frame in Data_lista, using the given projection type."""
    return [
        ConnectFromBipartite(CreateNXGraph(Data), Gtype, ListMEPs(Data))
        for Data in Data_lista
    ]


# The projected graphs of every split, once per projection flavour.
Graph_Lista_Weight = _project_every_split("collab_weight")
Graph_Lista_Normal = _project_every_split("classic")

In [33]:
# One centrality mapping per interval graph, always computed on the
# unweighted ("classic") projections.
Deg_Cent_Lista = list(map(Degree_Centralities, Graph_Lista_Normal))
Eig_Cent_Lista = list(map(Eigenvector_Centralities, Graph_Lista_Normal))
Btw_Cent_Lista = list(map(Betweenness_Centralities, Graph_Lista_Normal))

In [47]:
def Pandas_from_Centralities(Cent_lista):
    """
    Arrange per-period centrality mappings into one table with a row per MEP.

    The rows cover every MEP of the global Entire_Dataset; an MEP missing from
    a period's mapping gets the value 0 for that period.

    :input Cent_lista: list of dictionaries, each containing the centrality values of the MEPs for a given period
    :return DF_centrality: A pandas dataframe with a "MEPName" column followed by one
                           "Interval_<i>" column per period
    """
    def _value_or_zero(centralities, mep):
        # An MEP that was not in this period's connectivity graph counts as 0.
        try:
            return centralities[mep]
        except KeyError:
            return 0

    every_mep = ListMEPs(Entire_Dataset)

    # "MEPName" goes in first so it becomes the leading column of the frame.
    columns = {"MEPName": every_mep}
    for period, centralities in enumerate(Cent_lista):
        columns["Interval_" + str(period)] = [
            _value_or_zero(centralities, mep) for mep in every_mep
        ]

    DF_centrality = pd.DataFrame(columns)
    return DF_centrality

#print(Pandas_from_Centralities(Deg_Cent_Lista))
    

Creating an average and sorting them by value

In [48]:
# Eigenvector centralities as one table: a row per MEP and one
# "Interval_<i>" column per entry of Eig_Cent_Lista (0 where the MEP is absent).
Eig_df = Pandas_from_Centralities(Eig_Cent_Lista)


Let us look at betweenness centrality first

In [None]:
# Compare the betweenness centrality of the 2020 top-10 MEPs across both years.
# NOTE(review): Btw_Cent_2020, Btw_Cent_2021 and the *_UnSort mappings are not
# defined in the visible cells - confirm they are created before this cell runs.
# Btw_Cent_2020 is presumably a list of (MEP name, centrality) pairs sorted in
# descending order - TODO confirm.
x = [entry[0] for entry in Btw_Cent_2020[:10]]   # top 10 MEPs according to betweenness centrality in 2020
y1 = [Btw_Cent_2020_UnSort[mep] for mep in x]    # their betweenness centrality values in 2020
y2 = []                                          # their betweenness centrality values in 2021
for mep in x:
    try:
        y2.append(Btw_Cent_2021_UnSort[mep])
    except KeyError:  # the MEP has no 2021 value; only this case is mapped to 0
        y2.append(0)


print(Btw_Cent_2021[:10])
print(x)
print(y1)
print(y2)

In [None]:
# Grouped bar chart: for every MEP in x, the 2020 bar sits left of the tick
# and the 2021 bar right of it.
# NOTE(review): np and plt are not imported in the visible import cell - confirm.
X_axis = np.arange(len(x))

for shift, heights, series_label in (
    (-0.2, y1, '2020 Centrality'),
    (0.2, y2, '2021 Centrality'),
):
    plt.bar(X_axis + shift, heights, 0.4, label=series_label)

plt.xticks(X_axis, x, rotation=90)  # MEP names, rotated so they do not overlap
plt.xlabel("MEPs")
plt.ylabel("Betweenness centrality")
plt.title("Betweenness centrality changes:")
plt.legend()
plt.show()

The graph above shows how betweenness centrality changed from 2020 to 2021 for the top 10 MEPs (ranked by their 2020 values).

Let us also look at eigenvector centrality, but this time for the top 10 most influential MEPs according to the 2021 measurements, comparing their 2021 values with their values in 2020. This shows who the beneficiaries of the changes from 2020 to 2021 were.

In [None]:
# Compare the eigenvector centrality of the 2021 top-10 MEPs across both years.
# NOTE(review): Eig_Cent_2021 and the *_UnSort mappings are not defined in the
# visible cells - confirm they are created before this cell runs.
# Eig_Cent_2021 is presumably a list of (MEP name, centrality) pairs sorted in
# descending order - TODO confirm.
x = [entry[0] for entry in Eig_Cent_2021[:10]]   # top 10 MEPs according to eigenvector centrality in 2021
y2 = [Eig_Cent_2021_UnSort[mep] for mep in x]    # their eigenvector centrality values in 2021
y1 = []                                          # their eigenvector centrality values in 2020
for mep in x:
    try:
        y1.append(Eig_Cent_2020_UnSort[mep])
    except KeyError:  # the MEP has no 2020 value; only this case is mapped to 0
        y1.append(0)


print(Eig_Cent_2021[:10])
print(x)
print(y1)
print(y2)

In [None]:
# Grouped bar chart: for every MEP in x, the 2020 bar sits left of the tick
# and the 2021 bar right of it.
# NOTE(review): np and plt are not imported in the visible import cell - confirm.
X_axis = np.arange(len(x))

for shift, heights, series_label in (
    (-0.2, y1, '2020 Centrality'),
    (0.2, y2, '2021 Centrality'),
):
    plt.bar(X_axis + shift, heights, 0.4, label=series_label)

plt.xticks(X_axis, x, rotation=90)  # MEP names, rotated so they do not overlap
plt.xlabel("MEPs")
plt.ylabel("Eigenvector centrality")
plt.title("Eigenvector centrality changes:")
plt.legend()
plt.show()