# Centrality with nx-package

In [17]:
import networkx as nx
import pandas as pd
import igraph

#### Data loading (From node, To node, Weight, Distance)

In [4]:
# Load the topic-to-topic citation edge list from the CSV in the working directory.
Topic = pd.read_csv(filepath_or_buffer='topic_citation.csv')

In [5]:
# Preview the first five rows of the edge list.
Topic.head(n=5)

Unnamed: 0,Original_Topic,Cited_Topic,weight,distance
0,1,2,41,0.02439
1,1,3,32,0.03125
2,1,4,41,0.02439
3,1,5,10,0.1
4,1,6,9,0.111111


#### Data로부터 NetworkX 그래프 형성

In [6]:
# Directed topic-citation graph: one edge Original_Topic -> Cited_Topic per row,
# carrying that row's 'weight' and 'distance' values as edge attributes.
G = nx.from_pandas_edgelist(
    Topic,
    source='Original_Topic',
    target='Cited_Topic',
    edge_attr=["weight", "distance"],
    create_using=nx.DiGraph(),
)

#### Weighted In-degree

In [7]:
# Weighted in-degree view over all nodes of G: (node, sum of 'weight' on
# incoming edges) pairs. No node subset is given, so every node is included.
in_degree = G.in_degree(weight='weight')

In [8]:
# Keep only the degree values, ordered by node id.
# The degree view yields (node, value) pairs in graph insertion order, while
# the other topic measures in this notebook are listed by topic id 1..36;
# sorting the pairs by node keeps every per-topic list aligned.
in_degree = [strength for _node, strength in sorted(in_degree)]

In [11]:
# Show the first five weighted in-degree values.
in_degree[:5]

[1087, 753, 1399, 1477, 298]

#### Weighted Out-degree

In [12]:
# Weighted out-degree view over all nodes of G: (node, sum of 'weight' on
# outgoing edges) pairs. No node subset is given, so every node is included.
out_degree = G.out_degree(weight='weight')

In [13]:
# Keep only the degree values, ordered by node id.
# The degree view yields (node, value) pairs in graph insertion order, while
# the other topic measures in this notebook are listed by topic id 1..36;
# sorting the pairs by node keeps every per-topic list aligned.
out_degree = [strength for _node, strength in sorted(out_degree)]

In [14]:
# Show the first five weighted out-degree values.
out_degree[:5]

[874, 1333, 971, 572, 469]

#### Weighted Betweenness (이때, weight는 위에와 다르게 distance로 정의)

In [22]:
# Betweenness centrality of every node, with the 'distance' edge attribute
# used as the edge weight for shortest paths.
Betweenness = nx.betweenness_centrality(G, weight='distance')
# Flatten the {node: score} mapping into a list ordered by topic id 1..36.
Betweenness = [Betweenness[topic_id] for topic_id in range(1, 37)]

In [24]:
# Show the betweenness scores of the first five topics.
Betweenness[:5]

[0.0, 0.0, 0.0, 0.02689075630252101, 0.0]

#### Weighted Closeness (이때, weight는 위에와 다르게 distance로 정의)

In [26]:
# Closeness centrality of every node, with path lengths measured by the
# 'distance' edge attribute.
Closeness = nx.closeness_centrality(G, distance='distance')
# Flatten the {node: score} mapping into a list ordered by topic id 1..36.
Closeness = [Closeness[topic_id] for topic_id in range(1, 37)]

In [27]:
# Show the closeness scores of the first five topics.
Closeness[:5]

[44.78717523241349,
 46.652735089198806,
 37.53782527414405,
 38.979758811140414,
 23.999862413179134]

#### Weighted Eigenvector Centrality (이때, weight는 위에와 다르게 다시 Weight로 정의)

In [28]:
# Eigenvector centrality of every node, weighted by the 'weight' edge
# attribute; the iteration limit is raised to 1000.
Eigen = nx.eigenvector_centrality(G, weight='weight', max_iter=1000)
# Flatten the {node: score} mapping into a list ordered by topic id 1..36.
Eigen = [Eigen[topic_id] for topic_id in range(1, 37)]

In [29]:
# Show the eigenvector-centrality scores of the first five topics.
Eigen[:5]

[0.23837325265511863,
 0.13848748761858168,
 0.24628611114117777,
 0.2976268508376953,
 0.03468196452245038]

#### Weighted Pagerank (이때, weight는 위에와 다르게 다시 Weight로 정의)

In [30]:
# PageRank of every node, weighted by the 'weight' edge attribute.
Pagerank = nx.pagerank(G, weight='weight')
# Flatten the {node: score} mapping into a list ordered by topic id 1..36.
Pagerank = [Pagerank[topic_id] for topic_id in range(1, 37)]

In [31]:
# Show the PageRank scores of the first five topics.
Pagerank[:5]

[0.03366632286686227,
 0.0269458704921563,
 0.04771967060191496,
 0.04621521575371496,
 0.013626447416539704]

## Large Graph (igraph 활용: 빠른 속도)

In [33]:
# Load the applicant-to-applicant citation edge list from the CSV in the working directory.
Applicant = pd.read_csv(filepath_or_buffer='applicant_citations.csv')

In [34]:
# Preview the first five rows of the edge list.
Applicant.head(n=5)

Unnamed: 0,Original_Applicant,Cited_Applicant,weight,distance
0,11i Networks Inc.,FUJITSU LTD,1,1.0
1,"1A Smart Start, Inc.",FUJITSU LTD,1,1.0
2,"1A Smart Start, Inc.",Intel Co.,1,1.0
3,"1PerfectID, Inc.",FUJITSU LTD,2,0.5
4,"1PerfectID, Inc.",Intel Co.,2,0.5


#### igraph 패키지의 그래프 생성 (Tuple 사용): Betweenness와 Closeness는 distance로 정의

In [43]:
# Build the directed applicant-citation graph used for the distance-based
# measures (betweenness, closeness).
# Graph.TupleList reads items 0 and 1 of each tuple as the source and target
# vertex names and assigns the remaining items to `edge_attrs` in order.
# The columns are therefore selected by name and listed in the same order as
# `edge_attrs`. Labelling the third item 'distance' would store the values of
# the 'weight' column under that name.
edge_columns = ['Original_Applicant', 'Cited_Applicant', 'weight', 'distance']
tuples = list(Applicant[edge_columns].itertuples(index=False, name=None))
Gm = igraph.Graph.TupleList(tuples, directed=True, edge_attrs=['weight', 'distance'])

#### 필요한 20개의 노드 정의

In [15]:
# The 20 applicants whose centrality scores are reported; names are matched
# against vertex names, so spelling and punctuation must stay exact.
subset_nodes = [
    "AMERICAN EXPRESS TRAVEL RELATED SERVICES COMPANY, INC.",
    "Apple Inc.",
    "AT&T Intellectual Property",
    "Boe Technology Group Co.",
    "Canon",
    "Digimarc Corporation",
    "ETRI",
    "FUJITSU LTD",
    "Google Inc.",
    "Hitachi, Ltd.",
    "Honeywell International Inc.",
    "IBM",
    "Intel Co.",
    "LG CO.",
    "Microsoft Co.",
    "NEC Co.",
    "Panasonic",
    "Qualcomm Inc.",
    "Samsung",
    "SONY",
]

#### Betweenness Centrality

In [45]:
# Directed betweenness of the selected applicants only, with shortest paths
# weighted by the 'distance' edge attribute and no cutoff on path length.
# NOTE(review): `nobigint=True` is intentionally not passed. It matched the
# default of the old python-igraph releases that had the keyword, and the
# current `Graph.betweenness` signature no longer lists it -- confirm against
# the installed igraph version.
Betweenness = Gm.betweenness(
    vertices=subset_nodes,
    directed=True,
    cutoff=None,
    weights=Gm.es["distance"],
)

#### Closeness Centrality

In [48]:
# Closeness of the selected applicants, with path lengths measured by the
# 'distance' edge attribute and no cutoff on path length.
# NOTE(review): `mode` is left at igraph's default; confirm which edge
# direction is intended for this directed graph.
distance_weights = Gm.es["distance"]
Closeness = Gm.closeness(
    vertices=subset_nodes,
    weights=distance_weights,
    cutoff=None,
)

#### Igraph 패키지의 그래프 생성 (Tuple 사용) Degree, Eigenvector는 weight로 정의

In [49]:
# Rebuild the directed applicant-citation graph for the weight-based measures
# (strength, eigenvector centrality).
# Graph.TupleList reads items 0 and 1 of each tuple as the source and target
# vertex names and assigns the following items to `edge_attrs` in order.
# Selecting the columns by name keeps that mapping independent of the
# DataFrame's column order or any extra columns.
edge_columns = ['Original_Applicant', 'Cited_Applicant', 'weight']
tuples = list(Applicant[edge_columns].itertuples(index=False, name=None))
Gm = igraph.Graph.TupleList(tuples, directed=True, edge_attrs=['weight'])

#### In-degree, Out-degree

In [50]:
# Weighted in- and out-degree (strength) of the selected applicants, summing
# the 'weight' edge attribute over incoming and outgoing edges respectively.
in_degree, out_degree = (
    Gm.strength(vertices=subset_nodes, mode=direction, weights=Gm.es["weight"])
    for direction in ("IN", "OUT")
)

#### Eigenvector

In [52]:
# Eigenvector centrality is computed for every vertex of the directed graph,
# weighted by the 'weight' edge attribute, then narrowed to the selected
# applicants.
eigen = Gm.eigenvector_centrality(directed=True, weights=Gm.es["weight"])
# Fetch the vertex-name list once instead of rebuilding it for every applicant.
vertex_names = Gm.vs["name"]
# Position of each selected applicant in the vertex sequence; list.index
# raises ValueError if a name is not a vertex of the graph.
subset_indexes = [vertex_names.index(applicant) for applicant in subset_nodes]
eigen = [eigen[position] for position in subset_indexes]

  """Entry point for launching an IPython kernel.


#### 결과를 CSV로 저장

In [54]:
# Collect the per-applicant measures into one table; columns appear in the
# order listed here and each list supplies one value per selected applicant.
result_columns = [
    ('Author_id', subset_nodes),
    ('In-Degree', in_degree),
    ('Out-Degree', out_degree),
    ('Betweenness', Betweenness),
    ('Closeness', Closeness),
    ('Eigen', eigen),
]
Applicant_result = pd.DataFrame(dict(result_columns))

In [None]:
# Write the result table to CSV in the working directory, without the row index.
Applicant_result.to_csv(path_or_buf='Applicant_result.csv', index=False)