# Dependencies

In [1]:
!pip install wikidata_plain_sparql

Collecting wikidata_plain_sparql
  Downloading wikidata_plain_sparql-0.0.6-py3-none-any.whl (5.1 kB)
Installing collected packages: wikidata-plain-sparql
Successfully installed wikidata-plain-sparql-0.0.6


In [2]:
import wikidata_plain_sparql as wikidata
import pandas as pd
import numpy as np
import random
import networkx as nx
from tqdm import tqdm
import re
import matplotlib.pyplot as plt

# Pengambilan Data

In [3]:
df = wikidata.query('''
SELECT ?dramaLabel ?artistLabel
WHERE{
  ?a wdt:P31 wd:Q5398426; 
     wdt:P495 wd:Q884; 
     rdfs:label ?dramaLabel;
     wdt:P161 ?b .
  ?b rdfs:label ?artistLabel;
  MINUS{?a wdt:P136 wd:Q182415 .}
  FILTER(LANG(?dramaLabel) = "en")
  FILTER(LANG(?artistLabel) = "en")
}
''')

In [4]:
df

Unnamed: 0,dramaLabel,artistLabel
0,Faith,Lee Min Ho
1,Dae Jang Geum,Yang Mi-kyung
2,Dae Jang Geum,Hong Ri-na
3,Dae Jang Geum,Im Ho
4,Dae Jang Geum,Ji Jin-hee
...,...,...
2841,Childless Comfort,Lee Soon-jae
2842,Cheongdam-dong Alice,So I-hyeon
2843,Cheongdam-dong Alice,Kim Ji-seok
2844,Cheongdam-dong Alice,Park Shi-hoo


In [5]:
artistName = list(set(df['artistLabel']))
dramaName = list(set(df['dramaLabel']))

In [6]:
len(artistName)

1317

# Pembuatan graph

In [7]:
dramaAndArtis = {}
for i in dramaName:
  dramaAndArtis[i] = []

for index, row in df.iterrows():
  tmp = dramaAndArtis[row['dramaLabel']]
  tmp.append(row['artistLabel'])
  dramaAndArtis[row['dramaLabel']] = tmp

In [8]:
# Adjecency List
graphArtis = {}

for i in artistName:
  a = df[df['artistLabel'] == i]
  tmp = []
  for j in a['dramaLabel']:
    tmp = tmp + dramaAndArtis[j]
  coStaring = list(set(tmp))
  coStaring.remove(i)
  graphArtis[i] = coStaring

In [9]:
# capture nodes in 2 separate lists
node_list_1 = []
node_list_2 = []
for i in (graphArtis):
  for j in (graphArtis[i]):
    node_list_1.append(i)
    node_list_2.append(j)

artis_edge = pd.DataFrame({'source': node_list_1, 'target': node_list_2})

In [117]:
artis_edge.to_csv("KoreanArtistEdges.csv")

In [11]:
!pip install stellargraph

Collecting stellargraph
  Downloading stellargraph-1.2.1-py3-none-any.whl (435 kB)
[?25l[K     |▊                               | 10 kB 13.4 MB/s eta 0:00:01[K     |█▌                              | 20 kB 13.4 MB/s eta 0:00:01[K     |██▎                             | 30 kB 8.9 MB/s eta 0:00:01[K     |███                             | 40 kB 8.0 MB/s eta 0:00:01[K     |███▊                            | 51 kB 5.3 MB/s eta 0:00:01[K     |████▌                           | 61 kB 5.3 MB/s eta 0:00:01[K     |█████▎                          | 71 kB 5.5 MB/s eta 0:00:01[K     |██████                          | 81 kB 6.1 MB/s eta 0:00:01[K     |██████▊                         | 92 kB 4.7 MB/s eta 0:00:01[K     |███████▌                        | 102 kB 5.1 MB/s eta 0:00:01[K     |████████▎                       | 112 kB 5.1 MB/s eta 0:00:01[K     |█████████                       | 122 kB 5.1 MB/s eta 0:00:01[K     |█████████▉                      | 133 kB 5.1 MB/s eta 0:

In [12]:
import stellargraph as SG
from stellargraph import StellarGraph

In [13]:
# create graph
artists = nx.from_pandas_edgelist(artis_edge, "source", "target", create_using=nx.Graph())
artists_graph = StellarGraph.from_networkx(artists)

In [14]:
print(artists_graph.info())

StellarGraph: Undirected multigraph
 Nodes: 1230, Edges: 13869

 Node types:
  default: [1230]
    Features: none
    Edge types: default-default->default

 Edge types:
    default-default->default: [13869]
        Weights: all 1 (default)
        Features: none


# Link Prediction Model

reference: https://stellargraph.readthedocs.io/en/stable/demos/link-prediction/node2vec-link-prediction.html


In [15]:
from stellargraph.data import EdgeSplitter
from stellargraph.mapper import FullBatchLinkGenerator
from stellargraph.layer import GCN, LinkEmbedding


from tensorflow import keras
from sklearn import preprocessing, feature_extraction, model_selection

from stellargraph import globalvar
from stellargraph import datasets
from IPython.display import display, HTML
%matplotlib inline

In [16]:
# Define an edge splitter on the reduced graph G_test:
edge_splitter_test = EdgeSplitter(artists_graph)

# Randomly sample a fraction p=0.1 of all positive links, and same number of negative links, from G, and obtain the
# reduced graph G_test with the sampled links removed:
G_test, edge_ids_test, edge_labels_test = edge_splitter_test.train_test_split(
    p=0.1, method="global", keep_connected=True
)

** Sampled 1386 positive and 1386 negative edges. **


In [17]:
# Define an edge splitter on the reduced graph G_test:
edge_splitter_train = EdgeSplitter(G_test)

# Randomly sample a fraction p=0.1 of all positive links, and same number of negative links, from G_test, and obtain the
# reduced graph G_train with the sampled links removed:
G_train, edge_ids_train, edge_labels_train = edge_splitter_train.train_test_split(
    p=0.1, method="global", keep_connected=True
)

(
    examples_train,
    examples_model_selection,
    labels_train,
    labels_model_selection,
) = model_selection.train_test_split(edge_ids_train, edge_labels_train, train_size=0.75, test_size=0.25)

** Sampled 1248 positive and 1248 negative edges. **


In [55]:
print(examples_model_selection[1][0])

Choi Ji-woo


In [18]:
pd.DataFrame(
    [
        (
            "Training Set",
            len(examples_train),
            "Train Graph",
            "Test Graph",
            "Train the Link Classifier",
        ),
        (
            "Model Selection",
            len(examples_model_selection),
            "Train Graph",
            "Test Graph",
            "Select the best Link Classifier model",
        ),
        (
            "Test set",
            len(edge_ids_test),
            "Test Graph",
            "Full Graph",
            "Evaluate the best Link Classifier",
        ),
    ],
    columns=("Split", "Number of Examples", "Hidden from", "Picked from", "Use"),
).set_index("Split")

Unnamed: 0_level_0,Number of Examples,Hidden from,Picked from,Use
Split,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Training Set,1872,Train Graph,Test Graph,Train the Link Classifier
Model Selection,624,Train Graph,Test Graph,Select the best Link Classifier model
Test set,2772,Test Graph,Full Graph,Evaluate the best Link Classifier


In [19]:
import multiprocessing

In [20]:
p = 1.0
q = 1.0
dimensions = 128
num_walks = 10
walk_length = 80
window_size = 10
num_iter = 1
workers = multiprocessing.cpu_count()

In [21]:
from stellargraph.data import BiasedRandomWalk
from gensim.models import Word2Vec


def node2vec_embedding(graph, name):
    rw = BiasedRandomWalk(graph)
    walks = rw.run(graph.nodes(), n=num_walks, length=walk_length, p=p, q=q)
    print(f"Number of random walks for '{name}': {len(walks)}")

    model = Word2Vec(
        walks,
        size=dimensions,
        window=window_size,
        min_count=0,
        sg=1,
        workers=workers,
        iter=num_iter,
    )

    def get_embedding(u):
        return model.wv[u]

    return get_embedding

In [22]:
embedding_train = node2vec_embedding(G_train, "Train Graph")

Number of random walks for 'Train Graph': 12300


In [23]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler


# 1. link embeddings
def link_examples_to_features(link_examples, transform_node, binary_operator):
    return [
        binary_operator(transform_node(src), transform_node(dst))
        for src, dst in link_examples
    ]


# 2. training classifier
def train_link_prediction_model(
    link_examples, link_labels, get_embedding, binary_operator
):
    clf = link_prediction_classifier()
    link_features = link_examples_to_features(
        link_examples, get_embedding, binary_operator
    )
    clf.fit(link_features, link_labels)
    return clf


def link_prediction_classifier(max_iter=2000):
    lr_clf = LogisticRegressionCV(Cs=10, cv=10, scoring="roc_auc", max_iter=max_iter)
    return Pipeline(steps=[("sc", StandardScaler()), ("clf", lr_clf)])


# 3. and 4. evaluate classifier
def evaluate_link_prediction_model(
    clf, link_examples_test, link_labels_test, get_embedding, binary_operator
):
    link_features_test = link_examples_to_features(
        link_examples_test, get_embedding, binary_operator
    )
    score = evaluate_roc_auc(clf, link_features_test, link_labels_test)
    return score


def evaluate_roc_auc(clf, link_features, link_labels):
    predicted = clf.predict_proba(link_features)

    # check which class corresponds to positive links
    positive_column = list(clf.classes_).index(1)
    return roc_auc_score(link_labels, predicted[:, positive_column])

In [24]:
def operator_hadamard(u, v):
    return u * v


def operator_l1(u, v):
    return np.abs(u - v)


def operator_l2(u, v):
    return (u - v) ** 2


def operator_avg(u, v):
    return (u + v) / 2.0


def run_link_prediction(binary_operator):
    clf = train_link_prediction_model(
        examples_train, labels_train, embedding_train, binary_operator
    )
    score = evaluate_link_prediction_model(
        clf,
        examples_model_selection,
        labels_model_selection,
        embedding_train,
        binary_operator,
    )

    return {
        "classifier": clf,
        "binary_operator": binary_operator,
        "score": score,
        "model": clf
    }


binary_operators = [operator_hadamard, operator_l1, operator_l2, operator_avg]

In [25]:
results = [run_link_prediction(op) for op in binary_operators]
best_result = max(results, key=lambda result: result["score"])

print(f"Best result from '{best_result['binary_operator'].__name__}'")

pd.DataFrame(
    [(result["binary_operator"].__name__, result["score"]) for result in results],
    columns=("name", "ROC AUC score"),
).set_index("name")

{'classifier': Pipeline(steps=[('sc', StandardScaler()),
                ('clf',
                 LogisticRegressionCV(cv=10, max_iter=2000,
                                      scoring='roc_auc'))]), 'binary_operator': <function operator_l2 at 0x7fac1d4509e0>, 'score': 0.9678397746316688, 'model': Pipeline(steps=[('sc', StandardScaler()),
                ('clf',
                 LogisticRegressionCV(cv=10, max_iter=2000,
                                      scoring='roc_auc'))])}
Best result from 'operator_l2'


Unnamed: 0_level_0,ROC AUC score
name,Unnamed: 1_level_1
operator_hadamard,0.949446
operator_l1,0.967017
operator_l2,0.96784
operator_avg,0.699074


In [30]:
print()

Pipeline(steps=[('sc', StandardScaler()),
                ('clf',
                 LogisticRegressionCV(cv=10, max_iter=2000,
                                      scoring='roc_auc'))])


In [27]:
test_score = evaluate_link_prediction_model(
    best_result["classifier"],
    edge_ids_test, edge_labels_test,
    embedding_test,
    best_result["binary_operator"],
)
print(
    f"ROC AUC score on test set using '{best_result['binary_operator'].__name__}': {test_score}"
)

ROC AUC score on test set using 'operator_l2': 0.9758401370955483


In [31]:
best_model = best_result["model"]

## Perbandingan dengan hasil metode similarity

In [32]:
# combine all nodes in a list
node_list = node_list_1 + node_list_2

# remove duplicate items from the list
node_list = list(dict.fromkeys(node_list))

# build adjacency matrix
adj_G = nx.to_numpy_matrix(artists, nodelist = node_list)

# get unconnected node-pairs
all_unconnected_pairs = []

# traverse adjacency matrix
for i in tqdm(range(adj_G.shape[0])):
  for j in range(0,i):
    if adj_G[i,j] == 0:
      all_unconnected_pairs.append([node_list[i],node_list[j]])

100%|██████████| 1230/1230 [00:05<00:00, 229.64it/s]


### Jaccard Similarity

In [35]:
def jaccardSimilarity(A,B):
  neighborsA = list(artists.neighbors(A))
  neighborsB = list(artists.neighbors(B))
  intersection = set(neighborsA) & set(neighborsB)
  union = set(neighborsA) | set(neighborsB)
  jaccScore = len(intersection) / len(union)
  return jaccScore

In [36]:
score_jaccard = {}

for i in all_unconnected_pairs:
  tmp = jaccardSimilarity(i[0],i[1])
  score_jaccard[i[0] + ' - ' + i[1]] = tmp

In [37]:
score_jaccard.items()[0]

Output hidden; open in https://colab.research.google.com to view.

In [None]:
score = pd.DataFrame(score_jaccard.items(), columns=['Edge', 'Score'])
scoreFinalJaccard = score.sort_values(by='Score', ascending=False).reset_index(drop=True)
scoreFinalJaccard.head(20)

In [41]:
top20_list_1 = []
top20_list_2 = [] 
for idx, row in scoreFinalJaccard.head(20).iterrows():
  a, b = row["Edge"].split(" - ")
  top20_list_1.append(a)
  top20_list_2.append(b)
print(top20_list_1)
print(top20_list_2)  

['Cha Hwa-yeon', 'Baek Jin-hee', 'Go Yoon', 'Yoo Se-yoon', 'Jun Hyo-seong', 'Park Ha-na', 'Jun Hyun-moo', 'Jun Hyun-moo', 'Jun Hyun-moo', 'Kim Hee-ae', 'Yoo Se-yoon', 'Jang Shin-young', 'Ha Yeon-ju', 'Ahn Jae-mo', 'Jang Jin-young', 'Kim Sang-joong', 'Ahn Jae-mo', 'Seo Jun-young', 'Lee Sun-bin', 'Won Bin']
['Kim Yu-mi', 'Nam Sang-mi', 'Seo Hyun-woo', 'Kang Ho-dong', 'Eric Mun', 'Kim Suk-hoon', 'Song Min-ho', 'Kang Ho-dong', 'Cho Kyuhyun', 'Jin Tae-hyun', 'Song Min-ho', 'Ma Dong-seok', 'Eric Mun', 'Lee Kwang-ki', 'Chae Rim', 'Jin Tae-hyun', 'Lee Yeong-ho', 'Kim Suk-hoon', 'Seo Hyun-woo', 'Kim Seong-su']


In [43]:
top20_edge = pd.DataFrame({'source': top20_list_1, 'target': top20_list_2})
top20_edge.head()

Unnamed: 0,source,target
0,Cha Hwa-yeon,Kim Yu-mi
1,Baek Jin-hee,Nam Sang-mi
2,Go Yoon,Seo Hyun-woo
3,Yoo Se-yoon,Kang Ho-dong
4,Jun Hyo-seong,Eric Mun


In [84]:
def getNPArrayFromEdges(df):
  node_array = []
  for idx, row in df.iterrows():
    obj = []
    obj.append(row.source)
    obj.append(row.target)
    np_obj = np.array(obj)
    node_array.append(np_obj)
  np_node = np.array(node_array)
  return np_node

In [85]:
np_of_top20_jaccard = getNPArrayFromEdges(top20_edge)

In [81]:
#menggunakan opperator l2 karena merupakan yang terbaik
link_features_predict = link_examples_to_features(
        np_node, embedding_test, binary_operators[2]
    )

In [82]:
result = best_model.predict(link_features_predict)

In [None]:
pdResult = pd.DataFrame(result, columns = ["Hasil Prediksi"])
pdResult

In [80]:
final_result = pd.concat([scoreFinalJaccard.head(20), pdResult], axis = 1)
final_result

Unnamed: 0,Edge,Score,Hasil Prediksi
0,Cha Hwa-yeon - Kim Yu-mi,0.333333,0
1,Baek Jin-hee - Nam Sang-mi,0.333333,1
2,Go Yoon - Seo Hyun-woo,0.25,1
3,Yoo Se-yoon - Kang Ho-dong,0.25,1
4,Jun Hyo-seong - Eric Mun,0.25,1
5,Park Ha-na - Kim Suk-hoon,0.25,1
6,Jun Hyun-moo - Song Min-ho,0.25,1
7,Jun Hyun-moo - Kang Ho-dong,0.25,1
8,Jun Hyun-moo - Cho Kyuhyun,0.25,1
9,Kim Hee-ae - Jin Tae-hyun,0.25,1


### Adamic Measure

In [88]:
import math

In [89]:
def adamicAdar(A,B):
  neighborsA = list(artists.neighbors(A))
  neighborsB = list(artists.neighbors(B))
  intersection = set(neighborsA) & set(neighborsB)
  score = 0
  for i in intersection:
    tmp = 1/math.log(len(list(artists.neighbors(i))))
    score = score + tmp
  return score

In [90]:
score_adamadar = {}
for i in all_unconnected_pairs:
  tmp = adamicAdar(i[0],i[1])
  score_adamadar[i[0] + ' - ' + i[1]] = tmp

In [91]:
score = pd.DataFrame(score_adamadar.items(), columns=['Edge', 'Score'])

In [92]:
scoreFinalAdamadar = score.sort_values(by='Score', ascending=False).reset_index(drop=True)

In [93]:
scoreFinalAdamadar.head(20)

Unnamed: 0,Edge,Score
0,Um Hyo-sup - Kang Sin-il,4.762552
1,Choi Woong - Im Se-mi,3.960515
2,Kim Soo-hyun - Chun Jung-myung,3.83231
3,Choi Woong - Yoo Yeon-seok,3.734029
4,Jeong Man-sik - Um Hyo-sup,3.633643
5,Kim Chang-wan - Chun Jung-myung,3.575241
6,Choi Woong - Kim Dong-gyun,3.513405
7,Moon Chae-won - Chun Jung-myung,3.509008
8,Yoo In-na - Moon Chae-won,3.482441
9,Daniel L - Moon Chae-won,3.470074


In [100]:
top20_adamic_list_1 = []
top20_adamic_list_2 = [] 
for idx, row in scoreFinalAdamadar.head(20).iterrows():
  a, b = row["Edge"].split(" - ")
  top20_adamic_list_1.append(a)
  top20_adamic_list_2.append(b)
top20_adamic_edge = pd.DataFrame({'source': top20_adamic_list_1, 'target': top20_adamic_list_2})

In [101]:
np_adamic = getNPArrayFromEdges(top20_adamic_edge)
print(np_adamic)

[['Um Hyo-sup' 'Kang Sin-il']
 ['Choi Woong' 'Im Se-mi']
 ['Kim Soo-hyun' 'Chun Jung-myung']
 ['Choi Woong' 'Yoo Yeon-seok']
 ['Jeong Man-sik' 'Um Hyo-sup']
 ['Kim Chang-wan' 'Chun Jung-myung']
 ['Choi Woong' 'Kim Dong-gyun']
 ['Moon Chae-won' 'Chun Jung-myung']
 ['Yoo In-na' 'Moon Chae-won']
 ['Daniel L' 'Moon Chae-won']
 ['Kim So-hyun' 'Kim Gap-su']
 ['Jo Jung-suk' 'Um Hyo-sup']
 ['Kim Chang-wan' 'Lee Seung-gi']
 ['Jo Jung-suk' 'Chun Jung-myung']
 ['Jin Kyeong' 'Park Yeong-gyu']
 ['Um Hyo-sup' 'Kim Gap-su']
 ['Choi Woong' 'Kim Gap-su']
 ['Kang Sin-il' 'Kim Hee-won']
 ['Jo Jung-suk' 'Jo Seong-ha']
 ['Choi Woong' 'Moon Chae-won']]


In [102]:
#menggunakan opperator l2 karena merupakan yang terbaik
link_features_predict = link_examples_to_features(
        np_adamic, embedding_test, binary_operators[2]
    )

In [103]:
result = best_model.predict(link_features_predict)

In [None]:
pdResult = pd.DataFrame(result, columns = ["Hasil Prediksi"])
pdResult

In [105]:
final_result = pd.concat([scoreFinalAdamadar.head(20), pdResult], axis = 1)
final_result

Unnamed: 0,Edge,Score,Hasil Prediksi
0,Um Hyo-sup - Kang Sin-il,4.762552,0
1,Choi Woong - Im Se-mi,3.960515,1
2,Kim Soo-hyun - Chun Jung-myung,3.83231,0
3,Choi Woong - Yoo Yeon-seok,3.734029,1
4,Jeong Man-sik - Um Hyo-sup,3.633643,0
5,Kim Chang-wan - Chun Jung-myung,3.575241,0
6,Choi Woong - Kim Dong-gyun,3.513405,0
7,Moon Chae-won - Chun Jung-myung,3.509008,0
8,Yoo In-na - Moon Chae-won,3.482441,0
9,Daniel L - Moon Chae-won,3.470074,0


### Preferential Attachment

In [106]:
def preferential(A,B):
  G = artists
  neighborsA = len(list(G.neighbors(A)))
  neighborsB = len(list(G.neighbors(B)))
  return neighborsA * neighborsB

In [107]:
score_preferential = {}
for i in all_unconnected_pairs:
  tmp = preferential(i[0],i[1])
  score_preferential[i[0] + ' - ' + i[1]] = tmp

In [108]:
score = pd.DataFrame(score_preferential.items(), columns=['Edge', 'Score'])

In [109]:
scoreFinalPreferential = score.sort_values(by='Score', ascending=False).reset_index(drop=True)

In [110]:
scoreFinalPreferential.head(10)

Unnamed: 0,Edge,Score
0,Choi Woong - Chun Jung-myung,21978
1,Jung Dong-hwan - Chun Jung-myung,20394
2,Kim So-hyun - Um Hyo-sup,20160
3,Moon Chae-won - Chun Jung-myung,19800
4,Kwak Dong-yeon - Chun Jung-myung,19008
5,Kim Soo-hyun - Chun Jung-myung,18414
6,Jo Jung-suk - Chun Jung-myung,18414
7,Na Young-hee - Chun Jung-myung,18018
8,Um Hyo-sup - Kang Sin-il,17640
9,Jo Seong-ha - Chun Jung-myung,17226


In [111]:
top20_1 = []
top20_2 = [] 
for idx, row in scoreFinalPreferential.head(20).iterrows():
  a, b = row["Edge"].split(" - ")
  top20_1.append(a)
  top20_2.append(b)
top20_edge = pd.DataFrame({'source': top20_1, 'target': top20_2})

In [112]:
np_pref = getNPArrayFromEdges(top20_edge)
print(np_pref)

[['Choi Woong' 'Chun Jung-myung']
 ['Jung Dong-hwan' 'Chun Jung-myung']
 ['Kim So-hyun' 'Um Hyo-sup']
 ['Moon Chae-won' 'Chun Jung-myung']
 ['Kwak Dong-yeon' 'Chun Jung-myung']
 ['Kim Soo-hyun' 'Chun Jung-myung']
 ['Jo Jung-suk' 'Chun Jung-myung']
 ['Na Young-hee' 'Chun Jung-myung']
 ['Um Hyo-sup' 'Kang Sin-il']
 ['Jo Seong-ha' 'Chun Jung-myung']
 ['Yoo In-na' 'Chun Jung-myung']
 ['Kim Chang-wan' 'Chun Jung-myung']
 ['Chun Jung-myung' 'Kim Dong-gyun']
 ['Daniel L' 'Chun Jung-myung']
 ['Um Hyo-sup' 'Kwak Dong-yeon']
 ['Song Jong-ho' 'Chun Jung-myung']
 ['Jo Jung-suk' 'Um Hyo-sup']
 ['Ahn Nae-sang' 'Um Hyo-sup']
 ['Lee Seung-joon' 'Chun Jung-myung']
 ['Kim Ji-won' 'Chun Jung-myung']]


In [113]:
#menggunakan opperator l2 karena merupakan yang terbaik
link_features_predict = link_examples_to_features(
        np_pref, embedding_test, binary_operators[2]
    )

In [114]:
result = best_model.predict(link_features_predict)

In [115]:
pdResult = pd.DataFrame(result, columns = ["Hasil Prediksi"])
pdResult

Unnamed: 0,Hasil Prediksi
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [116]:
final_result = pd.concat([scoreFinalPreferential.head(20), pdResult], axis = 1)
final_result

Unnamed: 0,Edge,Score,Hasil Prediksi
0,Choi Woong - Chun Jung-myung,21978,0
1,Jung Dong-hwan - Chun Jung-myung,20394,0
2,Kim So-hyun - Um Hyo-sup,20160,0
3,Moon Chae-won - Chun Jung-myung,19800,0
4,Kwak Dong-yeon - Chun Jung-myung,19008,0
5,Kim Soo-hyun - Chun Jung-myung,18414,0
6,Jo Jung-suk - Chun Jung-myung,18414,0
7,Na Young-hee - Chun Jung-myung,18018,0
8,Um Hyo-sup - Kang Sin-il,17640,0
9,Jo Seong-ha - Chun Jung-myung,17226,0
