In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
def get_article(url):
    """Download a news article page and extract three pieces of information.

    Args:
        url: Address of the article page to fetch.

    Returns:
        A ``(news_title, publisher, news_content)`` tuple: the text of the
        page's ``<title>``, the ``content`` attribute of the
        ``twitter:creator`` meta tag, and the stripped article body text.

    Raises:
        requests.RequestException: On connection errors, timeouts, or an HTTP
            error status (raised by ``raise_for_status``).
        ValueError: If the page has no ``<title>``, no ``twitter:creator``
            meta tag, or no ``articleBodyContents`` element.
    """
    # Without a timeout requests may wait on an unresponsive server forever.
    r = requests.get(url, timeout=10)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'lxml')

    if soup.title is None:
        raise ValueError('no <title> element found at %s' % url)
    news_title = soup.title.text

    creator_tag = soup.find('meta', attrs={'name': 'twitter:creator'})
    if creator_tag is None:
        raise ValueError('no twitter:creator meta tag found at %s' % url)
    publisher = creator_tag.get('content')

    body_tag = soup.find('div', attrs={'id': 'articleBodyContents'})
    if body_tag is None:
        raise ValueError('no articleBodyContents element found at %s' % url)

    # The article text is taken from what follows the first '{}' in the body
    # (presumably the tail of an inline script stub -- TODO confirm).
    # If the marker is absent, keep the whole body rather than raising
    # IndexError.
    parts = body_tag.text.split('{}')
    news_content = (parts[1] if len(parts) > 1 else parts[0]).strip()
    return news_title, publisher, news_content

In [3]:
# Article to analyse; get_article() returns (title, publisher, body text).
url = 'https://news.naver.com/main/read.nhn?oid=421&sid1=100&aid=0003646082&mid=shm&mode=LSD&nh=20181018225255'
title, publisher, content = get_article(url)

In [6]:
# Display the publisher value returned by get_article().
publisher

'뉴스1'

In [5]:
# Words to exclude from the extracted nouns (one entry per line for easy editing).
stopwords = [
    '재테크',
    '배포',
    '금지',
    '기자',
    'co',
    'kr',
    '나가기',
    '페이스북',
    'com',
    '.kr',
    '뉴스1',
]

In [6]:
from kornounextractor.noun_extractor import extract

In [7]:
# Write one "<noun><TAB>NNG" line per noun returned by extract(), in sorted
# order, to dic.txt (UTF-8, overwriting any existing file).
with open('dic.txt', 'w', encoding='utf8') as dic_file:
    dic_file.writelines(
        noun + '\tNNG\n'
        for noun in sorted(extract(content, freq=2.0))
    )

In [8]:
import konlpy.tag
# Komoran tagger configured with 'dic.txt' as its user dictionary.
komoran = konlpy.tag.Komoran(userdic='dic.txt')

In [9]:
# Extract nouns from the article text with Komoran.
Nouns = komoran.nouns(content)
# Distinct nouns; retained because other cells may still read this name.
unique_nouns = set(Nouns)
# Drop every single-character noun and every stopword in one pass.
# This yields the same list, in the same order, as repeatedly calling
# list.remove() for each unwanted word, without rescanning the list on
# every removal.
final_nouns = [
    word for word in Nouns
    if len(word) != 1 and word not in stopwords
]

In [10]:
# Show the nouns that remain after filtering.
print(final_nouns)

['아셈', '계기', '영국', '독일', '태국', '정상회담', '대북제재', '완화', '설득', '가속', '문재인', '대통령', '오후', '현지시간', '바티칸', '교황청', '방문', '프란치스코', '교황', '악수', '청와대', '바티칸', '소영', '프란치스코', '교황', '현지시간', '문재인', '대통령', '만남', '사실', '방북', '의사', '대통령', '한반도', '평화프로세스', '추진', '상당', '대통령', '교황', '지지', '기반', '북미', '정상회담', '신속', '개최', '촉구', '국제사회', '북한', '비핵화', '촉진', '대북제재', '완화', '일정', '정도', '필요', '주장', '대통령', '이날', '오후', '바티칸', '교황', '교황', '서재', '동안', '프란치스코', '교황', '단독', '예방', '교황', '만남', '원활', '의사소통', '통역', '배석', '비밀', '원칙', '이날', '예방', '통역', '자리', '한현택', '신부', '배석', '다만', '사전', '청와대', '바티칸', '사이', '협의', '이례', '면담', '내용', '공개', '윤영', '청와대', '국민', '소통', '수석', '교황', '대통령', '김정은', '북한', '국무위원', '장의', '방북', '초청', '의사', '위원장', '초청장', '공식', '초청장', '교황', '초청장', '응답', '있다', '북한', '있다', '언급', '문맥', '북한', '해석', '교황', '한반도', '평화프로세스', '추진', '한국', '정부', '노력', '지지', '말라', '해석', '수석', '교황', '방북', '시점', '언급', '시점', '한편', '교황', '원론', '의미', '풀이', '교황', '방북', '자체', '의의', '대통령', '추진', '유럽', '한반도', '평화프로세스', '설득', '작업', '모양새', '감지', '대

In [11]:
from kr_sna import do_kr_sna

In [12]:
# Build g from the article text, the filtered nouns and the stopword list.
# NOTE(review): do_kr_sna is defined in kr_sna (not shown here); g is later
# passed to networkx centrality functions, so it is presumably a networkx
# graph -- confirm against kr_sna.
g = do_kr_sna(content, final_nouns, stopwords)

In [13]:
# List the nodes of g.
g.nodes()

NodeView(('교황', '대통령', '한반도', '바티칸', '방북', '평화프로세스', '북한', '영국', '독일', '대북제재', '완화', '정상회담', '설득', '프란치스코', '청와대', '의사', '추진', '초청장', '프랑스', '안보리'))

In [14]:
import networkx as nx

In [15]:
# Look up the entry for '교황' in g (its neighbours and their edge attributes).
g['교황']

AtlasView({'영국': {'weight': 1}, '설득': {'weight': 1}, '프란치스코': {'weight': 3}, '대북제재': {'weight': 2}, '독일': {'weight': 1}, '정상회담': {'weight': 2}, '대통령': {'weight': 6}, '완화': {'weight': 2}, '바티칸': {'weight': 4}, '방북': {'weight': 5}, '한반도': {'weight': 3}, '의사': {'weight': 2}, '평화프로세스': {'weight': 3}, '추진': {'weight': 2}, '북한': {'weight': 3}, '초청장': {'weight': 2}, '청와대': {'weight': 1}})

In [17]:
# Degree centrality of every node in g, listed from highest to lowest score.
x = nx.degree_centrality(g)
sorted(x.items(), key=lambda kv: kv[1], reverse=True)

[('대통령', 1.0),
 ('교황', 0.894736842105263),
 ('바티칸', 0.7894736842105263),
 ('대북제재', 0.7894736842105263),
 ('완화', 0.7894736842105263),
 ('의사', 0.7894736842105263),
 ('프란치스코', 0.7368421052631579),
 ('한반도', 0.6842105263157894),
 ('평화프로세스', 0.6842105263157894),
 ('영국', 0.6842105263157894),
 ('독일', 0.631578947368421),
 ('설득', 0.631578947368421),
 ('방북', 0.5789473684210527),
 ('정상회담', 0.5263157894736842),
 ('북한', 0.47368421052631576),
 ('추진', 0.47368421052631576),
 ('청와대', 0.3684210526315789),
 ('안보리', 0.3684210526315789),
 ('초청장', 0.3157894736842105),
 ('프랑스', 0.3157894736842105)]

In [26]:
# Closeness centrality of every node in g, using the 'weight' edge attribute
# as the edge distance; listed from highest to lowest score.
# NOTE(review): distance='weight' makes edges with larger weights count as
# longer. If 'weight' is a co-occurrence count (larger = more strongly
# related), this ranks strongly connected nouns as far apart -- confirm the
# meaning of 'weight' in kr_sna and consider an inverse-weight distance.
x = nx.closeness_centrality(g, distance='weight')
sorted(x.items(), key=lambda kv: kv[1], reverse=True)

[('북한', 0.6551724137931034),
 ('바티칸', 0.6333333333333333),
 ('영국', 0.6333333333333333),
 ('프란치스코', 0.6333333333333333),
 ('독일', 0.6129032258064516),
 ('설득', 0.6129032258064516),
 ('의사', 0.6129032258064516),
 ('초청장', 0.59375),
 ('프랑스', 0.59375),
 ('방북', 0.5757575757575758),
 ('안보리', 0.5757575757575758),
 ('추진', 0.5588235294117647),
 ('청와대', 0.5428571428571428),
 ('한반도', 0.5135135135135135),
 ('평화프로세스', 0.5135135135135135),
 ('대북제재', 0.48717948717948717),
 ('완화', 0.48717948717948717),
 ('대통령', 0.4634146341463415),
 ('교황', 0.4523809523809524),
 ('정상회담', 0.4523809523809524)]

In [18]:
# Betweenness centrality of every node in g (no weight argument is passed),
# listed from highest to lowest score.
x = nx.betweenness_centrality(g)
sorted(x.items(), key=lambda kv: kv[1], reverse=True)

[('대통령', 0.1006845818249327),
 ('의사', 0.061897800055694785),
 ('교황', 0.05721479624988399),
 ('대북제재', 0.031121785946347343),
 ('완화', 0.031121785946347343),
 ('바티칸', 0.02806553420588508),
 ('영국', 0.018453541260558804),
 ('방북', 0.016673628515733778),
 ('프란치스코', 0.013738048825768123),
 ('북한', 0.012141464773043719),
 ('한반도', 0.009263900491970666),
 ('평화프로세스', 0.009263900491970666),
 ('독일', 0.008512020792722548),
 ('설득', 0.005601967882669637),
 ('정상회담', 0.005075187969924812),
 ('안보리', 0.0024041585445094215),
 ('청와대', 0.0019005847953216374),
 ('추진', 0.001485194467650608),
 ('프랑스', 0.0005847953216374269),
 ('초청장', 0.0)]