<a href="https://colab.research.google.com/github/pdh93621/Deep-learning/blob/NLP/Prac_LSA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from sklearn.decomposition import TruncatedSVD

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import Normalizer
import pandas as pd
import numpy as np

In [11]:
def get_dtm(sentences):
  """Build a document-term matrix of raw term counts.

  Args:
    sentences: iterable of text documents, one string per document.

  Returns:
    A ``(dtm, vectorizer)`` tuple: the float32 count matrix produced by
    fitting on ``sentences``, and the fitted ``CountVectorizer`` that
    holds the vocabulary.
  """
  # min_df=1 keeps every term that occurs in at least one document.
  # NOTE(review): the built-in 'english' stop-word list is unlikely to
  # filter anything from non-English text - confirm this is intended.
  counter = CountVectorizer(
    min_df=1,
    stop_words='english',
    dtype=np.float32,
  )
  return counter.fit_transform(sentences), counter

In [12]:
def print_dtm_matrix(dtm, vectorizer, sentences):
  """Return the document-term matrix as a labelled DataFrame.

  Args:
    dtm: matrix exposing ``toarray()`` (e.g. a scipy sparse matrix), one
      row per document and one column per vocabulary term.
    vectorizer: fitted vectorizer that provides the vocabulary terms.
    sentences: documents used as row labels, in the same order as the
      rows of ``dtm``.

  Returns:
    pandas.DataFrame with ``sentences`` as index and the vocabulary terms
    as columns.
  """
  # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
  # 1.2; prefer the replacement and only fall back for old versions.
  if hasattr(vectorizer, 'get_feature_names_out'):
    terms = vectorizer.get_feature_names_out()
  else:
    terms = vectorizer.get_feature_names()
  return pd.DataFrame(dtm.toarray(), index=sentences, columns=terms)

In [16]:
def lsa_tsvd(n_components, dtm, vectorizer, sentences):
  """Run latent semantic analysis on a document-term matrix.

  Args:
    n_components: number of latent components to keep. With the 'arpack'
      solver this must be strictly less than the number of features.
    dtm: document-term matrix, one row per document.
    vectorizer: fitted vectorizer that provides the vocabulary terms.
    sentences: documents used as row/column labels, in the same order as
      the rows of ``dtm``.

  Returns:
    A ``(components, lsa, similarity)`` tuple of DataFrames:
      components: component-by-term loadings.
      lsa: row-normalized document coordinates in component space.
      similarity: document-by-document dot products of those coordinates.
  """
  svd = TruncatedSVD(n_components=n_components, algorithm='arpack')
  dtm_lsa = svd.fit_transform(dtm)
  # Scale every document vector to unit length so that the dot products
  # computed below are cosine similarities.
  dtm_lsa = Normalizer(copy=False).fit_transform(dtm_lsa)

  # Derive the labels from the fitted model instead of hard-coding two
  # names, so any valid n_components works.
  labels = ['components-{}'.format(i + 1)
            for i in range(svd.components_.shape[0])]

  # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
  # 1.2; prefer the replacement and only fall back for old versions.
  if hasattr(vectorizer, 'get_feature_names_out'):
    terms = vectorizer.get_feature_names_out()
  else:
    terms = vectorizer.get_feature_names()

  components = pd.DataFrame(svd.components_, index=labels, columns=terms)
  lsa = pd.DataFrame(dtm_lsa, index=sentences, columns=labels)

  # Plain ndarray product; np.asmatrix is discouraged by NumPy.
  similarity = pd.DataFrame(np.dot(dtm_lsa, dtm_lsa.T),
                            index=sentences, columns=sentences)

  return components, lsa, similarity

In [17]:
def main():
  """Run the LSA demo on four fixed Korean news sentences.

  Returns:
    A 4-tuple ``(dtm_frame, components, doc_coords, similarity)``: the
    labelled document-term matrix followed by the three tables returned
    by ``lsa_tsvd`` with two components.
  """
  corpus = [
    '중앙방역대책본부는 오늘 0시 기준 코로나19 확진 환자가 44명 추가 확인돼 모두 13,417명으로 늘었다고 밝혔습니다.',
    '신규 확진 환자 가운데 해외 유입 사례는 23명, 국내 발생은 21명입니다.',
    '국내 발생 환자는 지역별로 서울 7명, 경기 8명 등 수도권에서 15명이 확인됐고, 광주에서도 5명, 대전에서도 1명이 확진됐습니다.',
    '또한 코로나19로 1명이 추가 사망해 누적 사망자는 모두 289명으로 늘었습니다.',
  ]

  counts, fitted_vectorizer = get_dtm(corpus)
  dtm_frame = print_dtm_matrix(counts, fitted_vectorizer, corpus)
  components, doc_coords, similarity = lsa_tsvd(
    2, counts, fitted_vectorizer, corpus)

  return dtm_frame, components, doc_coords, similarity


In [18]:
# Run the pipeline once and unpack its four result tables:
# d = document-term matrix, c = component-by-term table,
# l = normalized document coordinates, s = document similarity matrix.
d, c, l, s = main()

In [19]:
# Display the document-term matrix (rows: sentences, columns: vocabulary terms).
d

Unnamed: 0,0시,13,15명이,1명이,21명입니다,23명,289명으로,417명으로,44명,5명,7명,8명,가운데,경기,광주에서도,국내,기준,누적,늘었다고,늘었습니다,대전에서도,또한,모두,발생,발생은,밝혔습니다,사례는,사망자는,사망해,서울,수도권에서,신규,오늘,유입,중앙방역대책본부는,지역별로,추가,코로나19,코로나19로,해외,확인돼,확인됐고,확진,확진됐습니다,환자,환자가,환자는
"중앙방역대책본부는 오늘 0시 기준 코로나19 확진 환자가 44명 추가 확인돼 모두 13,417명으로 늘었다고 밝혔습니다.",1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0
"신규 확진 환자 가운데 해외 유입 사례는 23명, 국내 발생은 21명입니다.",0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0
"국내 발생 환자는 지역별로 서울 7명, 경기 8명 등 수도권에서 15명이 확인됐고, 광주에서도 5명, 대전에서도 1명이 확진됐습니다.",0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0
또한 코로나19로 1명이 추가 사망해 누적 사망자는 모두 289명으로 늘었습니다.,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Display the fitted SVD components (rows: components, columns: vocabulary terms).
c

Unnamed: 0,0시,13,15명이,1명이,21명입니다,23명,289명으로,417명으로,44명,5명,7명,8명,가운데,경기,광주에서도,국내,기준,누적,늘었다고,늘었습니다,대전에서도,또한,모두,발생,발생은,밝혔습니다,사례는,사망자는,사망해,서울,수도권에서,신규,오늘,유입,중앙방역대책본부는,지역별로,추가,코로나19,코로나19로,해외,확인돼,확인됐고,확진,확진됐습니다,환자,환자가,환자는
components-1,0.115642,0.115642,0.198653,0.263701,0.056029,0.056029,0.065048,0.115642,0.115642,0.198653,0.198653,0.198653,0.056029,0.198653,0.198653,0.254682,0.115642,0.065048,0.115642,0.065048,0.198653,0.065048,0.180691,0.198653,0.056029,0.115642,0.056029,0.065048,0.065048,0.198653,0.198653,0.056029,0.115642,0.056029,0.115642,0.198653,0.180691,0.115642,0.065048,0.056029,0.115642,0.198653,0.171671,0.198653,0.056029,0.115642,0.198653
components-2,0.20545,0.20545,-0.139691,-0.090793,0.014464,0.014464,0.048898,0.20545,0.20545,-0.139691,-0.139691,-0.139691,0.014464,-0.139691,-0.139691,-0.125227,0.20545,0.048898,0.20545,0.048898,-0.139691,0.048898,0.254348,-0.139691,0.014464,0.20545,0.014464,0.048898,0.048898,-0.139691,-0.139691,0.014464,0.20545,0.014464,0.20545,-0.139691,0.254348,0.20545,0.048898,0.014464,0.20545,-0.139691,0.219914,-0.139691,0.014464,0.20545,-0.139691


In [21]:
# Display each sentence's normalized coordinates on the two components.
l

Unnamed: 0,components-1,components-2
"중앙방역대책본부는 오늘 0시 기준 코로나19 확진 환자가 44명 추가 확인돼 모두 13,417명으로 늘었다고 밝혔습니다.",0.515354,0.856977
"신규 확진 환자 가운데 해외 유입 사례는 23명, 국내 발생은 21명입니다.",0.972027,0.23487
"국내 발생 환자는 지역별로 서울 7명, 경기 8명 등 수도권에서 15명이 확인됐고, 광주에서도 5명, 대전에서도 1명이 확진됐습니다.",0.835306,-0.549785
또한 코로나19로 1명이 추가 사망해 누적 사망자는 모두 289명으로 늘었습니다.,0.817844,0.575441


In [22]:
# Display the sentence-by-sentence similarity matrix built from the normalized coordinates.
s

Unnamed: 0,"중앙방역대책본부는 오늘 0시 기준 코로나19 확진 환자가 44명 추가 확인돼 모두 13,417명으로 늘었다고 밝혔습니다.","신규 확진 환자 가운데 해외 유입 사례는 23명, 국내 발생은 21명입니다.","국내 발생 환자는 지역별로 서울 7명, 경기 8명 등 수도권에서 15명이 확인됐고, 광주에서도 5명, 대전에서도 1명이 확진됐습니다.",또한 코로나19로 1명이 추가 사망해 누적 사망자는 모두 289명으로 늘었습니다.
"중앙방역대책본부는 오늘 0시 기준 코로나19 확진 환자가 44명 추가 확인돼 모두 13,417명으로 늘었다고 밝혔습니다.",1.0,0.702216,-0.040675,0.914619
"신규 확진 환자 가운데 해외 유입 사례는 23명, 국내 발생은 21명입니다.",0.702216,1.0,0.682812,0.93012
"국내 발생 환자는 지역별로 서울 7명, 경기 8명 등 수도권에서 15명이 확인됐고, 광주에서도 5명, 대전에서도 1명이 확진됐습니다.",-0.040675,0.682812,1.0,0.366781
또한 코로나19로 1명이 추가 사망해 누적 사망자는 모두 289명으로 늘었습니다.,0.914619,0.93012,0.366781,1.0
