## ref : https://blog.naver.com/21ahn/221329975739 (Crawling by POST method)

In [1]:
from bs4 import BeautifulSoup
import json, random
import requests

## 1. Define functions

In [2]:
# function1. generate sentence
def make_sentence(dic):
    """Generate one random sentence from the chain dictionary and grammar-check it.

    The dictionary is walked as a nested mapping ``dic[w1][w2] -> {w3: ...}``.
    The top-level key ``"@"`` holds the words a sentence may start with, and
    generation stops as soon as ``"."`` or a newline is drawn.

    Args:
        dic: Nested dict, as loaded from the JSON dictionary file.

    Returns:
        A 2-tuple ``(raw_sentence, corrected_sentence)``. When ``dic`` has no
        ``"@"`` key, both elements carry the error message, so callers can
        always unpack two values.
    """
    error_message = "not an appropraite json format"
    if "@" not in dic:
        # Keep the 2-tuple shape on the error path too; returning a bare string
        # here made `s1, s2 = make_sentence(dic)` fail with an unpack error.
        return (error_message, error_message)

    beginner_dic = dic["@"]
    w1 = word_choice(beginner_dic)
    w2 = word_choice(beginner_dic[w1])
    ret = [w1, w2]

    while True:
        followers = dic.get(w1, {}).get(w2)
        if not followers:
            # Dead end: no known successor for (w1, w2). Finish the sentence
            # with what we have instead of raising KeyError / IndexError.
            break
        w3 = word_choice(followers)
        ret.append(w3)
        if w3 in (".", "\n"):
            break
        w1, w2 = w2, w3

    ret = "".join(ret)
    ret_daum_corrected = kor_corrector_daum(ret)
    # kor_corrector_pusan(ret) is intentionally not called: it was not running properly.

    return (ret, ret_daum_corrected)

# function1-1. randomly choose one key from the input dictionary
def word_choice(dict1):
    """Return one key of ``dict1``, picked at random.

    Args:
        dict1: Mapping whose keys are the candidates.

    Returns:
        A single key drawn with ``random.choice``.

    Raises:
        IndexError: If ``dict1`` has no keys.
    """
    candidates = tuple(dict1.keys())
    return random.choice(candidates)

# function1-2. correct grammar using the Daum grammar checker
def kor_corrector_daum(text, timeout=10):
    """Post ``text`` to the Daum grammar checker and return the text it reports.

    Args:
        text: Sentence to check; sent as the ``sentence`` form field.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text extracted from the result page, or a string starting with
        ``'Error - '`` when the request fails, the expected element cannot be
        read, or only the known placeholder label comes back.
    """
    main_url = 'https://alldic.daum.net/grammar_checker.do'
    form_data = {'sentence': text}

    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.post(main_url, data=form_data, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure: report it as an error string, like parsing
        # failures below, instead of letting the exception escape.
        return f'Error - {str(e)}  |  status_code : N/A'

    soup = BeautifulSoup(response.text, 'html.parser')

    try:
        span = soup.select_one('#resultForm > div.cont_grammar > div > a > span')
        # NOTE(review): the last two characters of the span text are dropped;
        # presumably trailing residue from the page markup - confirm.
        r_text = span.get_text()[:-2]
    except Exception as e:
        # Best effort: select_one returns None when the selector does not match,
        # which surfaces here as an AttributeError.
        r_text = f'Error - {str(e)}  |  status_code : {response.status_code}'

    # This label is treated as "no usable result" rather than a correction.
    if r_text == '오류의심':
        r_text = 'Error - Failed to get the result'

    return r_text

# function1-3. correct grammar using the Pusan-univ grammar checker - deleted : Not running properly
def kor_corrector_pusan(text, timeout=10):
    """Post ``text`` to the Pusan-univ speller page and return the text it reports.

    Args:
        text: Sentence to check; sent as the ``text1`` form field.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text extracted from the result page, or a string starting with
        ``'Error - '`` when the request fails or the expected element cannot
        be read.
    """
    main_url = 'http://speller.cs.pusan.ac.kr/'
    form_data = {'text1': text}

    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.post(main_url, data=form_data, timeout=timeout)
    except requests.RequestException as e:
        # Network-level failure: report it as an error string, like parsing
        # failures below, instead of letting the exception escape.
        return f'Error - {str(e)}  |  status_code : N/A'

    soup = BeautifulSoup(response.text, 'html.parser')

    try:
        span = soup.select_one('#tableErr_0 > tbody > tr:nth-child(2)')
        # NOTE(review): the first three characters of the row text are skipped;
        # presumably a leading label or whitespace - confirm.
        r_text = span.get_text()[3:]
    except Exception as e:
        # Best effort: select_one returns None when the selector does not match,
        # which surfaces here as an AttributeError.
        r_text = f'Error - {str(e)}  |  status_code : {response.status_code}'

    return r_text

## 2. Load json data

In [3]:
# Path of the JSON chain dictionary, relative to the notebook's working directory.
dict_file = "./200205_markov-BEXX0014.json"
# Read through a context manager so the file handle is closed once parsing is done.
# NOTE(review): no explicit encoding is passed, so the platform default applies -
# confirm it matches how the file was written.
with open(dict_file) as dict_fp:
    dic = json.load(dict_fp)

## 3. Generate random sentences using json data

In [4]:
# Produce three sample sentences; each iteration makes one grammar-checker request.
for i in range(3):
    s1, s2 = make_sentence(dic)

    # Raw generated sentence, followed by one empty line.
    print('original text :', s1)
    print()

    # Corrected sentence, then a separator between samples.
    print('corrected by DAUM :', s2)
    print("\n---\n")

original text : 윤도집하고환이형님하고대립이되어서판이갈라졌을때요


corrected by DAUM : 윤도 집하 고환이 형님하고 대립이 되어서 판이 갈라 졌을 때요

---

original text : 다수에따르지요.

corrected by DAUM : 다수에 따르지요.

---

original text : 신발장에는적잖은신발이들어있습니다


corrected by DAUM : Error - Failed to get the result

---

