In [24]:
import re, json, operator, sys, urllib, requests, string
import numpy as np
from collections import Counter
from math import log10, log


# Model path
problem_word_path = 'model.txt'

# Linggle API URL templates; "{0}" is a placeholder filled in via str.format
NGRAM_API_URI = "https://{0}.linggle.com/query/"
EXP_API_URI = "https://{0}.linggle.com/example/"

# Maximum length that can be analysed
max_len = 5

# Each character of string.punctuation as its own list element
punc = list(string.punctuation)

###################
# Linggle api 
###################
class Linggle:
    """Small client for the Linggle n-gram web API.

    ``ver`` is substituted into the ``{0}`` placeholder of the module-level
    URL templates ``NGRAM_API_URI`` and ``EXP_API_URI``.
    """

    def __init__(self, ver='www'):
        self.ver = ver

    def __getitem__(self, query):
        """Allow ``client[query]`` as shorthand for ``client.search(query)``."""
        return self.search(query)

    def search(self, query):
        """Query the n-gram endpoint.

        Returns the value stored under the ``"ngrams"`` key of the JSON
        response, or an empty list when the key is absent or the server
        does not answer with HTTP 200.
        """
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.parse`` has been loaded.
        from urllib.parse import quote

        # '/' is swapped for '@' before percent-encoding so the query never
        # contributes a path separator to the URL.
        encoded = quote(query.replace('/', '@'), safe='')
        req = requests.get(NGRAM_API_URI.format(self.ver) + encoded)
        # Mirror get_example(): an error response yields "no results"
        # instead of a JSON decoding failure on an error page.
        if req.status_code != 200:
            return []
        return req.json().get("ngrams", [])

    def get_example(self, ngram_str):
        """Fetch example sentences for ``ngram_str``.

        Returns the value stored under the ``"examples"`` key of the JSON
        response, or an empty list when the key is absent or the server
        does not answer with HTTP 200.
        """
        res = requests.post(EXP_API_URI.format(self.ver), json={'ngram': ngram_str})
        if res.status_code != 200:
            return []
        return res.json().get("examples", [])


# Create the module-level Linggle API client (default server version)
ling = Linggle()

#####################
# Ngram probability #
#####################
def P(ngram, logN=12., MINCOUNT=40.):
    """Score ``ngram`` with a log10 probability based on Linggle counts.

    ngram    -- space-separated words to look up.
    logN     -- log10 of the corpus size subtracted from the log count.
    MINCOUNT -- count assumed for an n-gram the API returns no hit for.

    Returns ``(log10(count) - logN) / length ** (1 / 2.5)`` when the first
    search result carries a positive count, otherwise
    ``log10(MINCOUNT) - logN`` (no length normalisation in that case).
    """
    hits = ling.search(ngram)
    # The second field of the first result is used as the count
    # (presumably the best match -- verify against the API response format).
    count = hits[0][1] if hits else 0
    if count <= 0:
        return log10(MINCOUNT) - logN
    # Clamp to 1 so a hit on a whitespace-only query cannot divide by zero.
    length = float(max(len(ngram.split()), 1))
    # Honour the logN parameter instead of a hard-coded 12; log10 is more
    # accurate than log(x, 10).
    return (log10(count) - logN) / pow(length, 1. / 2.5)

##############################################
# 編輯(Insert, Delete, Replace)一步之後的結果
##############################################
def edits1(ngram, model):
    """Return the set of n-grams that are one edit away from ``ngram``.

    Every word of ``ngram`` is looked up with ``channel_model``; each rule
    returned is an indexable triple whose first item selects the edit:

    * ``'I'`` -- insert rule[1]; rule[2] is an offset relative to the word.
    * ``'D'`` -- delete rule[1] if it sits at the word's index plus rule[2].
    * anything else -- replace the first occurrence of rule[1] in the
      n-gram with rule[2].
    """
    tokens = ngram.split()
    results = set()
    for position, token in enumerate(tokens):
        for rule in channel_model(token, model):
            action = rule[0]
            if action == 'I':
                # Insertion
                offset = rule[2]
                extra = rule[1]
                slot = position + offset
                if offset < 0:
                    # A negative offset is shifted by one to obtain the
                    # list index at which the word is inserted.
                    slot += 1
                if 0 <= slot <= len(tokens):
                    results.add(' '.join(tokens[:slot] + [extra] + tokens[slot:]))
            elif action == 'D':
                # Deletion: only applies when the expected word is really
                # at the target position.
                unwanted = rule[1]
                target = position + rule[2]
                if 0 <= target < len(tokens) and tokens[target] == unwanted:
                    results.add(' '.join(tokens[:target] + tokens[target + 1:]))
            else:
                # Replacement of the first matching word in the n-gram
                old_word = rule[1]
                new_word = rule[2]
                if old_word in tokens:
                    hit = tokens.index(old_word)
                    results.add(' '.join(tokens[:hit] + [new_word] + tokens[hit + 1:]))
    return results
                
##########################
# 編輯兩步之後的結果
##########################
def edits2(ngram, model):
    """Return every n-gram reachable by applying ``edits1`` twice."""
    twice_edited = set()
    for once_edited in edits1(ngram, model):
        twice_edited.update(edits1(once_edited, model))
    return twice_edited

#############################
# Candidates: the original n-gram plus everything within two edits
#############################
def candidates(ngram, model):
    """Collect ``ngram`` itself plus all of its one- and two-edit variants."""
    pool = {ngram}
    pool.update(edits1(ngram, model))
    pool.update(edits2(ngram, model))
    return pool

###############################
# 找最好的編輯
###############################
def correction(ngram, model):
    """Return the highest-scoring candidate correction for ``ngram``.

    Candidates come from ``candidates(ngram, model)`` and are scored with
    ``P``. When several candidates share the best score the original
    ``ngram`` wins, then the alphabetically first alternative.
    """
    options = candidates(ngram, model)
    # max() returns the first maximal item it meets. Iterating the set
    # directly would make ties depend on string hashing, which varies
    # between interpreter runs, so impose a fixed order with the
    # unmodified input first.
    ordered = [ngram] + sorted(options - {ngram})
    return max(ordered, key=P)

################################
# 找出problem word有沒有在model裡
################################
def channel_model(problem_word, model):
    """Return the entry stored for ``problem_word`` in ``model``.

    An empty list is returned when the word is not in the model.
    """
    if problem_word not in model:
        return []
    return model[problem_word]



##############################
# Read in the model JSON file
##############################
def read_problem_word(path):
    """Load and return the JSON document stored at ``path``.

    Raises whatever ``open`` or ``json.load`` raise for a missing file or
    malformed JSON.
    """
    # Read as UTF-8 explicitly: the platform default encoding is
    # locale-dependent and can mis-decode non-ASCII entries.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Load the model through the helper and the configured path rather than
# repeating the file name and the JSON-loading code here.
model = read_problem_word(problem_word_path)

# Correct every line of input.txt, writing each result to output.txt and
# echoing it to stdout.
with open('input.txt', 'r') as inputf, open('output.txt', 'w') as outputf:
    for line in inputf:
        # Strip only the line terminator: slicing with [:-1] would cut off a
        # real character when the last line has no trailing newline.
        corr_line = correction(line.rstrip('\n'), model)
        outputf.write(corr_line + '\n')
        print(corr_line)


discuss the issue
listen to the music
study at university
stay at home
search for more information
