In [1]:
import cv2, matplotlib
import numpy as np
from sklearn.cluster import KMeans
from pyclustering.cluster.xmeans import xmeans
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer
# OCR
import io
import os

# Imports the Google Cloud client library
from google.cloud import vision

In [158]:
img_path = "006.png"
def refresh_img(img_path = img_path):
	"""Load a fresh copy of the image stored at ``img_path`` from disk.

	Args:
		img_path: Path of the image file. Defaults to the value the
			notebook-level ``img_path`` had when this cell was executed.

	Returns:
		The image array returned by ``cv2.imread``.

	Raises:
		FileNotFoundError: If ``cv2.imread`` could not read the file.
	"""
	img = cv2.imread(img_path)
	# cv2.imread reports failure by returning None instead of raising; fail
	# here with a clear message rather than later inside cvtColor.
	if img is None:
		raise FileNotFoundError(f"cv2.imread could not read image: {img_path!r}")
	return img

In [3]:
def get_gray_img(img):
	"""Convert ``img`` to grayscale using ``cv2.COLOR_BGR2GRAY``.

	Args:
		img: Image accepted by ``cv2.cvtColor`` with the BGR-to-gray code.

	Returns:
		The converted image.
	"""
	gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	return gray

In [4]:
def get_img_size(img):
	"""Return the first two entries of ``img.shape``, i.e. ``(height, width)``."""
	dims = img.shape[:2]
	return dims
# Display the (height, width) of the default image as a quick check.
get_img_size(refresh_img())

(2490, 1720)

In [5]:
def to_odd(x: int):
	"""Round ``x`` down to an odd number that is at least 1.

	The result is used as a Gaussian kernel size, which must be positive and
	odd. Odd inputs are returned unchanged and even inputs become ``x - 1``;
	anything that would fall below 1 (``x <= 0``) is clamped to 1 instead of
	yielding an invalid size such as -1.

	Args:
		x: Integer to convert.

	Returns:
		int: The largest odd number ``<= x``, but never less than 1.
	"""
	odd = x if x % 2 == 1 else x - 1
	return max(odd, 1)

In [6]:
def display_img(mat):
	"""Show ``mat`` in a resizable OpenCV window named "image"."""
	window_name = "image"
	# Open the resizable window "image".
	cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
	# Render the received image inside that window.
	cv2.imshow(window_name, mat)
	# Short 1 ms key wait after showing the image.
	cv2.waitKey(1)

In [7]:
def close_all_img():
	"""Destroy all OpenCV windows, then issue a 1 ms ``cv2.waitKey``."""
	cv2.destroyAllWindows()
	# NOTE(review): the extra waitKey(1) presumably lets the GUI backend
	# process the close request — confirm on the target platform.
	cv2.waitKey(1)

In [8]:
def get_bw_image(img, gauss_x: int, gauss_y:int, thr:int = 250):
	"""Return an inverted binary mask of ``img``.

	The image is converted to grayscale, Gaussian-blurred, thresholded with
	``cv2.THRESH_BINARY`` at ``thr`` (max value 255) and finally inverted
	with ``cv2.bitwise_not``.

	Args:
		img: Image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
		gauss_x: Requested blur kernel width; passed through ``to_odd``.
		gauss_y: Requested blur kernel height; passed through ``to_odd``.
		thr: Threshold value handed to ``cv2.threshold``.

	Returns:
		The inverted thresholded image.
	"""
	gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	kernel = (to_odd(gauss_x), to_odd(gauss_y))
	blurred = cv2.GaussianBlur(gray, kernel, 0)
	binary = cv2.threshold(blurred, thr, 255, cv2.THRESH_BINARY)[1]
	return cv2.bitwise_not(binary)

In [9]:
def get_effective_zone(img, offset_percent = 1, thr = 250):
	"""Return the bounding extent of all external contours found in ``img``.

	A heavily blurred, inverted binary mask is built with ``get_bw_image``
	(blur size derived from ``width//100*5`` and ``height//100*5``, at least
	1), its external contours are located, and the union of their bounding
	rectangles is returned.

	Args:
		img: Image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
		offset_percent: Accepted for interface compatibility; the body does
			not use it.
		thr: Threshold forwarded to ``get_bw_image``.

	Returns:
		tuple: ``(left_end, right_end, upper_end, lower_end)``. When no
		contour is found this is ``(width, 0, height, 0)``.
	"""
	height, width = get_img_size(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
	blur_x = to_odd(max(width//100*5, 1))
	blur_y = to_odd(max(height//100*5, 1))
	mask = get_bw_image(img = img, gauss_x = blur_x, gauss_y = blur_y, thr = thr)
	found, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
	boxes = [cv2.boundingRect(c) for c in found]
	# Seed each extreme with the "nothing found" value so the result matches
	# the contour-free case.
	left_end = min([width] + [bx for bx, _, _, _ in boxes])
	right_end = max([0] + [bx + bw for bx, _, bw, _ in boxes])
	upper_end = min([height] + [by for _, by, _, _ in boxes])
	lower_end = max([0] + [by + bh for _, by, _, bh in boxes])
	return left_end, right_end, upper_end, lower_end

In [10]:
def get_frame(img, blur_rate:int =3, random_state=0):
	"""Build a binary mask marking the rows of ``img`` that contain content.

	The grayscale image is blurred, the mean intensity of every row is
	clustered into three groups with KMeans, and the cluster with the
	brightest mean is treated as "no text". Every other row is marked, but
	only between the left and right ends reported by ``get_effective_zone``.

	Args:
		img: Image accepted by ``get_gray_img`` / ``get_effective_zone``.
		blur_rate: Blur kernel size as a percentage of the image width and
			height.
		random_state: Seed forwarded to ``KMeans`` so repeated runs give the
			same clustering. Pass ``None`` for unseeded behaviour.

	Returns:
		numpy.ndarray: ``uint8`` array of shape ``(height, width)`` that is
		255 where a row holds content inside the effective zone and 0
		elsewhere.
	"""
	img_gray = get_gray_img(img)
	height, width = get_img_size(img_gray)
	# Kernel sizes must be positive and odd; clamp before to_odd so a small
	# image or blur_rate cannot produce 0 or -1.
	kernel = (
		to_odd(max(blur_rate*width//100, 1)),
		to_odd(max(blur_rate*height//100, 1)),
	)
	img_blur = cv2.GaussianBlur(img_gray, kernel, 3)

	average_color_per_row = np.average(img_blur, axis=1)
	# Detect the rows that carry no information with KMeans.
	n_clstrs = 3
	algorithm = KMeans(n_clusters = n_clstrs, random_state = random_state)
	labels = algorithm.fit_predict(average_color_per_row.reshape(-1, 1))
	# Mean row intensity of each cluster. An empty cluster gets -inf so it
	# can never be picked as the brightest one (its mean would be NaN, which
	# argmax would otherwise select).
	centers = [
		np.average(average_color_per_row[labels==i]) if np.any(labels==i) else -np.inf
		for i in range(n_clstrs)
	]
	# The brightest cluster is very likely to contain no text.
	label_for_absence = np.argmax(centers)

	# Only columns inside the effective zone may be marked.
	left_end, right_end, _, _ = get_effective_zone(img)
	# Same size as the input image; starts all black (nothing marked).
	frame = np.zeros((height, width), np.uint8)
	# Mark content rows within [left_end, right_end). If no zone was found
	# (left_end > right_end) the slice is empty and the frame stays black.
	frame[labels != label_for_absence, left_end:right_end] = 255
	return frame

In [101]:
def get_contours(frame):
	"""Find the external contours of ``frame`` as 4-corner rectangles.

	The rest of the notebook indexes the result as an ``(N, 4, 1, 2)`` array
	(for example ``contours[:, 0, 0, 1]``). Converting the raw
	``cv2.findContours`` output with ``np.array`` only has that shape when
	every contour happens to have exactly four points, and yields shape
	``(0,)`` when nothing is found. Each contour is therefore replaced by the
	corners of its bounding rectangle.

	Args:
		frame: Single-channel binary image accepted by ``cv2.findContours``.

	Returns:
		numpy.ndarray: ``int32`` array of shape ``(N, 4, 1, 2)``. The corner
		order is upper-left, lower-left, lower-right, upper-right, and
		coordinates are inclusive pixel positions ``(x, y)``.
	"""
	contours, _ = cv2.findContours(frame, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	boxes = []
	for contour in contours:
		x, y, w, h = cv2.boundingRect(contour)
		# boundingRect reports an inclusive pixel count, so the far corner
		# sits at x + w - 1 / y + h - 1.
		right, bottom = x + w - 1, y + h - 1
		boxes.append([[[x, y]], [[x, bottom]], [[right, bottom]], [[right, y]]])
	# reshape keeps the 4-D layout even when no contour was found.
	return np.array(boxes, dtype=np.int32).reshape(-1, 4, 1, 2)

In [12]:
def get_img_with_contours(img, contours, color = (0,255,0), line_width: int=3):
	"""Draw all ``contours`` onto ``img`` and return the result.

	Args:
		img: Image to draw on.
		contours: Contours forwarded to ``cv2.drawContours``.
		color: Line colour passed to ``cv2.drawContours``.
		line_width: Line thickness passed to ``cv2.drawContours``.

	Returns:
		The value returned by ``cv2.drawContours``.
	"""
	# NOTE(review): per the OpenCV docs drawContours draws onto the array it
	# is given — pass a copy if the original must stay untouched.
	every_contour = -1  # contour index -1 selects all contours
	drawn = cv2.drawContours(img, contours, every_contour, color, line_width)
	return drawn

In [38]:
# Close any OpenCV windows left open by earlier display_img calls.
close_all_img()

In [39]:
def exe_xmeans(data, number_initial_centers = 2, number_max_centers = 4):
	"""Run X-Means clustering on ``data`` and return the processed instance.

	Args:
		data: Samples handed to both the k-means++ initializer and ``xmeans``.
		number_initial_centers: Number of centres requested from the
			k-means++ initializer.
		number_max_centers: Passed to ``xmeans`` as ``kmax``.

	Returns:
		The ``xmeans`` instance after ``process()`` has been called.
	"""
	seeder = kmeans_plusplus_initializer(data, number_initial_centers)
	start_centers = seeder.initialize()
	model = xmeans(data=data, initial_centers=start_centers, kmax=number_max_centers, ccore=True)
	model.process()
	return model

In [40]:
# Cluster the contour heights with X-Means and show the median height of
# each resulting cluster.
contours = get_contours(get_frame(refresh_img(),blur_rate=1))
# Height of a contour = |y difference| between its first two points.
contours_height = [np.abs(x-y)[0,1] for x,y,_,_ in contours]
contours_height = np.reshape(contours_height,(-1,1))
xmeans_instance = exe_xmeans(contours_height)
clusters = xmeans_instance.get_clusters()
predict = xmeans_instance.predict(contours_height)
# Use the public accessor instead of the name-mangled private attribute
# `_xmeans__centers`.
classes = len(xmeans_instance.get_centers())
med_classes = [np.median(contours_height[predict == i]) for i in range(0,classes)]
med_classes

[29.0, 55.0]

In [38]:
def get_contours_height(contours):
	"""Return the height of every 4-point contour.

	The height is the absolute y difference between a contour's first and
	second point.

	Args:
		contours: Iterable of contours, each unpackable into exactly four
			points of shape ``(1, 2)``.

	Returns:
		numpy.ndarray: One height per contour.
	"""
	heights = []
	for contour in contours:
		first, second, _, _ = contour
		delta = np.abs(first - second)
		heights.append(delta[0, 1])
	return np.array(heights)

In [155]:
# XMeans. no longer used
# Legacy cell: for every X-Means cluster, contours shorter than the cluster's
# median height are stretched vertically (top points moved up, bottom points
# moved down).
# NOTE(review): depends on `xmeans_instance`, `contours` and `contours_height`
# left behind by another cell; the recorded run of this cell failed with
# NameError because `xmeans_instance` was not defined.
clusters = xmeans_instance.get_clusters()
predict = xmeans_instance.predict(contours_height)
classes = len(xmeans_instance._xmeans__centers)
med_classes = [np.median(contours_height[predict == i]) for i in range(0,classes)]
# flatten to 1 vector for ease of calculation
contours_shape = np.shape(contours)
contours = np.reshape(contours,(-1,))
for cls in range(0,classes):
	# contours of this cluster whose height is below the cluster median
	label_match = predict == cls
	where_small = np.reshape(contours_height<med_classes[cls],(-1,))
	idx = label_match & where_small
	# flatten to 1 vector for ease of calculation
	# repeat each per-contour flag once per scalar of that contour so the mask
	# lines up with the flattened coordinate vector
	padding = np.size(contours)//np.size(idx)
	idx_pad = [[i]*padding for i in idx]
	idx_pad = np.reshape(idx_pad, (-1,))
	# boolean-mask indexing returns a copy, hence the write-back after the loop
	contours_modified = np.reshape(contours, (-1,))[idx_pad]
	L = len(contours_modified)
	# assumes every contour consists of 4 points with 2 coordinates each
	contours_modified = np.reshape(contours_modified, (L//(4*2),4,1,2))
	for i in range(0,len(contours_modified)):
		a,b,c,d = contours_modified[i]
		# NOTE(review): y_mid is computed but never used.
		y_mid = (a[0,1]+b[0,1])//2
		y_len = b[0,1] - a[0,1]
		diff = med_classes[cls] - y_len
		# half of the missing height (plus 1) is added on each side
		slide = diff//2 + 1
		a[0,1] -= slide
		b[0,1] += slide
		c[0,1] += slide
		d[0,1] -= slide
	contours[idx_pad] = np.reshape(contours_modified, (-1,))
# restore the original array shape
contours = np.reshape(contours, contours_shape)

NameError: name 'xmeans_instance' is not defined

In [102]:
def sort_contours(contours):
	"""Return ``contours`` ordered by the y coordinate of their first point.

	Args:
		contours: Array indexable as ``contours[:, 0, 0, 1]``.

	Returns:
		numpy.ndarray: A reordered copy, smallest first-point y first.
	"""
	first_point_y = contours[:, 0, 0, 1]
	order = first_point_y.argsort()
	return contours[order, :]

In [159]:
def expand_contours(img, contours):
	"""Grow vertically stacked contours so that neighbours share their gaps.

	Contours are sorted top to bottom. The vertical gap between contour ``i``
	and contour ``i+1`` is split in two using the ratio
	``h_i**2 / (h_i**2 + h_(i+1)**2)`` of their squared heights: that share
	moves the upper edge of contour ``i+1`` up, and the remainder moves the
	lower edge of contour ``i`` down. The free upper edge of the first
	contour and the free lower edge of the last contour are moved by the
	same amount as their opposite edge.

	NOTE(review): the original comments described the two shares the other
	way round (ratio share for the lower edge of the upper contour). The
	behaviour of the code was kept — confirm which split is intended.

	Args:
		img: Image the contours belong to. Its height is used to keep the
			expanded edges inside the image. Pass ``None`` to skip that
			clamping.
		contours: Array of shape ``(N, 4, 1, 2)`` with corner order
			upper-left, lower-left, lower-right, upper-right.

	Returns:
		numpy.ndarray: Sorted, expanded copy of ``contours``. With fewer
		than two contours there is no gap to distribute, so an unmodified
		copy is returned.
	"""
	Len = len(contours)
	if Len < 2:
		# Previously raised IndexError: no neighbour gap exists to derive an
		# expansion amount from.
		return np.array(contours)
	contours = sort_contours(contours)
	# The split ratio is based on the squared heights of adjacent contours.
	height_sq = get_contours_height(contours).astype('float64')**2
	pair_sum = height_sq[:-1] + height_sq[1:]
	# Split evenly when both neighbours have zero height (avoids 0/0).
	ratio = np.full(Len-1, 0.5)
	np.divide(height_sq[:-1], pair_sum, out=ratio, where=pair_sum > 0)
	# Vertical distance from the lower-left point of contour i to the
	# upper-left point of contour i+1.
	gap = (contours[1:, 0, 0, 1] - contours[:-1, 1, 0, 1]).astype('int32')
	grow_up = (ratio * gap).astype('int32')  # upper edge of contour i+1
	grow_down = gap - grow_up                # lower edge of contour i
	for i in range(Len-1):
		contours[i] = __get_expanded_contour(contours[i], grow_down[i], expand_upper_edge=False)
		contours[i+1] = __get_expanded_contour(contours[i+1], grow_up[i], expand_upper_edge=True)
	# Expand the free edge of the first and the last contour by the same
	# amount as their opposite edge.
	contours[0] = __get_expanded_contour(contours[0], grow_down[0], expand_upper_edge=True)
	contours[-1] = __get_expanded_contour(contours[-1], grow_up[-1], expand_upper_edge=False)
	if img is not None:
		# Keep y inside the image: a negative y would later make array
		# slicing wrap around to the opposite edge.
		np.clip(contours[..., 1], 0, img.shape[0] - 1, out=contours[..., 1])
	return contours

In [132]:
def __get_expanded_contour(contour, add, expand_upper_edge:bool=True):
	"""Shift one horizontal edge of a 4-point contour by ``add`` pixels.

	The points are modified in place, so the change is visible in the array
	``contour`` is a view of.

	Args:
		contour: Four points in the order upper-left, lower-left,
			lower-right, upper-right.
		add: Amount added to (lower edge) or subtracted from (upper edge)
			the y coordinate.
		expand_upper_edge: Move the upper edge up when true, otherwise move
			the lower edge down.

	Returns:
		The same ``contour`` object after modification.
	"""
	upper_left, lower_left, lower_right, upper_right = contour
	shift = [[0, add]]
	if not expand_upper_edge:
		lower_left += shift
		lower_right += shift
		return contour
	upper_left -= shift  # in place: takes effect on the caller's contour
	upper_right -= shift
	return contour

In [160]:
# Draw every contour in green, 1 px wide, on a freshly loaded image and
# write the result to "saved.png".
img_with_contours = get_img_with_contours(
	refresh_img(), contours, color=(0, 255 ,0), line_width=1
)
cv2.imwrite("saved.png", img_with_contours)

True

In [140]:
def crop_image(img, contours):
	"""Cut the bounding rectangle of every contour out of ``img``.

	Args:
		img: Image array indexed as ``img[y, x]``.
		contours: Iterable of contours accepted by ``cv2.boundingRect``.

	Returns:
		list: One sub-array (a slice of ``img``) per contour, in input order.
	"""
	images = []
	for contour in contours:
		x, y, w, h = cv2.boundingRect(contour)
		# A negative start would make NumPy slice from the opposite edge and
		# return a wrong (usually empty) crop, so clamp the bounds at 0.
		# Bounds past the far edge are already truncated by slicing.
		top, bottom = max(y, 0), max(y + h, 0)
		left, right = max(x, 0), max(x + w, 0)
		images.append(img[top:bottom, left:right])
	return images

In [141]:
def ocr_doc_image(img_bytes, client=None):
	"""Run Google Cloud Vision document text detection on an encoded image.

	Args:
		img_bytes: Content of an encoded image file, passed to
			``vision.Image(content=...)``.
		client: Optional ``vision.ImageAnnotatorClient`` to reuse across
			calls. A new client is created when omitted.

	Returns:
		str: The text of every detected symbol, concatenated in page, block,
		paragraph, word order without any separator.

	Raises:
		RuntimeError: If the API response carries an error message.
	"""
	# Instantiate a client unless the caller supplied one to reuse.
	if client is None:
		client = vision.ImageAnnotatorClient()

	image = vision.Image(content=img_bytes)

	# Document text detection with a Japanese language hint.
	response = client.document_text_detection(
		image=image, image_context={"language_hints": ["ja"]}
	)
	# Surface API failures instead of silently returning an empty string.
	if response.error.message:
		raise RuntimeError(f"Cloud Vision API error: {response.error.message}")

	# Extract the text data from the response.
	return "".join(
		symbol.text
		for page in response.full_text_annotation.pages
		for block in page.blocks
		for paragraph in block.paragraphs
		for word in paragraph.words
		for symbol in word.symbols
	)


In [143]:
path_w = "./toc.txt"
def save_text(text:str, path = path_w):
	"""Write ``text`` to ``path`` as UTF-8, replacing any existing file.

	Args:
		text: Text to write.
		path: Destination file path. Defaults to the value the
			notebook-level ``path_w`` had when this cell was executed.
	"""
	# The OCR output is Japanese; an explicit encoding keeps the file
	# independent of the platform's default code page.
	with open(path, mode='w', encoding='utf-8') as f:
		f.write(text)

In [151]:
# Crop every contour region (top to bottom), OCR each JPEG-encoded crop and
# collect one line of text per crop; then save and print the result.
img_crps = crop_image(refresh_img(),sort_contours(contours))
ocr_lines = []
for img_crp in img_crps:
	_, num_bytes = cv2.imencode('.jpeg', img_crp)
	num_bytes = num_bytes.tobytes()
	ocr_lines.append(ocr_doc_image(num_bytes) + "\n")
text = "".join(ocr_lines)
save_text(text)
print(text)

1.7.3正則条件つき確率48
1.8確率変数の収束50
1.8.1概収束と確率収束50
1.8.2法則収束53
1.8.3連続写像定理55
1.8.4大数の法則と中心極限定理61
1.8.5期待値の収束64
2.線形推測論67
2.1射影行列と逆行列67
2.2カイ2乗分布72
2.3フィッシャー・コクランの定理74
2.4t分布とF分布78
2.5ガウス・マルコフモデル80
2.6仮説検定88
2.7平均の検定91
2.8重回帰分析92
2.9一元配置99
2.10二元配置･･102
3.統計的決定理論・108
3.1統計推測と統計的決定理論108
3.2十分性と完備性111
3.2.1十分統計量111
3.2.2因子分解定理･・116
3.2.3ラオ・ブラックウェルの定理120
3.2.4完備性121
3.3指数型分布族124
3.4統計的推定131

