-
Notifications
You must be signed in to change notification settings - Fork 202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Feature Request] Text recognition for images #220
Comments
Sounds great! |
import SwiftUI
import PlaygroundSupport
import Vision
// Default sample image bundled with the playground, used until the user picks one.
// NOTE(review): `#imageLiteral` already produces a UIImage — the extra `UIImage(...)`
// wrapper looks redundant; confirm this compiles outside a playground context.
let defImage = UIImage(#imageLiteral(resourceName: "照片.png"))
/// A cluster of recognized text fragments that visually belong together
/// (same line / adjacent lines), with their texts pre-joined for display.
struct TextGroup: Identifiable {
    /// Builds a group from its member fragments and joins their strings.
    /// - Parameter items: The recognized text boxes belonging to this group.
    init(items: [TextPos]) {
        self.items = items
        // `text` is non-optional on TextPos, so a plain map suffices
        // (the original used compactMap needlessly).
        self.text = items.map { $0.text }.joined(separator: " ")
    }

    var id: UUID = UUID()
    /// The individual recognized text boxes in this group.
    var items: [TextPos]
    /// All fragment texts joined with single spaces.
    var text: String

    /// Axis-aligned bounding rectangle enclosing every member polygon.
    ///
    /// Returns `.zero` for an empty group; the original force-unwrapped
    /// `items.first!` and would crash on an empty `items` array.
    var rect: CGRect {
        guard let seed = items.first?.topLeft else { return .zero }
        var minX = seed.x, maxX = seed.x
        var minY = seed.y, maxY = seed.y
        for item in items {
            for point in item.polygon {
                minX = min(minX, point.x)
                maxX = max(maxX, point.x)
                minY = min(minY, point.y)
                maxY = max(maxY, point.y)
            }
        }
        return CGRect(x: minX, y: minY, width: maxX - minX, height: maxY - minY)
    }
}
/// One recognized text fragment together with the four corner points of its
/// (possibly rotated) bounding quad, as reported by the recognizer.
struct TextPos: Identifiable {
    var id: UUID = UUID()
    var text: String
    var topLeft: CGPoint
    var topRight: CGPoint
    var bottomLeft: CGPoint
    var bottomRight: CGPoint

    /// The four corners in drawing order: clockwise starting at top-left.
    var polygon: [CGPoint] {
        [topLeft, topRight, bottomRight, bottomLeft]
    }

    /// Euclidean length of the left edge — the box's height along its own axis.
    var leftHeight: Double {
        let dx = topLeft.x - bottomLeft.x
        let dy = topLeft.y - bottomLeft.y
        return abs(sqrt(pow(dx, 2) + pow(dy, 2)))
    }

    /// Rotation of the top edge (top-left → top-right) relative to the
    /// x-axis, in radians.
    var radian: Double {
        atan2(topRight.y - topLeft.y, topRight.x - topLeft.x)
    }

    /// The same rotation expressed in degrees.
    var angle: Double {
        radian * 180.0 / Double.pi
    }
}
/// Converts a polar offset (length + angle in degrees) into a Cartesian
/// displacement.
///
/// Axis convention (kept from the original): x uses `sin` and y uses `cos`,
/// i.e. 0° points along +y and angles grow toward +x — matching how callers
/// expand the recognized text boxes vertically.
/// - Parameters:
///   - long: Length of the offset vector.
///   - angle: Direction in degrees.
/// - Returns: The (dx, dy) displacement of length `long`.
func hypotenuse(long: Double, angle: Double) -> CGPoint {
    // Fix: `radian` was a never-mutated `var`; also spell the degree→radian
    // conversion idiomatically instead of `2 * π / 360`.
    let radian = angle * Double.pi / 180
    return CGPoint(x: sin(radian) * long, y: cos(radian) * long)
}
/// Returns `true` when convex polygons `a` and `b` overlap, using the
/// separating-axis theorem: project both point sets onto the normal of every
/// edge of both polygons; if any axis yields disjoint projection intervals,
/// the polygons cannot intersect.
///
/// Fixes two defects in the original:
/// 1. When projecting `b`, the min/max updates compared against `minA!`/`maxA!`
///    instead of `minB!`/`maxB!`, producing wrong projection ranges.
/// 2. An empty `a` or `b` caused a nil force-unwrap crash; empty inputs now
///    report no intersection.
/// - Note: Valid for convex polygons only (the text boxes here are quads).
func polygonsIntersecting(a: [CGPoint], b: [CGPoint]) -> Bool {
    guard !a.isEmpty, !b.isEmpty else { return false }

    // Projects every vertex of `polygon` onto `axis`, returning the interval.
    func projectionRange(of polygon: [CGPoint], onto axis: CGPoint) -> (lo: Double, hi: Double) {
        var lo = Double.infinity
        var hi = -Double.infinity
        for p in polygon {
            let projected = Double(axis.x * p.x + axis.y * p.y)
            if projected < lo { lo = projected }
            if projected > hi { hi = projected }
        }
        return (lo, hi)
    }

    for points in [a, b] {
        for i1 in 0..<points.count {
            let i2 = (i1 + 1) % points.count
            let p1 = points[i1]
            let p2 = points[i2]
            // Perpendicular of edge p1→p2: the candidate separating axis.
            let normal = CGPoint(x: p2.y - p1.y, y: p1.x - p2.x)
            let rangeA = projectionRange(of: a, onto: normal)
            let rangeB = projectionRange(of: b, onto: normal)
            // A gap between the projection intervals proves separation.
            if rangeA.hi < rangeB.lo || rangeB.hi < rangeA.lo {
                return false
            }
        }
    }
    return true
}
/// Main playground view: shows the picked (or default) image, runs Vision
/// text recognition on it, clusters neighbouring fragments into groups, and
/// overlays a tappable green polygon per fragment that prints its group's
/// joined text to the console.
struct ContentView: View {
@State var showImagePicker: Bool = false
@State var image: UIImage? = nil
// On-screen size of the displayed image. Vision reports normalized (0...1)
// coordinates, which are scaled by these constants when drawing overlays.
let frameW = 300.0
let frameH = 450.0
// All recognized fragments (raw, then padded in place below).
@State var data: [TextPos] = []
// Fragments clustered into visually contiguous groups for tap handling.
@State var textGroupList: [TextGroup] = []
/// Kicks off a Vision text-recognition request on the current image;
/// results arrive via `recognizeTextHandler`.
func visionText() {
// Get the CGImage on which to perform requests.
guard let cgImage = (image ?? defImage).cgImage else { return }
// Create a new image-request handler.
let requestHandler = VNImageRequestHandler(cgImage: cgImage)
// Create a new request to recognize text.
let request = VNRecognizeTextRequest(completionHandler: recognizeTextHandler)
do {
// Perform the text-recognition request.
try requestHandler.perform([request])
} catch {
print("Unable to perform the requests: \(error).")
}
}
/// Vision completion handler: converts observations into `TextPos` values,
/// expands each box vertically by half its height so boxes on adjacent
/// lines overlap, then clusters overlapping boxes of similar angle and
/// height into `TextGroup`s for rendering.
func recognizeTextHandler(request: VNRequest, error: Error?) {
guard let observations =
request.results as? [VNRecognizedTextObservation] else {
return
}
// Keep only the top candidate string plus the quad's corner points.
data = observations.compactMap({ observation in
return TextPos(
text: observation.topCandidates(1)[0].string,
topLeft: observation.topLeft,
topRight: observation.topRight,
bottomLeft: observation.bottomLeft,
bottomRight: observation.bottomRight
);
})
// Pad every box: push the top edge up and the bottom edge down by half the
// box height, along the box's own rotation axis.
for index in 0..<data.count {
let item = data[index]
// NOTE(review): the angle is mirrored (360 - angle) before computing the
// offset — presumably to match `hypotenuse`'s sin-for-x/cos-for-y axis
// convention; confirm against that function.
let angle = 360 - item.angle;
let tl = item.topLeft;
let ptl = hypotenuse(long: item.leftHeight/2, angle: angle);
data[index].topLeft = CGPoint(x:tl.x + ptl.x, y: tl.y + ptl.y);
let tr = item.topRight;
let ptr = hypotenuse(long: item.leftHeight/2, angle: angle);
data[index].topRight = CGPoint(x:tr.x + ptr.x, y: tr.y + ptr.y);
// Bottom corners move in the opposite direction (angle + 180).
let bl = item.bottomLeft;
let pbl = hypotenuse(long: item.leftHeight/2, angle: angle + 180);
data[index].bottomLeft = CGPoint(x:bl.x + pbl.x, y: bl.y + pbl.y);
let br = item.bottomRight;
let pbr = hypotenuse(long: item.leftHeight/2, angle: angle + 180);
data[index].bottomRight = CGPoint(x:br.x + pbr.x, y: br.y + pbr.y);
}
// Greedy single-pass grouping: append each fragment to the first existing
// group containing a member with similar angle (< 5°), similar height
// (difference under half the smaller height), and an overlapping polygon;
// otherwise start a new group.
var groupData: [[TextPos]] = []
data.forEach { newItem in
let groupIndex = groupData.firstIndex { items in
return nil != items.first { item in
let angleOk = abs(item.angle - newItem.angle) < 5
let heightOk = abs(item.leftHeight - newItem.leftHeight) < (min(item.leftHeight, newItem.leftHeight) / 2)
if( angleOk && heightOk) {
return polygonsIntersecting(a: item.polygon, b: newItem.polygon)
}
return false
}
}
if(groupIndex != nil) {
groupData[groupIndex!].append(newItem);
} else {
groupData.append([newItem])
}
}
textGroupList = groupData.compactMap({ items in
return TextGroup(items: items)
})
}
var body: some View {
VStack {
ZStack{
Image(uiImage: image ?? defImage)
.resizable()
.frame(width: frameW, height: frameH)
// Draw one filled polygon per fragment; tapping prints the whole
// group's text.
ForEach(textGroupList){ textGroup in
ForEach(textGroup.items) { item in
Path { path in
// Scale normalized coordinates to view size. The y value is
// flipped (frameH - y) — Vision's origin differs from SwiftUI's
// top-left origin.
path.move(to: CGPoint(x: item.topLeft.x * frameW, y: frameH - item.topLeft.y * frameH))
path.addLine(to: CGPoint(x: item.topRight.x * frameW, y: frameH - item.topRight.y * frameH))
path.addLine(to: CGPoint(x: item.bottomRight.x * frameW, y: frameH - item.bottomRight.y * frameH))
path.addLine(to: CGPoint(x: item.bottomLeft.x * frameW, y: frameH - item.bottomLeft.y * frameH))
path.addLine(to: CGPoint(x: item.topLeft.x * frameW, y: frameH - item.topLeft.y * frameH))
}.fill(Color.green.opacity(0.5)).onTapGesture {
print(textGroup.text)
}
}
}
}
.frame(width: frameW, height: frameH)
// Button label: "Choose image".
Button("选择图片") {
showImagePicker = true
}
}
.sheet(isPresented: $showImagePicker) {
ImagePicker(sourceType: .photoLibrary) { image in
self.image = image
showImagePicker = false
// Re-run recognition on the newly picked image.
visionText()
}
}
.onAppear(perform: visionText)
}
}
/// SwiftUI wrapper around `UIImagePickerController` that delivers the picked
/// image through a callback and dismisses itself afterwards.
struct ImagePicker: UIViewControllerRepresentable {
    @Environment(\.presentationMode)
    private var presentationMode
    let sourceType: UIImagePickerController.SourceType
    let onImagePicked: (UIImage) -> Void

    /// Bridges the UIKit picker delegate callbacks back into SwiftUI.
    final class Coordinator: NSObject,
                             UINavigationControllerDelegate,
                             UIImagePickerControllerDelegate {
        @Binding
        private var presentationMode: PresentationMode
        private let sourceType: UIImagePickerController.SourceType
        private let onImagePicked: (UIImage) -> Void

        init(presentationMode: Binding<PresentationMode>,
             sourceType: UIImagePickerController.SourceType,
             onImagePicked: @escaping (UIImage) -> Void) {
            _presentationMode = presentationMode
            self.sourceType = sourceType
            self.onImagePicked = onImagePicked
        }

        func imagePickerController(_ picker: UIImagePickerController,
                                   didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey : Any]) {
            // Always dismiss, even if no image can be extracted.
            defer { presentationMode.dismiss() }
            // Fix: the original force-cast (`as!`) would crash if the
            // `.originalImage` entry were missing or of another type.
            guard let uiImage = info[UIImagePickerController.InfoKey.originalImage] as? UIImage else {
                return
            }
            onImagePicked(uiImage)
        }

        func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
            presentationMode.dismiss()
        }
    }

    func makeCoordinator() -> Coordinator {
        return Coordinator(presentationMode: presentationMode,
                           sourceType: sourceType,
                           onImagePicked: onImagePicked)
    }

    func makeUIViewController(context: UIViewControllerRepresentableContext<ImagePicker>) -> UIImagePickerController {
        let picker = UIImagePickerController()
        picker.sourceType = sourceType
        picker.delegate = context.coordinator
        return picker
    }

    func updateUIViewController(_ uiViewController: UIImagePickerController,
                                context: UIViewControllerRepresentableContext<ImagePicker>) {
        // No state to push into the picker after creation.
    }
}
// Present the SwiftUI view as the playground's live view.
// Fix: removed the trailing " |" scrape artifact, which made the line
// invalid Swift.
PlaygroundPage.current.setLiveView(ContentView())
@xioxin Thanks! You have done most of it! Would you mind to remove |
@caxerx
|
@tatsuz0u 我再完善一下代码并增加一些注释。 |
最新的代码 https://gist.github.com/xioxin/5c3d3c77721784fb690be90bc56f07a8 |
我只是想加 credits 好鼓勵更多貢獻者出現...如果可以的話隨便丟進專案下面一個地方就可以了。 |
@xioxin That's bad news :( |
Done. #227 |
Is your feature request related to a problem? Please describe.
I'm always frustrated when some of my favorite doujinshi only have Russian version. However, I can't even recognize any of the Russian characters. It will be nice if I am able to copy the text in the image and translate them.
Describe the solution you'd like
Adding an options to enable text recognition for the selected image. The Apple Vision framework might help for the image text recognize implementation.
Describe alternatives you've considered
Currently, there is a copy-image option, so we could copy the image into a 3rd-party text recognition service. However, this method is very inconvenient when reading continuous content.
Additional context
Telegram has a similar feature that you can reference to:
The text was updated successfully, but these errors were encountered: