Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code sample for original dataset using clstm python and using opencv2 #73

Open
kendemu opened this issue Mar 30, 2016 · 0 comments
Open

Comments

@kendemu
Copy link

kendemu commented Mar 30, 2016

It took me a lot of effort to get the clstm Python bindings working with my own dataset using OpenCV 2. I hope this code helps other people who are trying to use the clstm Python bindings.

import clstm
import numpy as np
import os
from scipy.ndimage import filters
import cv2

def mktarget(transcript, noutput):
    """Build the one-hot target matrix for a transcript of class indices.

    The result is a float32 array of shape ``(2 * len(transcript) + 1, noutput)``.
    Every even row marks class 0 (used here as the separator class) and each
    odd row ``2 * k + 1`` marks the class of the k-th transcript entry.

    Class 0 is not rejected when it appears inside ``transcript``; such an
    entry yields a row identical to a separator row.
    """
    count = len(transcript)
    rows = np.zeros((2 * count + 1, noutput), 'f')
    # Separator rows: 0, 2, ..., 2 * count.
    rows[0::2, 0] = 1
    # Label rows sit between consecutive separators.
    for position, label in enumerate(transcript):
        rows[2 * position + 1, label] = 1
    return rows

def decode(pred, codec, threshold = .5):
    """Turn network outputs into a string by picking peaks between separators.

    The class-0 activation of the first batch item (``pred[:, 0, 0]``) is
    smoothed with a Gaussian (sigma 2).  Every time step where the smoothed
    value is a strict local minimum and lies below ``threshold`` is taken as
    the position of one character; the character is the arg-max class at
    that step.

    Args:
        pred: array indexed as ``pred[step, class, batch]``; only batch
            item 0 is read.
        codec: sequence mapping a class index to either an integer code
            point (converted with ``chr``) or an already-decoded string
            (used as is).
        threshold: upper bound on the smoothed class-0 activation for a
            local minimum to count as a character position.

    Returns:
        The decoded characters joined into one string ("" if none found).
    """
    # Imported here because the `scipy.ndimage.filters` namespace used by the
    # file-level import is deprecated; the function itself lives in
    # `scipy.ndimage` in both old and new SciPy releases.
    from scipy.ndimage import gaussian_filter

    eps = gaussian_filter(pred[:,0,0],2,mode='nearest')
    # NOTE: np.roll wraps around, so the first and last steps are compared
    # against each other when testing for a local minimum.
    loc = (np.roll(eps,-1)>eps) & (np.roll(eps,1)>eps) & (eps<threshold)
    classes = np.argmax(pred,axis=1)[:,0]
    codes = classes[loc]
    entries = [codec[c] for c in codes]
    chars = [entry if isinstance(entry, str) else chr(entry) for entry in entries]
    return "".join(chars)

def build_codec(lines):
    """Return the class table for the characters found in ``lines``.

    Index 0 holds an empty-string placeholder because ``mktarget`` uses
    class 0 as the separator between characters, so no real character may
    map to it.  The remaining entries are the distinct characters in sorted
    order, which keeps class indices reproducible from run to run.
    """
    return [""] + sorted(set("".join(lines)))


def image_to_sequence(img, ninput):
    """Convert a grayscale image into a network input sequence.

    Args:
        img: 2-D array of shape ``(height, width)``; ``height`` must equal
            ``ninput``.
        ninput: number of input features per time step.

    Returns:
        A C-contiguous float32 array of shape ``(width, ninput, 1)`` in
        which time step ``t`` is image column ``t``.  Pixel values are
        converted to float but not rescaled.

    Raises:
        ValueError: if ``img`` is not 2-D or its height differs from
            ``ninput``.
    """
    if img.ndim != 2 or img.shape[0] != ninput:
        raise ValueError(
            "expected a grayscale image of height %d, got shape %r"
            % (ninput, img.shape)
        )
    # Transpose so that each row of the result is one image column, and
    # convert the values (not the raw bytes) to float32.
    columns = np.ascontiguousarray(img.T, dtype=np.float32)
    return columns.reshape(img.shape[1], ninput, 1)


def main():
    """Train a bidirectional LSTM on ``dataset/`` and save it to ``sample.clstm``.

    Expects ``words.txt`` (one entry per line, used to collect the character
    set) and, for every ``dataset/NAME.png``, a transcript in
    ``dataset/NAME.bin.txt``.
    """
    with open("words.txt", "r") as f:
        lines = f.read().splitlines()
    codec = build_codec(lines)
    # Skip the placeholder at index 0 so no character maps to class 0.
    class_of = {ch: k for k, ch in enumerate(codec) if ch}
    ninput = 100
    noutput = len(codec)
    print("noutput :  %d" % noutput)
    #define network and learning rate
    net = clstm.make_net_init("bidi","ninput=%d:nhidden=200:noutput=%d"%(ninput,noutput))
    net.setLearningRate(1e-4,0.9)
    iteration = 200000

    #input files data
    img_name = sorted(
        os.path.splitext(n)[0] for n in os.listdir("dataset/") if n.endswith(".png")
    )
    if not img_name:
        raise ValueError("no .png files found in dataset/")

    transcripts = []

    #load transcripts
    for i, name in enumerate(img_name):
        print("loading file %s percent complete" % (float(i)/float(len(img_name)) * 100))
        transcript_path = "dataset/"+name+".bin.txt"
        with open(transcript_path,"r") as f:
            # A trailing line break is not part of the transcript.
            transcript_text = f.read().rstrip("\r\n")
        try:
            transcripts.append([class_of[ch] for ch in transcript_text])
        except KeyError as err:
            raise ValueError(
                "character %r in %s is not in the codec built from words.txt"
                % (err.args[0], transcript_path)
            )

    #learning
    for i in range(iteration):
        print("%s %% complete" % (float(i)/float(iteration) * 100))
        index = np.random.randint(len(img_name))
        #set input: the image must belong to the same sample as the transcript
        img_path = "dataset/" + img_name[index]+".png"
        img = cv2.imread(img_path,0)
        if img is None:
            raise IOError("could not read image: " + img_path)
        xs = image_to_sequence(img, ninput)
        net.inputs.aset(xs)
        #forward propagation
        net.forward()
        target = mktarget(transcripts[index],noutput)
        seq = clstm.Sequence()
        seq.aset(target.reshape(-1,noutput,1))
        #align ctc
        aligned = clstm.Sequence()
        clstm.seq_ctc_align(aligned,net.outputs,seq)
        aligned = aligned.array()
        #delta val
        deltas = aligned - net.outputs.array()
        #input delta of aligned ctc and output of network
        net.d_outputs.aset(deltas)
        #backward propagation
        net.backward()
        #update network
        net.update()

    #save network
    clstm.save_net("sample.clstm",net)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant