Browse files

Update gem; this makes it work with newer versions of ffmpeg. Also fix the splitting algorithm - it actually works now. Also, there is no way to say how long this will remain valid for use, as the Google API is still not public.
  • Loading branch information...
1 parent 399d4e2 commit a3bce5eb5960072d8a961e76138b6840453466d7 @taf2 committed Oct 7, 2012
View
2 Gemfile.lock
@@ -1,7 +1,7 @@
GEM
remote: http://rubygems.org/
specs:
- curb (0.7.15)
+ curb (0.8.1)
json (1.5.1)
PLATFORMS
View
10 lib/speech/audio_inspector.rb
@@ -14,7 +14,7 @@ def initialize(duration_str)
def to_s
s,f = seconds.split('.')
- sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
+ sprintf "%.2d:%.2d:%.2d.%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
#"#{hours}:#{minutes}:#{seconds}:#{f}"
end
@@ -47,7 +47,13 @@ def +(b)
end
def initialize(file)
- self.duration = Duration.new(`ffmpeg -i #{file} 2>&1`.strip.scan(/Duration: (.*),/).first.first)
+ out = `ffmpeg -i #{file} 2>&1`.strip
+ if out.match(/No such file or directory/)
+ raise "No such file or directory: #{file}"
+ else
+ out = out.scan(/Duration: (.*),/)
+ self.duration = Duration.new(out.first.first)
+ end
end
end
View
26 lib/speech/audio_splitter.rb
@@ -25,13 +25,13 @@ def self.copy(splitter)
# given the original file from the splitter and the chunked file name with duration and offset run the ffmpeg command
def build
return self if self.copied
- # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00:00 -t 00:00:30:00 sample.audio.out.wav
- offset_ts = AudioInspector::Duration.from_seconds(self.offset)
- duration_ts = AudioInspector::Duration.from_seconds(self.duration)
+ # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00.00 -t 00:00:30.00 sample.audio.out.wav
+ offset_ts = AudioInspector::Duration.from_seconds(self.offset).to_s
+ duration_ts = AudioInspector::Duration.from_seconds(self.duration).to_s
# NOTE: kind of a hack, but if the original source is less than or equal to 1 second, we should skip ffmpeg
- puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
+ #puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
#puts "offset: #{ offset_ts.to_s }, duration: #{duration_ts.to_s}"
- cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk}"# >/dev/null 2>&1"
+ cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk} >/dev/null 2>&1"
if system(cmd)
self
else
@@ -41,10 +41,8 @@ def build
# convert the audio file to flac format
def to_flac
- puts "convert: #{chunk} to flac"
chunk_outputfile = chunk.gsub(/#{File.extname(chunk)}$/, ".flac")
- if system("ffmpeg -i #{chunk} -acodec flac #{chunk_outputfile}")
- puts "success?"
+ if system("ffmpeg -i #{chunk} -acodec flac #{chunk_outputfile} >/dev/null 2>&1")
self.flac_chunk = chunk.gsub(/#{File.extname(chunk)}$/, ".flac")
# convert the audio file to 16K
self.flac_rate = `ffmpeg -i #{self.flac_chunk} 2>&1`.strip.scan(/Audio: flac, (.*) Hz/).first.first.strip
@@ -73,7 +71,7 @@ def clean
end
- def initialize(file, chunk_size=30)
+ def initialize(file, chunk_size=5)
self.original_file = file
self.duration = AudioInspector.new(file).duration
self.size = chunk_size
@@ -87,15 +85,21 @@ def split
#puts "generate: #{full_chunks} chunks of #{size} seconds, last: #{last_chunk} seconds"
(full_chunks-1).times do|chunkid|
- chunks << AudioChunk.new(self, chunkid * self.size, self.size)
+ if chunkid > 0
+ chunks << AudioChunk.new(self, chunkid * self.size, self.size)
+ else
+ off = (chunkid * self.size)-(self.size/2)
+ off = 0 if off < 0
+ chunks << AudioChunk.new(self, off, self.size)
+ end
end
if chunks.empty?
chunks << AudioChunk.copy(self)#, 0, self.duration.to_f)
else
chunks << AudioChunk.new(self, chunks.last.offset.to_i + chunks.last.duration.to_i, self.size + last_chunk)
end
- puts "Chunk count: #{chunks.size}"
+ #puts "Chunk count: #{chunks.size}"
chunks
end
View
57 lib/speech/audio_to_text.rb
@@ -2,63 +2,80 @@
module Speech
class AudioToText
- attr_accessor :file, :rate, :captured_json, :captured_file
+ attr_accessor :file, :rate, :captured_json
+ attr_accessor :best_match_text, :score, :verbose, :segments
- def initialize(file)
+ def initialize(file, options={})
+ self.verbose = false
self.file = file
- self.captured_file = self.file.gsub(/\.wav$/,'.json')
self.captured_json = {}
+ self.best_match_text = ""
+ self.score = 0.0
+ self.segments = 0
+
+ self.verbose = !!options[:verbose] if options.key?(:verbose)
+ end
+
+ def to_text(max=2,lang="en-US")
+ to_json(max,lang)
+ self.best_match_text
end
- def to_text
- url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10"
+ def to_json(max=2,lang="en-US")
+ self.best_match_text = ""
+ self.score = 0.0
+ self.segments = 0
+
+ url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=#{lang}&maxresults=#{max}"
splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration
easy = Curl::Easy.new(url)
splitter.split.each do|chunk|
chunk.build.to_flac
convert_chunk(easy, chunk)
end
- JSON.parse(File.read(self.captured_file))
- end
-
- def clean
- File.unlink self.captured_file if self.captured_file && File.exist?(self.captured_file)
+ self.best_match_text = self.best_match_text.strip
+ self.score /= self.segments
+ self.captured_json
end
protected
def convert_chunk(easy, chunk, options={})
- puts "sending chunk of size #{chunk.duration}..."
+ puts "sending chunk of size #{chunk.duration}..." if self.verbose
retrying = true
retry_count = 0
while retrying && retry_count < 3 # 3 retries
- #easy.verbose = true
+ easy.verbose = self.verbose
easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
- #puts chunk.inspect
easy.post_body = "Content=#{chunk.to_flac_bytes}"
- easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
+ if self.verbose
+ easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
+ end
easy.on_complete {|easy| puts }
easy.http_post
- #puts easy.header_str
- #puts easy.body_str
if easy.response_code == 500
- puts "500 from google retry after 0.5 seconds"
+ puts "500 from google retry after 0.5 seconds" if self.verbose
retrying = true
retry_count += 1
sleep 0.5 # wait longer on error?, google??
else
- # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance":"I like pickles","confidence":0.92731786}]}
+ # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance"=>"I like pickles", "confidence"=>0.59408695}, {"utterance"=>"I like turtles"}, {"utterance"=>"I like tickles"}, {"utterance"=>"I like to Kohl's"}, {"utterance"=>"I Like tickles"}, {"utterance"=>"I lyk tickles"}, {"utterance"=>"I liked to Kohl's"}]}
data = JSON.parse(easy.body_str)
self.captured_json['status'] = data['status']
self.captured_json['id'] = data['id']
self.captured_json['hypotheses'] = data['hypotheses'].map {|ut| [ut['utterance'], ut['confidence']] }
- puts self.captured_json.inspect
- File.open("#{self.captured_file}", "wb") {|f| f << captured_json.to_json }
+ if data.key?('hypotheses') && ['hypotheses'].first
+ self.best_match_text += " " + data['hypotheses'].first['utterance']
+ self.score += data['hypotheses'].first['confidence']
+ self.segments += 1
+ end
retrying = false
end
sleep 0.1 # not too fast there tiger
end
+ puts "#{segments} processed: #{self.captured_json.inspect}" if self.verbose
+ self.captured_json
ensure
chunk.clean
end
View
2 lib/speech/version.rb
@@ -1,6 +1,6 @@
# -*- encoding: binary -*-
module Speech
class Info
- VERSION='0.3.5'
+ VERSION='0.3.6'
end
end
View
6 test/audio_splitter_test.rb
@@ -6,10 +6,10 @@
class SpeechAudioSplitterTest < Test::Unit::TestCase
def test_audio_splitter
- splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1)
+ splitter = Speech::AudioSplitter.new(File.expand_path(File.join(File.dirname(__FILE__),"samples/i-like-pickles.wav")), 1)
- assert_equal '00:00:03:52', splitter.duration.to_s
- assert_equal 3.52, splitter.duration.to_f
+ assert_equal '00:00:03.51', splitter.duration.to_s
+ assert_equal 3.51, splitter.duration.to_f
chunks = splitter.split
assert_equal 3, chunks.size
View
42 test/audio_to_text_test.rb
@@ -4,39 +4,19 @@
require 'speech'
class SpeechAudioToTextTest < Test::Unit::TestCase
- def test_audio_to_text
- audio = Speech::AudioToText.new("samples/i-like-pickles.wav")
- captured_json = audio.to_text
- assert captured_json
- assert captured_json.key?("hypotheses")
- assert !captured_json['hypotheses'].empty?
- assert captured_json.keys.include?('status')
- assert captured_json.keys.include?('id')
- assert captured_json.keys.include?('hypotheses')
+ def setup
+ super
+ end
- assert_equal "I like pickles", captured_json['hypotheses'].first.first
- assert captured_json['hypotheses'].first.last > 0.9
-# {"hypotheses"=>[["I like pickles", 0.92731786]]}
-# puts captured_json.inspect
- ensure
- audio.clean
+ def test_audio_to_text
+ audio = Speech::AudioToText.new(File.expand_path(File.join(File.dirname(__FILE__),"samples/i-like-pickles.wav")))
+ assert_equal "I like pickles", audio.to_text
end
- def test_short_audio_clip
- audio = Speech::AudioToText.new("samples/i-like-pickles.chunk5.wav")
- captured_json = audio.to_text
- assert captured_json
- assert captured_json.key?("hypotheses")
- assert !captured_json['hypotheses'].empty?
- #{"status"=>0, "id"=>"552de5ba35bb769ce3493ff113e158a8-1", "hypotheses"=>[["eagles", 0.7214844], ["pickles", nil], ["michaels", nil], ["giggles", nil], ["tickles", nil]]}
- assert captured_json.keys.include?('status')
- assert captured_json.keys.include?('id')
- assert captured_json.keys.include?('hypotheses')
- puts captured_json.inspect
- assert_equal "eagles", captured_json['hypotheses'][0].first
- assert_equal "pickles", captured_json['hypotheses'][1].first
- #assert captured_json['confidence'] > 0.9
- ensure
- audio.clean
+ def test_longer_audio
+ audio = Speech::AudioToText.new(File.expand_path(File.join(File.dirname(__FILE__),"/SampleAudio.wav")), :verbose => true)
+ puts audio.to_text
+ puts audio.score
+ puts audio.segments
end
end

0 comments on commit a3bce5e

Please sign in to comment.