Permalink
Browse files

initial commit

  • Loading branch information...
0 parents commit 250c5378a869c8e545f78b34f783dc72e2ac2db5 @taf2 committed Mar 25, 2011
4 Gemfile
@@ -0,0 +1,4 @@
# Gem dependencies for speech2text.
# The source is given as an explicit HTTPS URL: the :rubygems symbol shorthand
# is deprecated by Bundler and resolves to an unencrypted http:// endpoint.
source 'https://rubygems.org'

gem 'curb' # libcurl bindings, used to POST audio to the speech API
gem 'json' # parses the recognition responses
12 Gemfile.lock
@@ -0,0 +1,12 @@
+GEM
+ remote: http://rubygems.org/
+ specs:
+ curb (0.7.15)
+ json (1.5.1)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ curb
+ json
10 README
@@ -0,0 +1,10 @@
+== Speech2Text
+
+Using the power of ffmpeg, flac, Google and Ruby, here is a simple interface to play with for converting speech to text.
+
+At this point the API from Google is not documented and seemingly free.
+The Google API will frequently return 500 errors without providing much reason as to why.
+
+It's possible that Google will decide not to open this API up, and this effort may be completely for naught...
+
+This was all made possible in short order all thanks to Chrome 11 and http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
2 bin/speech2text
@@ -0,0 +1,2 @@
+#!/this/will/be/replaced/by/rubygems
+# -*- encoding: binary -*-
8 lib/speech.rb
@@ -0,0 +1,8 @@
+require 'curb'
+require 'json'
+
+module Speech; end
+
+require 'speech/audio_inspector'
+require 'speech/audio_splitter'
+require 'speech/audio_to_text'
42 lib/speech/audio_inspector.rb
@@ -0,0 +1,42 @@
require 'shellwords'

module Speech

  # Reads the duration of an audio file by running `ffmpeg -i` on it and
  # parsing the "Duration:" field from the output.
  class AudioInspector
    attr_accessor :duration

    # A length of time kept both as hour/minute/second string components and
    # as a total number of seconds.
    class Duration
      attr_accessor :hours, :minutes, :seconds, :total_seconds

      # duration_str - "HH:MM:SS.ff" text, e.g. "00:00:03.52".
      def initialize(duration_str)
        self.hours, self.minutes, self.seconds = duration_str.split(':')
        self.total_seconds = (hours.to_i * 3600) + (minutes.to_i * 60) + seconds.to_f
      end

      # Renders the duration as "HH:MM:SS:FF", FF being hundredths of a second.
      #
      # The components are converted with to_i/to_f before formatting: handing
      # the raw strings to a %d directive raises ArgumentError for values such
      # as "08" or "09" (parsed as invalid octal), and treating the digits
      # after the decimal point as an integer would print ".5" as "05".
      def to_s
        whole, hundredths = (seconds.to_f * 100).round.divmod(100)
        format('%02d:%02d:%02d:%02d', hours.to_i, minutes.to_i, whole, hundredths)
      end

      # Returns the total number of seconds as a Float.
      def to_f
        self.total_seconds
      end

      # Builds a Duration from a number of seconds (Integer or Float).
      # hours, minutes and seconds are stored as strings, as in #initialize.
      def self.from_seconds(seconds)
        duration = new("00:00:00.00")
        whole = seconds.to_i

        duration.hours = (whole / 3600).to_s
        # minutes must exclude the part already counted as hours
        duration.minutes = ((whole % 3600) / 60).to_s
        # what is left after removing the whole minutes, fraction included
        duration.seconds = (seconds - (whole - (whole % 60))).to_s
        duration.total_seconds = seconds.to_f

        duration
      end

    end

    # file - path of the audio file to inspect.
    #
    # Raises RuntimeError when the ffmpeg output has no "Duration:" field
    # (unreadable file, ffmpeg not installed, ...).
    def initialize(file)
      # the path is escaped so names with spaces or shell metacharacters work
      output = `ffmpeg -i #{Shellwords.escape(file.to_s)} 2>&1`
      # stop at the first comma/whitespace: the Duration line may carry further
      # comma separated fields (start, bitrate) that a greedy match swallows
      duration_str = output[/Duration:\s*([^,\s]+)/, 1]
      raise "Unable to determine the duration of #{file} from ffmpeg output" unless duration_str

      self.duration = Duration.new(duration_str)
    end

  end
end
86 lib/speech/audio_splitter.rb
@@ -0,0 +1,86 @@
require 'shellwords'

module Speech

  # Cuts an audio file into fixed-size pieces that can be converted to FLAC
  # one at a time.
  class AudioSplitter
    attr_accessor :original_file, :size, :duration, :chunks

    # One slice of the original file: `duration` seconds starting at `offset`.
    class AudioChunk
      attr_accessor :splitter, :chunk, :flac_chunk, :offset, :duration, :flac_rate

      # splitter - the AudioSplitter owning this chunk (supplies original_file)
      # offset   - start of the chunk in seconds
      # duration - length of the chunk in seconds
      def initialize(splitter, offset, duration)
        self.splitter = splitter
        self.offset = offset
        self.duration = duration
        self.chunk = chunk_path_for(splitter.original_file, offset)
      end

      # given the original file from the splitter and the chunked file name with duration and offset run the ffmpeg command
      #
      # Returns self so calls can be chained; raises RuntimeError if ffmpeg fails.
      def build
        # ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00:00 -t 00:00:30:00 sample.audio.out.wav
        offset_ts = AudioInspector::Duration.from_seconds(self.offset)
        duration_ts = AudioInspector::Duration.from_seconds(self.duration)
        cmd = [
          "ffmpeg -y -i #{Shellwords.escape(splitter.original_file)}",
          "-acodec copy -vcodec copy",
          "-ss #{offset_ts} -t #{duration_ts}",
          "#{Shellwords.escape(chunk)} >/dev/null 2>&1"
        ].join(' ')

        unless system(cmd)
          raise "Failed to generate chunk at offset: #{offset_ts}, duration: #{duration_ts}\n#{cmd}"
        end
        self
      end

      # convert the audio file to flac format, resampled to 16K
      #
      # Sets flac_chunk and flac_rate and returns self. Raises RuntimeError when
      # either external command fails, instead of leaving flac_chunk unset.
      def to_flac
        unless system("flac #{Shellwords.escape(chunk)} >/dev/null 2>&1")
          raise "failed to convert #{chunk} to flac"
        end
        # swap only the trailing extension; a gsub would also rewrite an
        # identical sequence occurring earlier in the path
        self.flac_chunk = chunk.chomp(File.extname(chunk)) + ".flac"

        down_sampled = flac_chunk.sub(/\.flac\z/, '-sampled.flac')
        resample = "ffmpeg -i #{Shellwords.escape(flac_chunk)} -ar 16000 -y #{Shellwords.escape(down_sampled)} >/dev/null 2>&1"
        raise "failed to convert to lower audio rate" unless system(resample)

        File.rename(down_sampled, flac_chunk)
        self.flac_rate = 16000
        self
      end

      # Returns the raw contents of the FLAC file; read in binary mode because
      # the data is not text.
      def to_flac_bytes
        File.open(self.flac_chunk, "rb") { |f| f.read }
      end

      # delete the chunk file (and its FLAC counterpart, if one was produced)
      def clean
        File.unlink self.chunk if self.chunk && File.exist?(self.chunk)
        File.unlink self.flac_chunk if self.flac_chunk && File.exist?(self.flac_chunk)
      end

      private

      # Names the chunk "chunk-<basename>-<offset><ext>" next to the original
      # file. Only the base name gets the prefix, so files inside a directory
      # (or given by absolute path) yield a valid path.
      def chunk_path_for(file, offset)
        dir, name = File.split(file)
        ext = File.extname(name)
        chunk_name = "chunk-#{name.chomp(ext)}-#{offset}#{ext}"
        dir == '.' ? chunk_name : File.join(dir, chunk_name)
      end

    end

    # file       - path of the audio file to split
    # chunk_size - length of each chunk in seconds (default 30)
    #
    # Raises ArgumentError when chunk_size is not positive.
    def initialize(file, chunk_size=30)
      raise ArgumentError, "chunk_size must be greater than zero" unless chunk_size.to_f > 0

      self.original_file = file
      self.duration = AudioInspector.new(file).duration
      self.size = chunk_size
      self.chunks = []
    end

    # Computes the chunks covering the whole file and returns them.
    #
    # Every chunk is `size` seconds long except the last, which also absorbs
    # the remainder. A file shorter than two full chunks yields a single chunk
    # spanning the entire duration. No files are written here; see
    # AudioChunk#build.
    def split
      total = self.duration.to_f
      full_chunks = (total / size).to_i
      last_chunk = ((total % size) * 100).round / 100.0

      # start from scratch so a second call does not append duplicates
      self.chunks = []

      (full_chunks - 1).times do |chunkid|
        chunks << AudioChunk.new(self, chunkid * size, size)
      end

      if chunks.empty?
        chunks << AudioChunk.new(self, 0, total)
      else
        # offset derived from the chunk count, so fractional sizes are not truncated
        chunks << AudioChunk.new(self, (full_chunks - 1) * size, size + last_chunk)
      end

      chunks
    end

  end
end
68 lib/speech/audio_to_text.rb
@@ -0,0 +1,68 @@
module Speech

  # Sends an audio file, chunk by chunk, to Google's speech recognition
  # endpoint and accumulates the returned utterances in a JSON file.
  class AudioToText
    attr_accessor :file, :rate, :captured_json, :confidence, :captured_file

    # How many times a chunk is re-sent after an HTTP 500 before giving up.
    DEFAULT_MAX_RETRIES = 10

    # file - path of the audio file to transcribe.
    #
    # The results file is the input path with its extension replaced by
    # ".json". Any extension is replaced (not only ".wav") so the results can
    # never be written over the audio file itself.
    def initialize(file)
      self.file = file
      self.captured_file = file.sub(/\.[^.\/]*\z/, '') + '.json'
      self.captured_json = []
      self.confidence = 0.0
    end

    # Splits the file, converts every chunk to FLAC, posts it, and returns the
    # parsed contents of the results file.
    def to_text
      url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US"
      splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration
      easy = Curl::Easy.new(url)
      splitter.split.each do |chunk|
        begin
          chunk.build.to_flac
          convert_chunk(easy, chunk)
        ensure
          # also removes partial files when build/to_flac fails
          chunk.clean
        end
      end
      JSON.parse(File.read(self.captured_file))
    end

    protected

    # Posts one chunk and records the hypotheses from the response.
    #
    # easy    - the HTTP client (Curl::Easy in #to_text)
    # chunk   - the chunk to send; its files are removed afterwards in all cases
    # options - :max_retries, the number of retries allowed after an HTTP 500
    #           (default DEFAULT_MAX_RETRIES)
    #
    # Raises RuntimeError once the retries are exhausted.
    def convert_chunk(easy, chunk, options={})
      max_retries = options.fetch(:max_retries, DEFAULT_MAX_RETRIES)
      failures = 0
      puts "sending chunk of size #{chunk.duration}..."

      loop do
        easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
        easy.post_body = "Content=#{chunk.to_flac_bytes}"
        easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
        easy.on_complete {|_curl| puts }
        easy.http_post
        break unless easy.response_code == 500

        failures += 1
        if failures > max_retries
          raise "giving up after #{max_retries} retries: google keeps returning 500"
        end
        puts "500 from google retry after 0.5 seconds"
        sleep 0.5 # wait longer on error?
      end

      # {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance":"I like pickles","confidence":0.92731786}]}
      record_hypotheses(JSON.parse(easy.body_str))
      write_capture
      sleep 0.1 # not too fast there tiger
    ensure
      chunk.clean
    end

    private

    # Appends each [utterance, confidence] pair and adds to the running
    # confidence total. A response without "hypotheses" contributes nothing.
    def record_hypotheses(data)
      (data['hypotheses'] || []).each do |hypothesis|
        self.captured_json << [hypothesis['utterance'], hypothesis['confidence']]
        self.confidence += hypothesis['confidence'].to_f
      end
    end

    # Rewrites the results file with everything captured so far and the
    # average confidence (0 when nothing was captured).
    def write_capture
      count = self.captured_json.size
      average = count > 0 ? self.confidence / count : 0
      File.open(self.captured_file, "wb") do |f|
        f << { :captured_json => captured_json, :confidence => average }.to_json
      end
    end

  end

end
11 lib/speech/text.rb
@@ -0,0 +1,11 @@
module Speech
  # Placeholder: both methods accept their arguments and do nothing yet.
  class Text
    # audio_file - accepted but not used
    # options    - accepted but not used
    def initialize(audio_file, options={}); end

    # flac16k_audio - accepted but not used; returns nil
    def decode_audio(flac16k_audio); end
  end
end
5 lib/speech/version.rb
@@ -0,0 +1,5 @@
module Speech
  # Library metadata.
  class Info
    # Version string of the library.
    VERSION = '0.01'
  end
end
BIN test/SampleAudio.wav
Binary file not shown.
8 test/audio_inspector_test.rb
@@ -0,0 +1,8 @@
+require 'test/unit'
+$:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
+require 'speech'
+
class SpeechAudioInspectorTest < Test::Unit::TestCase
  # AudioInspector itself shells out to ffmpeg, so these checks cover its
  # Duration helper, which needs no external tools.
  def test_audio_inspector
    duration = Speech::AudioInspector::Duration.new('00:00:03.52')

    assert_equal '00', duration.hours
    assert_equal '00', duration.minutes
    assert_equal '03.52', duration.seconds
    assert_in_delta 3.52, duration.to_f, 0.001
    assert_equal '00:00:03:52', duration.to_s
  end

  def test_duration_from_seconds
    assert_equal '00:00:02:00', Speech::AudioInspector::Duration.from_seconds(2).to_s
  end
end
25 test/audio_splitter_test.rb
@@ -0,0 +1,25 @@
+require 'test/unit'
+$:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
+require 'speech'
+
class SpeechAudioSplitterTest < Test::Unit::TestCase

  # Splits the bundled sample into one second chunks, converts each to FLAC
  # and checks that the generated files are created and then removed.
  # Requires the ffmpeg and flac command line tools.
  def test_audio_splitter
    # the fixture sits next to this file; run from there so the test passes
    # regardless of the directory the suite was started in
    Dir.chdir(File.dirname(File.expand_path(__FILE__))) do
      chunks = []
      begin
        splitter = Speech::AudioSplitter.new("i-like-pickles.wav", 1)

        assert_equal '00:00:03:52', splitter.duration.to_s
        # compare floats with a tolerance rather than exact equality
        assert_in_delta 3.52, splitter.duration.to_f, 0.001

        chunks = splitter.split
        assert_equal 3, chunks.size
        chunks.each do |chunk|
          chunk.build.to_flac
          assert File.exist?(chunk.chunk)
          assert File.exist?(chunk.flac_chunk)
          chunk.clean
          assert !File.exist?(chunk.chunk)
          assert !File.exist?(chunk.flac_chunk)
        end
      ensure
        # do not leave generated files behind when an assertion fails
        chunks.each { |chunk| chunk.clean }
      end
    end
  end

end
BIN test/i-like-pickles.wav
Binary file not shown.
BIN test/samples/i-like-pickles.wav
Binary file not shown.

0 comments on commit 250c537

Please sign in to comment.