From 66f249027fa653d25e4045c95c5178d07c3616d2 Mon Sep 17 00:00:00 2001 From: Saurabh Shrivastava Date: Thu, 8 Jun 2017 20:27:04 +0530 Subject: [PATCH] Trying to read wave file as per specs. Wave file specs : http://soundfile.sapp.org/doc/WaveFormat/ --- src/CMakeLists.txt | 2 +- src/ccaligner.cpp | 8 +- src/lib_ccaligner/read_wav_file.cpp | 238 ++++++++++++++++++++++++++++ src/lib_ccaligner/read_wav_file.h | 35 ++++ 4 files changed, 277 insertions(+), 6 deletions(-) create mode 100644 src/lib_ccaligner/read_wav_file.cpp create mode 100644 src/lib_ccaligner/read_wav_file.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 440ba82..0a1db33 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -39,7 +39,7 @@ set(SOURCE_FILES ccaligner.cpp lib_ccaligner/generate_approx_timestamp.cpp lib_ccaligner/generate_approx_timestamp.h - lib_ccaligner/voice_activity_detection.h lib_ccaligner/voice_activity_detection.cpp lib_ccaligner/common_header_files.h) + lib_ccaligner/voice_activity_detection.h lib_ccaligner/voice_activity_detection.cpp lib_ccaligner/common_header_files.h lib_ccaligner/read_wav_file.h lib_ccaligner/read_wav_file.cpp) add_executable(ccaligner ${SOURCE_FILES}) target_link_libraries(ccaligner webRTC) diff --git a/src/ccaligner.cpp b/src/ccaligner.cpp index 4cf5612..388b65e 100644 --- a/src/ccaligner.cpp +++ b/src/ccaligner.cpp @@ -1,4 +1,5 @@ #include "generate_approx_timestamp.h" +#include "read_wav_file.h" int main(int argc, char *argv[]) { @@ -10,10 +11,7 @@ int main(int argc, char *argv[]) std::string filename(argv[1]); - ApproxAligner * aligner = new ApproxAligner(filename); - aligner->align(); - - delete aligner; - + WaveFileData * file = new WaveFileData(filename); + file->openFile(); return 0; } diff --git a/src/lib_ccaligner/read_wav_file.cpp b/src/lib_ccaligner/read_wav_file.cpp new file mode 100644 index 0000000..9a33128 --- /dev/null +++ b/src/lib_ccaligner/read_wav_file.cpp @@ -0,0 +1,238 @@ +/* + * Author : Saurabh Shrivastava + * 
Email : saurabh.shrivastava54@gmail.com + * Link : https://github.com/saurabhshri +*/ + +#include "read_wav_file.h" + +WaveFileData::WaveFileData(std::string fileName) +{ + _fileName = fileName; + _samples.resize(0); +} + +bool WaveFileData::checkValidWave (std::vector& fileData) +{ + /*Offset Size Name Description + * 0 4 ChunkID Contains the letters "RIFF" in ASCII form + */ + std::string chunkID (fileData.begin(), fileData.begin() + 4); + if (chunkID == "RIFF") + return true; + else + return false; + +} + +bool WaveFileData::openFile () +{ + std::ifstream infile (_fileName, std::ios::binary); + + if (!infile) + { + std::cout<<"\nError opening file : "<<_fileName; + return false; + + } + + + /* + * When reading characters, std::istream_iterator skips whitespace by default + * (unless disabled with std::noskipws or equivalent).. + * http://www.enseignement.polytechnique.fr/informatique/INF478/docs/Cpp/en/cpp/iterator/istream_iterator.html#Notes + */ + + std::noskipws(infile); + std::istream_iterator begin (infile), end; + std::vector fileData (begin, end); + + if(checkValidWave(fileData)) + { + _fileData = fileData; + parse(); + return true; + } + + else + { + std::cout<<"\nInvalid WAV file!"; + return false; + } + +} + +bool WaveFileData::parse() +{ + /* Wave file format : + + Offset Size Name Description + + The canonical WAVE format starts with the RIFF header: + + 0 4 ChunkID Contains the letters "RIFF" in ASCII form + (0x52494646 big-endian form). + 4 4 ChunkSize 36 + SubChunk2Size, or more precisely: + 4 + (8 + SubChunk1Size) + (8 + SubChunk2Size) + This is the size of the rest of the chunk + following this number. This is the size of the + entire file in bytes minus 8 bytes for the + two fields not included in this count: + ChunkID and ChunkSize. + 8 4 Format Contains the letters "WAVE" + (0x57415645 big-endian form). 
+ + The "WAVE" format consists of two subchunks: "fmt " and "data": + The "fmt " subchunk describes the sound data's format: + + 12 4 Subchunk1ID Contains the letters "fmt " + (0x666d7420 big-endian form). + 16 4 Subchunk1Size 16 for PCM. This is the size of the + rest of the Subchunk which follows this number. + 20 2 AudioFormat PCM = 1 (i.e. Linear quantization) + Values other than 1 indicate some + form of compression. + 22 2 NumChannels Mono = 1, Stereo = 2, etc. + 24 4 SampleRate 8000, 44100, etc. + 28 4 ByteRate == SampleRate * NumChannels * BitsPerSample/8 + 32 2 BlockAlign == NumChannels * BitsPerSample/8 + The number of bytes for one sample including + all channels. I wonder what happens when + this number isn't an integer? + 34 2 BitsPerSample 8 bits = 8, 16 bits = 16, etc. + 2 ExtraParamSize if PCM, then doesn't exist + X ExtraParams space for extra parameters + + The "data" subchunk contains the size of the data and the actual sound: + + 36 4 Subchunk2ID Contains the letters "data" + (0x64617461 big-endian form). + 40 4 Subchunk2Size == NumSamples * NumChannels * BitsPerSample/8 + This is the number of bytes in the data. + You can also think of this as the size + of the rest of the subchunk following this + number. + 44 * Data The actual sound data. 
+ + SOURCE : http://soundfile.sapp.org/doc/WaveFormat/ + + */ + + std::string format(_fileData.begin() + 8, _fileData.begin() + 12); + + if(format != "WAVE") + { + std::cout<<"\nInvalid Format : "<& fileData, int index) +{ + return ((fileData[index + 3] << 24) | (fileData[index + 2] << 16) | (fileData[index + 1] << 8) | fileData[index]); +} + +int WaveFileData::twoBytesToInt (std::vector& fileData, int index) +{ + return ((fileData[index + 1] << 8) | fileData[index]); +} + +/* Convert signed int (2bytes, 16 bits) in double format + * https://stackoverflow.com/a/4619787/6487831 + */ +double WaveFileData::twoBytesToDouble (int value) +{ + return (double)value / (double) 32768.0; +} diff --git a/src/lib_ccaligner/read_wav_file.h b/src/lib_ccaligner/read_wav_file.h new file mode 100644 index 0000000..fa97528 --- /dev/null +++ b/src/lib_ccaligner/read_wav_file.h @@ -0,0 +1,35 @@ +/* + * Author : Saurabh Shrivastava + * Email : saurabh.shrivastava54@gmail.com + * Link : https://github.com/saurabhshri +*/ + +#ifndef CCALIGNER_READ_WAV_FILE_H +#define CCALIGNER_READ_WAV_FILE_H + +#include +#include +#include +#include +#include + +class WaveFileData +{ + std::string _fileName; + std::vector _fileData; + std::vector> _samples; + + bool checkValidWave (std::vector& fileData); + bool parse(); + + unsigned long fourBytesToInt (std::vector& fileData, int index); + int twoBytesToInt (std::vector& fileData, int index); + double twoBytesToDouble (int sample); + +public: + WaveFileData(std::string fileName); + bool openFile(); + ~WaveFileData(); +}; + +#endif //CCALIGNER_READ_WAV_FILE_H