Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ build
.venv
.mypy_cache

target
target

.DS_Store
132 changes: 98 additions & 34 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "transcriber"
name = "capski"
version = "0.1.0"
edition = "2024"

Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ The Engineering Requirements Document (**ERD**) is available here :
brew install ffmpeg
```

### 2. Build Transcriber
You can now build the transcriber using Rust Cargo.
### 2. Install Capski
You can now install Capski with Cargo, Rust's package manager.
```bash
cargo build
cargo install --path .
```

### 3. Run Transcriber
### 3. Run Capski
```bash
cargo run -- --input "example/input_audio.wav"
capski --input "example/input_audio.wav"
```

This runs the pipeline end-to-end:
Expand Down
39 changes: 39 additions & 0 deletions src/audio/extractor.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use super::Extractor;
use anyhow::{Context, Result};
use log::info;
use std::process::Command;

/// Audio extractor that shells out to the `ffmpeg` command-line tool.
pub struct FfmpegExtractor;

impl Extractor for FfmpegExtractor {
    /// Extracts the audio stream of `input` and writes it to `output`.
    ///
    /// Invokes `ffmpeg` with video disabled (`-vn`), the `pcm_s16le` audio
    /// codec, a 16000 Hz sample rate (`-ar`) and a single channel (`-ac 1`).
    /// `-y` makes ffmpeg overwrite an existing `output` without prompting.
    ///
    /// # Errors
    ///
    /// Returns an error if the `ffmpeg` process cannot be started, or if it
    /// terminates unsuccessfully (the termination status is included in the
    /// error message).
    fn extract(input: &str, output: &str) -> Result<()> {
        info!("Extracting audio from {} to {}", input, output);

        let status = Command::new("ffmpeg")
            .args([
                "-y",
                "-i",
                input,
                "-vn",
                "-acodec",
                "pcm_s16le",
                "-ar",
                "16000",
                "-ac",
                "1",
                output,
            ])
            .status()
            .context("Failed to extract audio via FFmpeg")?;

        if !status.success() {
            // `ExitStatus` renders its own label via `Display` (for example
            // "exit status: 1" or "signal: 9"), so no "exited with code"
            // prefix is added here: it would duplicate the label and be
            // wrong when the process was terminated by a signal.
            anyhow::bail!("ffmpeg failed to extract audio: {}", status);
        }

        Ok(())
    }
}
16 changes: 16 additions & 0 deletions src/audio/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
mod extractor;
mod whisper;

pub use extractor::FfmpegExtractor;
pub use whisper::WhisperCapski;

use crate::types::Segment;
use anyhow::Result;

/// Transcription backend: produces transcript [`Segment`]s from an audio file.
///
/// `transcribe` is an associated function (it takes no `self`), so
/// implementors are called as `Type::transcribe(..)`.
pub trait Capski {
/// Transcribes the audio file at `audio_path` using the model at `model_path`.
///
/// Returns the transcript segments on success, or an error describing why
/// transcription could not be completed.
fn transcribe(model_path: &str, audio_path: &str) -> Result<Vec<Segment>>;
}

/// Audio-extraction backend: pulls audio out of an input media file.
///
/// `extract` is an associated function (it takes no `self`), so
/// implementors are called as `Type::extract(..)`.
pub trait Extractor {
/// Extracts audio from the file at `input` and writes it to `output`.
///
/// Returns `Ok(())` on success, or an error if extraction fails.
fn extract(input: &str, output: &str) -> Result<()>;
}
39 changes: 6 additions & 33 deletions src/audio.rs → src/audio/whisper.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
use crate::types::Segment;

use super::Capski;
use anyhow::{Context, Result};
use log::info;
use std::process::Command;
use whisper_rs::{FullParams, SamplingStrategy, WhisperContext, WhisperContextParameters};

pub struct Audio;
pub struct WhisperCapski;

impl Audio {
impl Capski for WhisperCapski {
// Function to transcribe audio using the Whisper model
pub fn transcribe(model_path: &str, audio_path: &str) -> Result<Vec<Segment>> {
fn transcribe(model_path: &str, audio_path: &str) -> Result<Vec<Segment>> {
info!("Transcribing with Whisper...");

let reader = hound::WavReader::open(audio_path).expect("failed to open file");
let reader = hound::WavReader::open(audio_path)
.with_context(|| format!("failed to open audio file: {}", audio_path))?;

// Read WAV file and collect samples
let samples: Vec<i16> = reader.into_samples::<i16>().map(|x| x.unwrap()).collect();
Expand Down Expand Up @@ -91,32 +92,4 @@ impl Audio {

Ok(segments)
}

// Function to extract audio from a video file
pub fn extract(video_path: &str, audio_path: &str) -> Result<()> {
info!("Extracting audio from {} to {}", video_path, audio_path);

let status = Command::new("ffmpeg")
.args(&[
"-y",
"-i",
video_path,
"-vn",
"-acodec",
"pcm_s16le",
"-ar",
"16000",
"-ac",
"1",
audio_path,
])
.status()
.context("Failed to extract audio")?;

if !status.success() {
return Err(anyhow::anyhow!("ffmpeg failed to extract audio"));
}

Ok(())
}
}
Loading