### Imports

In [None]:
from pathlib import Path
import librosa
import torch
from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    pipeline,
)
import pandas as pd

### Lecture des metadata

In [None]:
# Répertoire du script
path = Path.cwd()

# Charger les données
df_meta = pd.read_csv(path.joinpath("911/911_metadata.csv"))
df_meta = df_meta.dropna(subset=["file_name"])
df_meta.head()

Unnamed: 0,id,link,title,date,state,civilian_initiated,deaths,potential_death,false_alarm,description,file_name
0,1,https://web.archive.org/web/20150417085342/htt...,North Hollywood bank robbery,2/97,California,0.0,2.0,1.0,0.0,– The unforgettable collection of radio loggin...,call_1.wav
1,2,https://web.archive.org/web/20150417085342/htt...,Detroit Child’s 911 Call – audio,2/06,Michigan,1.0,1.0,1.0,0.0,– In Feb. 2006 5 year-old Robert Turner called...,call_2.wav
2,3,https://web.archive.org/web/20150417085342/htt...,Sept. 11th Fire Tapes,9/01,,,,,,,
3,4,https://web.archive.org/web/20150417085342/htt...,Sept. 11th Fire Tapes #2,9/01,,,,,,,
4,5,https://web.archive.org/web/20150417085342/htt...,Sept. 11th Tape Archive,9/01,,,,,,,


### Ajouter la taille du fichier dans le df

In [None]:
# Ajouter la taille du fichier dans le df
df_meta["file_size"] = df_meta["file_name"].apply(
    lambda x: Path(path.joinpath("911", x)).stat().st_size
)

File sizes added to dataframe:
      file_name   file_size  file_size_mb
0    call_1.wav  80705358.0     76.966627
1    call_2.wav   1383756.0      1.319653
2           NaN         NaN           NaN
3           NaN         NaN           NaN
4           NaN         NaN           NaN
5           NaN         NaN           NaN
6           NaN         NaN           NaN
7    call_8.wav  10273464.0      9.797539
8    call_9.wav   3215020.0      3.066082
9   call_10.wav   2262072.0      2.157280
10  call_11.wav   4191052.0      3.996899
11  call_12.wav  12643344.0     12.057632
12  call_13.wav   3081274.0      2.938532
13          NaN         NaN           NaN
14  call_15.wav  25656084.0     24.467548
15  call_16.wav   8533326.0      8.138014
16  call_17.wav   4153756.0      3.961330
17  call_18.wav  14114380.0     13.460522
18  call_19.wav   8849110.0      8.439169
19  call_20.wav   3288582.0      3.136236


In [12]:
# proportion de false alarm
df_meta["false_alarm"].value_counts(normalize=True, dropna=False)

false_alarm
0.0    0.931267
1.0    0.048518
NaN    0.020216
Name: proportion, dtype: float64

In [13]:
# Extraire les appels qui n'ont pas nan dans false alarm, et qui possède un référence fichier
df_meta = df_meta.dropna(subset=["false_alarm", "file_name"])
df_meta.shape

(704, 13)

In [14]:
# Trier le df par ordre croissant de file_size
df_meta = df_meta.sort_values("file_size", ascending=True)
df_meta.head()

Unnamed: 0,id,link,title,date,state,civilian_initiated,deaths,potential_death,false_alarm,description,file_name,file_size,file_size_mb
58,59,https://web.archive.org/web/20150417085342/htt...,#1,12/07,Nebraska,1.0,9.0,1.0,0.0,– In Dec. 2007 Robert Hawkins entered the West...,call_59.wav,316054.0,0.301413
281,282,https://web.archive.org/web/20150417085349/htt...,Murder-suicide,4/13,Michigan,1.0,3.0,1.0,0.0,– In April 2013 a woman in Newaygo County (Mic...,call_282.wav,403826.0,0.385118
298,299,https://web.archive.org/web/20150417085349/htt...,Shooting,6/13,California,1.0,0.0,1.0,0.0,– In June 2013 a Bakersfield (Calif.) man dial...,call_299.wav,734850.0,0.700808
400,401,https://web.archive.org/web/20150417085338/htt...,Dog dials 911,9/08,Arizona,1.0,0.0,1.0,0.0,"– In Sept. 2008 “Buddy,” a trained German Shep...",call_401.wav,998722.0,0.952456
738,740,https://web.archive.org/web/20150417085349/htt...,Call from murder suspect,8/10,California,1.0,0.0,1.0,0.0,– In August 2010 the suspect in the murder of ...,call_740.wav,1065678.0,1.01631


In [15]:
# Ne prendre que les 350 premiers appels
df_meta = df_meta.head(350)

In [16]:
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_ids = [
    "openai/whisper-tiny",
    "openai/whisper-base",
    "openai/whisper-small",
    "openai/whisper-medium",
    "openai/whisper-large",
    "openai/whisper-large-v2",
    "openai/whisper-large-v3",
    "openai/whisper-large-v3-turbo",
]

In [17]:
modele = "openai/whisper-small"

for index, row in df_meta.iterrows():
    audio_file = row["file_name"]
    audio_title = row["title"]
    audio_id = row["id"]

    urgent = "urgent" if not row["false_alarm"] else "non_urgent"

    audio_path = path.joinpath(f"911/{audio_file}")

    print(f"\t\tTranscribing file {audio_file}...")

    audio_data_test, sr_test = librosa.load(audio_path, sr=16000)

    tmp_model = AutoModelForSpeechSeq2Seq.from_pretrained(
        modele, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    ).to(device)

    tmp_processor = AutoProcessor.from_pretrained(modele)

    tmp_pipe = pipeline(
        task="automatic-speech-recognition",
        model=tmp_model,
        tokenizer=tmp_processor.tokenizer,
        feature_extractor=tmp_processor.feature_extractor,
        torch_dtype=torch_dtype,
        device=device,
        return_timestamps=True,
    )

    output = tmp_pipe(audio_data_test)

    # Écrire le fichier texte correspondant

    with open(
        f"{audio_file}-{urgent}.txt", "w"
    ) as f:  # Ajout du suffixe -fr pour indiquer la traduction
        f.write(output["text"])

		Transcribing and translating file call_59.wav...


Device set to use cpu


		Transcribing and translating file call_282.wav...


Device set to use cpu


		Transcribing and translating file call_299.wav...


Device set to use cpu


		Transcribing and translating file call_401.wav...


Device set to use cpu


		Transcribing and translating file call_740.wav...


Device set to use cpu


		Transcribing and translating file call_415.wav...


Device set to use cpu


		Transcribing and translating file call_497.wav...


Device set to use cpu


		Transcribing and translating file call_199.wav...


Device set to use cpu


		Transcribing and translating file call_2.wav...


Device set to use cpu


		Transcribing and translating file call_662.wav...


Device set to use cpu


		Transcribing and translating file call_486.wav...


Device set to use cpu


		Transcribing and translating file call_27.wav...


Device set to use cpu


		Transcribing and translating file call_167.wav...


Device set to use cpu


		Transcribing and translating file call_617.wav...


Device set to use cpu


		Transcribing and translating file call_398.wav...


Device set to use cpu


		Transcribing and translating file call_621.wav...


Device set to use cpu


		Transcribing and translating file call_569.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_334.wav...


Device set to use cpu


		Transcribing and translating file call_732.wav...


Device set to use cpu


		Transcribing and translating file call_592.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_184.wav...


Device set to use cpu


		Transcribing and translating file call_376.wav...


Device set to use cpu


		Transcribing and translating file call_494.wav...


Device set to use cpu


		Transcribing and translating file call_218.wav...


Device set to use cpu


		Transcribing and translating file call_158.wav...


Device set to use cpu


		Transcribing and translating file call_564.wav...


Device set to use cpu


		Transcribing and translating file call_411.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_397.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_312.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_10.wav...


Device set to use cpu


		Transcribing and translating file call_556.wav...


Device set to use cpu


		Transcribing and translating file call_547.wav...


Device set to use cpu


		Transcribing and translating file call_540.wav...


Device set to use cpu


		Transcribing and translating file call_527.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_503.wav...


Device set to use cpu


		Transcribing and translating file call_180.wav...


Device set to use cpu


		Transcribing and translating file call_492.wav...


Device set to use cpu


		Transcribing and translating file call_31.wav...


Device set to use cpu


		Transcribing and translating file call_332.wav...


Device set to use cpu


		Transcribing and translating file call_32.wav...


Device set to use cpu


		Transcribing and translating file call_183.wav...


Device set to use cpu


		Transcribing and translating file call_532.wav...


Device set to use cpu


		Transcribing and translating file call_204.wav...


Device set to use cpu


		Transcribing and translating file call_73.wav...


Device set to use cpu


		Transcribing and translating file call_623.wav...


Device set to use cpu


		Transcribing and translating file call_587.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_562.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_673.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_704.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_400.wav...


Device set to use cpu


		Transcribing and translating file call_586.wav...


Device set to use cpu


		Transcribing and translating file call_580.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_548.wav...


Device set to use cpu


		Transcribing and translating file call_607.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_608.wav...


Device set to use cpu


		Transcribing and translating file call_13.wav...


Device set to use cpu


		Transcribing and translating file call_480.wav...


Device set to use cpu


		Transcribing and translating file call_626.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_412.wav...


Device set to use cpu


		Transcribing and translating file call_201.wav...


Device set to use cpu


		Transcribing and translating file call_258.wav...


Device set to use cpu


		Transcribing and translating file call_9.wav...


Device set to use cpu


		Transcribing and translating file call_36.wav...


Device set to use cpu


		Transcribing and translating file call_475.wav...


Device set to use cpu


		Transcribing and translating file call_188.wav...


Device set to use cpu


		Transcribing and translating file call_115.wav...


Device set to use cpu


		Transcribing and translating file call_316.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_20.wav...


Device set to use cpu


		Transcribing and translating file call_289.wav...


Device set to use cpu


		Transcribing and translating file call_353.wav...


Device set to use cpu


		Transcribing and translating file call_319.wav...


Device set to use cpu


		Transcribing and translating file call_191.wav...


Device set to use cpu


		Transcribing and translating file call_39.wav...


Device set to use cpu


		Transcribing and translating file call_483.wav...


Device set to use cpu


		Transcribing and translating file call_189.wav...


Device set to use cpu


		Transcribing and translating file call_445.wav...


Device set to use cpu


		Transcribing and translating file call_674.wav...


Device set to use cpu


		Transcribing and translating file call_422.wav...


Device set to use cpu


		Transcribing and translating file call_232.wav...


Device set to use cpu


		Transcribing and translating file call_169.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_642.wav...


Device set to use cpu


		Transcribing and translating file call_477.wav...


Device set to use cpu


		Transcribing and translating file call_222.wav...


Device set to use cpu


		Transcribing and translating file call_137.wav...


Device set to use cpu


		Transcribing and translating file call_716.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_52.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_244.wav...


Device set to use cpu


		Transcribing and translating file call_680.wav...


Device set to use cpu


		Transcribing and translating file call_725.wav...


Device set to use cpu


		Transcribing and translating file call_209.wav...


Device set to use cpu


		Transcribing and translating file call_458.wav...


Device set to use cpu


		Transcribing and translating file call_676.wav...


Device set to use cpu


		Transcribing and translating file call_301.wav...


Device set to use cpu


		Transcribing and translating file call_709.wav...


Device set to use cpu


		Transcribing and translating file call_464.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_38.wav...


Device set to use cpu


		Transcribing and translating file call_686.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_352.wav...


Device set to use cpu


		Transcribing and translating file call_135.wav...


Device set to use cpu


		Transcribing and translating file call_687.wav...


Device set to use cpu


		Transcribing and translating file call_367.wav...


Device set to use cpu


		Transcribing and translating file call_358.wav...


Device set to use cpu


		Transcribing and translating file call_257.wav...


Device set to use cpu


		Transcribing and translating file call_17.wav...


Device set to use cpu


		Transcribing and translating file call_11.wav...


Device set to use cpu


		Transcribing and translating file call_702.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_87.wav...


Device set to use cpu


		Transcribing and translating file call_706.wav...


Device set to use cpu


		Transcribing and translating file call_565.wav...


Device set to use cpu


		Transcribing and translating file call_361.wav...


Device set to use cpu


		Transcribing and translating file call_178.wav...


Device set to use cpu


		Transcribing and translating file call_247.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_571.wav...


Device set to use cpu


		Transcribing and translating file call_485.wav...


Device set to use cpu


		Transcribing and translating file call_360.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_126.wav...


Device set to use cpu


		Transcribing and translating file call_460.wav...


Device set to use cpu


		Transcribing and translating file call_383.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_144.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_370.wav...


Device set to use cpu


		Transcribing and translating file call_414.wav...


Device set to use cpu


		Transcribing and translating file call_563.wav...


Device set to use cpu


		Transcribing and translating file call_683.wav...


Device set to use cpu


		Transcribing and translating file call_625.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_310.wav...


Device set to use cpu


		Transcribing and translating file call_138.wav...


Device set to use cpu


		Transcribing and translating file call_157.wav...


Device set to use cpu


		Transcribing and translating file call_648.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_609.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_274.wav...


Device set to use cpu


		Transcribing and translating file call_637.wav...


Device set to use cpu


		Transcribing and translating file call_66.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_731.wav...


Device set to use cpu


		Transcribing and translating file call_472.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_291.wav...


Device set to use cpu


		Transcribing and translating file call_88.wav...


Device set to use cpu


		Transcribing and translating file call_646.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_707.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_484.wav...


Device set to use cpu


		Transcribing and translating file call_294.wav...


Device set to use cpu


		Transcribing and translating file call_585.wav...


Device set to use cpu


		Transcribing and translating file call_278.wav...


Device set to use cpu


		Transcribing and translating file call_108.wav...


Device set to use cpu


		Transcribing and translating file call_649.wav...


Device set to use cpu


		Transcribing and translating file call_595.wav...


Device set to use cpu
Whisper did not predict an ending timestamp, which can happen if audio is cut off in the middle of a word. Also make sure WhisperTimeStampLogitsProcessor was used during generation.


		Transcribing and translating file call_256.wav...


Device set to use cpu


		Transcribing and translating file call_567.wav...


Device set to use cpu


		Transcribing and translating file call_741.wav...


Device set to use cpu


		Transcribing and translating file call_388.wav...


  audio_data_test, sr_test = librosa.load(audio_path, sr=16000)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: 'd:\\GitHub\\LLM\\data\\911\\call_388.wav'