[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://drive.google.com/file/d/1kQH1dEgUOTRvgJr75waOas6pSqmIfEIy/view?usp=sharing)

# For Full data

## For Clean

In [None]:
from datasets import load_dataset, load_metric
from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
import soundfile as sf

# LibriSpeech "clean" test split; use name="other" to evaluate the other test set instead.
librispeech_eval = load_dataset(path="librispeech_asr", name="clean", split="test")
# `datasets` word-error-rate metric object, used through wer.compute(...) further down.
wer = load_metric(path="wer")

Reusing dataset librispeech_asr (C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\clean\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896)


In [None]:
# The processor and the model weights are loaded from the same Hub checkpoint.
checkpoint = "facebook/s2t-medium-librispeech-asr"
processor = Speech2TextProcessor.from_pretrained(checkpoint, do_upper_case=True)
model = Speech2TextForConditionalGeneration.from_pretrained(checkpoint)
model = model.to("cuda")  # the prediction function moves its inputs to the GPU too

In [None]:
def map_to_array(batch):
    """Decode the audio file named by ``batch["file"]`` into ``batch["speech"]``.

    Only the samples returned by ``sf.read`` are kept; the sample rate it
    also returns is discarded. The same ``batch`` mapping is returned with
    the new "speech" entry added.
    """
    samples = sf.read(batch["file"])[0]
    batch["speech"] = samples
    return batch


def map_to_pred(batch):
    """Transcribe a batch of decoded utterances with the notebook-level model.

    Args:
        batch: mapping whose "speech" entry holds the waveform(s) added by
            ``map_to_array``.

    Returns:
        The same ``batch`` with a "transcription" entry holding the list of
        strings produced by ``processor.batch_decode``.
    """
    features = processor(batch["speech"], sampling_rate=16000, padding=True, return_tensors="pt")
    # Follow the model's own device instead of hard-coding "cuda", so inputs
    # always land wherever the weights currently live.
    device = model.device
    input_features = features.input_features.to(device)
    attention_mask = features.attention_mask.to(device)

    # The processor output is audio features, not token ids, so hand it to
    # generate() positionally rather than under the `input_ids` keyword.
    gen_tokens = model.generate(input_features, attention_mask=attention_mask)
    batch["transcription"] = processor.batch_decode(gen_tokens, skip_special_tokens=True)
    return batch

In [None]:
# Step 1: decode every audio file into a "speech" column (rebinds librispeech_eval).
librispeech_eval = librispeech_eval.map(function=map_to_array)
# Step 2: transcribe in batches of 8; the bulky "speech" column is dropped from the result.
result = librispeech_eval.map(
    function=map_to_pred,
    batched=True,
    batch_size=8,
    remove_columns=["speech"],
)

  0%|          | 0/2620 [00:00<?, ?ex/s]

  0%|          | 0/328 [00:00<?, ?ba/s]

  fft = torch.rfft(strided_input, 1, normalized=False, onesided=True)


In [None]:
#wer?

In [None]:
# WER from the `datasets` metric, computed over all predictions and references at once.
overall_wer = wer.compute(predictions=result["transcription"], references=result["text"])
print("WER:", overall_wer)

WER: 0.03528225806451613


In [None]:
from jiwer import wer

In [None]:
# Per-utterance WER with jiwer. jiwer.wer expects the reference (ground truth)
# first and the hypothesis second; the error count is normalised by the
# reference length, so the dataset text must be the first argument.
# Both columns are pulled out once: evaluating result[...] inside the loop
# would fetch the whole column again on every iteration.
references = result["text"]
hypotheses = result["transcription"]
NoLMwer_list = [wer(ref, hyp) for ref, hyp in zip(references, hypotheses)]

In [None]:
# Unweighted mean of the per-utterance WER values: every utterance counts
# equally regardless of its length, so this need not equal the single WER
# printed by wer.compute above.
Avg_wer = sum(NoLMwer_list) / len(NoLMwer_list)
Avg_wer

0.03682147017003704

## For Others

In [None]:
from datasets import load_dataset, load_metric
from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
import soundfile as sf

# LibriSpeech "other" test split for this section.
librispeech_eval = load_dataset(path="librispeech_asr", name="other", split="test")
# Rebind `wer` to the `datasets` metric; the earlier `from jiwer import wer` had shadowed it.
wer = load_metric(path="wer")

Reusing dataset librispeech_asr (C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\other\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896)


In [None]:
# Same checkpoint for both the processor and the model weights.
checkpoint = "facebook/s2t-medium-librispeech-asr"
processor = Speech2TextProcessor.from_pretrained(checkpoint, do_upper_case=True)
model = Speech2TextForConditionalGeneration.from_pretrained(checkpoint)
model = model.to("cuda")  # inputs are moved to the GPU inside the prediction function

In [None]:
def map_to_array(batch):
    """Read the file at ``batch["file"]`` and store its samples under "speech".

    ``sf.read`` returns the samples together with the sample rate; only the
    samples are kept. Returns the same ``batch`` mapping.
    """
    audio = sf.read(batch["file"])[0]
    batch["speech"] = audio
    return batch


def map_to_pred(batch):
    """Transcribe a batch of decoded utterances with the notebook-level model.

    Args:
        batch: mapping whose "speech" entry holds the waveform(s) added by
            ``map_to_array``.

    Returns:
        The same ``batch`` with a "transcription" entry holding the list of
        strings produced by ``processor.batch_decode``.
    """
    features = processor(batch["speech"], sampling_rate=16000, padding=True, return_tensors="pt")
    # Use the device the model is actually on rather than a hard-coded "cuda".
    device = model.device
    input_features = features.input_features.to(device)
    attention_mask = features.attention_mask.to(device)

    # These are audio features, not token ids: pass them positionally instead
    # of under the `input_ids` keyword.
    gen_tokens = model.generate(input_features, attention_mask=attention_mask)
    batch["transcription"] = processor.batch_decode(gen_tokens, skip_special_tokens=True)
    return batch

In [None]:
# Decode audio first (adds "speech"), then transcribe in batches of 8 and drop "speech".
librispeech_eval = librispeech_eval.map(function=map_to_array)
result = librispeech_eval.map(
    function=map_to_pred,
    batched=True,
    batch_size=8,
    remove_columns=["speech"],
)

  0%|          | 0/2939 [00:00<?, ?ex/s]

  0%|          | 0/368 [00:00<?, ?ba/s]

  fft = torch.rfft(strided_input, 1, normalized=False, onesided=True)


In [None]:
# WER from the `datasets` metric, computed over all predictions and references at once.
overall_wer = wer.compute(predictions=result["transcription"], references=result["text"])
print("WER:", overall_wer)

WER: 0.07831037579045909


In [None]:
from jiwer import wer

In [None]:
# Per-utterance WER with jiwer. jiwer.wer takes the reference (ground truth)
# first and the hypothesis second, so the dataset text goes first; errors are
# normalised by the reference length.
# Fetch each column once instead of re-evaluating result[...] per iteration.
references = result["text"]
hypotheses = result["transcription"]
NoLMwer_list = [wer(ref, hyp) for ref, hyp in zip(references, hypotheses)]

In [None]:
# Unweighted mean of the per-utterance WER values (each utterance weighs the
# same, whatever its length), so it can differ from the wer.compute figure.
Avg_wer = sum(NoLMwer_list) / len(NoLMwer_list)
Avg_wer

0.09109444526012021

# For 10 Samples

## For Clean

In [None]:
import torch
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration , Speech2TextModel
from datasets import load_dataset
import soundfile as sf



In [None]:
from datasets import load_dataset, load_metric

# LibriSpeech "clean" test split; use name="other" to evaluate the other test set instead.
librispeech_eval = load_dataset(path="librispeech_asr", name="clean", split="test")

Reusing dataset librispeech_asr (C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\clean\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896)


In [None]:
# The processor and the model weights are loaded from the same Hub checkpoint.
checkpoint = "facebook/s2t-medium-librispeech-asr"
processor = Speech2TextProcessor.from_pretrained(checkpoint, do_upper_case=True)
model = Speech2TextForConditionalGeneration.from_pretrained(checkpoint)
model = model.to("cuda")  # the prediction function moves its inputs to the GPU too

In [None]:
def map_to_array(batch):
    """Decode the audio file named by ``batch["file"]`` into ``batch["speech"]``.

    The sample rate returned alongside the samples by ``sf.read`` is dropped.
    Returns the same ``batch`` mapping with the "speech" entry set.
    """
    samples = sf.read(batch["file"])[0]
    batch["speech"] = samples
    return batch

In [None]:
# Add a decoded "speech" column to every example (rebinds librispeech_eval).
librispeech_eval = librispeech_eval.map(function=map_to_array)

Loading cached processed dataset at C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\clean\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896\cache-80cdf746232595e2.arrow


In [None]:
def map_to_pred(batch, batched=True, batch_size=1, remove_columns=["speech"]):
    """Transcribe one decoded example with the notebook-level model.

    Args:
        batch: mapping whose "speech" entry holds the waveform added by
            ``map_to_array``.
        batched, batch_size, remove_columns: ignored. They mirror keyword
            arguments of ``Dataset.map`` and have no effect when this function
            is called directly; they are kept only so existing calls still work.

    Returns:
        The same ``batch`` with a "transcription" entry holding the list of
        strings produced by ``processor.batch_decode``.
    """
    features = processor(batch["speech"], sampling_rate=16000, padding=True, return_tensors="pt")
    # Follow the model's own device instead of hard-coding "cuda".
    device = model.device
    input_features = features.input_features.to(device)
    attention_mask = features.attention_mask.to(device)

    # The processor output is audio features, not token ids, so hand it to
    # generate() positionally rather than under the `input_ids` keyword.
    gen_tokens = model.generate(input_features, attention_mask=attention_mask)
    batch["transcription"] = processor.batch_decode(gen_tokens, skip_special_tokens=True)
    return batch

In [None]:
from jiwer import wer

In [None]:
# Score the first 10 utterances one at a time: reference text first,
# model hypothesis second.
wer_list = []
for idx in range(10):
    sample = map_to_pred(librispeech_eval[idx])
    hypothesis = sample["transcription"][0]  # batch_decode returns a list; take its first entry
    wer_list.append(wer(sample["text"], hypothesis))

  fft = torch.rfft(strided_input, 1, normalized=False, onesided=True)


In [None]:
wer_list

[0.07142857142857142,
 0.25,
 0.0,
 0.0,
 0.09090909090909091,
 0.0,
 0.0,
 0.0,
 0.06666666666666667,
 0.0]

In [None]:
# Unweighted mean of the per-utterance WER values collected in wer_list.
Avg_wer = sum(wer_list) / len(wer_list)
Avg_wer

0.0479004329004329

## For Others

In [None]:
import torch
from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration , Speech2TextModel
from datasets import load_dataset
import soundfile as sf



In [None]:
from datasets import load_dataset, load_metric

# LibriSpeech "other" test split for this section.
librispeech_eval = load_dataset(path="librispeech_asr", name="other", split="test")

Reusing dataset librispeech_asr (C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\other\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896)


In [None]:
# Same checkpoint for both the processor and the model weights.
checkpoint = "facebook/s2t-medium-librispeech-asr"
processor = Speech2TextProcessor.from_pretrained(checkpoint, do_upper_case=True)
model = Speech2TextForConditionalGeneration.from_pretrained(checkpoint)
model = model.to("cuda")  # inputs are moved to the GPU inside the prediction function

In [None]:
def map_to_array(batch):
    """Read the file at ``batch["file"]`` and store its samples under "speech".

    ``sf.read`` returns the samples together with the sample rate; only the
    samples are kept. Returns the same ``batch`` mapping.
    """
    audio = sf.read(batch["file"])[0]
    batch["speech"] = audio
    return batch

In [None]:
# Add a decoded "speech" column to every example (rebinds librispeech_eval).
librispeech_eval = librispeech_eval.map(function=map_to_array)

Loading cached processed dataset at C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\other\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896\cache-5ada55e1bc9f7ab3.arrow


In [None]:
def map_to_pred(batch, batched=True, batch_size=1, remove_columns=["speech"]):
    """Transcribe one decoded example with the notebook-level model.

    Args:
        batch: mapping whose "speech" entry holds the waveform added by
            ``map_to_array``.
        batched, batch_size, remove_columns: ignored. They mirror keyword
            arguments of ``Dataset.map`` and have no effect when this function
            is called directly; they are kept only so existing calls still work.

    Returns:
        The same ``batch`` with a "transcription" entry holding the list of
        strings produced by ``processor.batch_decode``.
    """
    features = processor(batch["speech"], sampling_rate=16000, padding=True, return_tensors="pt")
    # Use the device the model is actually on rather than a hard-coded "cuda".
    device = model.device
    input_features = features.input_features.to(device)
    attention_mask = features.attention_mask.to(device)

    # These are audio features, not token ids: pass them positionally instead
    # of under the `input_ids` keyword.
    gen_tokens = model.generate(input_features, attention_mask=attention_mask)
    batch["transcription"] = processor.batch_decode(gen_tokens, skip_special_tokens=True)
    return batch

In [None]:
from jiwer import wer

In [None]:
# Score the first 10 utterances individually (reference text first, hypothesis second).
wer_list = []
for idx in range(10):
    sample = map_to_pred(librispeech_eval[idx])
    hypothesis = sample["transcription"][0]  # batch_decode returns a list; take its first entry
    wer_list.append(wer(sample["text"], hypothesis))

  fft = torch.rfft(strided_input, 1, normalized=False, onesided=True)


In [None]:
# Unweighted mean of the per-utterance WER values collected in wer_list.
Avg_wer = sum(wer_list) / len(wer_list)
Avg_wer

0.053715034965034957

# For Converting the Clean flac files to .wav and storing information in .csv

In [None]:
import pandas as pd
from datasets import load_dataset

# This section exports the *clean* test split, but the cells above last bound
# librispeech_eval to the "other" split (with an extra "speech" column added
# by map_to_array). Reload explicitly so the export does not depend on the
# order in which cells happened to be executed.
librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
FullData = pd.DataFrame(librispeech_eval)

In [None]:
FullData

Unnamed: 0,file,audio,text,speaker_id,chapter_id,id
0,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,HE HOPED THERE WOULD BE STEW FOR DINNER TURNIP...,1089,134686,1089-134686-0000
1,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,STUFF IT INTO YOU HIS BELLY COUNSELLED HIM,1089,134686,1089-134686-0001
2,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD L...,1089,134686,1089-134686-0002
3,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,HELLO BERTIE ANY GOOD IN YOUR MIND,1089,134686,1089-134686-0003
4,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD ...,1089,134686,1089-134686-0004
...,...,...,...,...,...,...
2615,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,OH TO SHOOT MY SOUL'S FULL MEANING INTO FUTURE...,908,31957,908-31957-0021
2616,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,THEN I LONG TRIED BY NATURAL ILLS RECEIVED THE...,908,31957,908-31957-0022
2617,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,I LOVE THEE FREELY AS MEN STRIVE FOR RIGHT I L...,908,31957,908-31957-0023
2618,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,I LOVE THEE WITH THE PASSION PUT TO USE IN MY ...,908,31957,908-31957-0024


In [None]:
FullData.to_csv("C:/Users/phvpa/Desktop/Speech_FinalProject/14-12-2021/WavFiles/DataFrame/FullData.csv")

In [None]:
Tempu = pd.read_csv("C:/Users/phvpa/Desktop/Speech_FinalProject/14-12-2021/WavFiles/DataFrame/FullData.csv")

In [None]:
Tempu['file'][0]

'C:\\Users\\phvpa\\.cache\\huggingface\\datasets\\downloads\\extracted\\25864abeda6abd17a174980d34d41dba22fc4bb8b375739eb56badf206ffe112\\LibriSpeech\\test-clean\\1089\\134686\\1089-134686-0000.flac'

In [None]:
from pathlib import PurePath

In [None]:
import torchaudio

In [None]:
FullData.shape[0]

2620

In [None]:
file_path = PurePath(Tempu['file'][0])

In [None]:
# Trial conversion of the first utterance before running the full loop.
first_wav_path = r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\WavFiles\file_0" + str(0) + ".wav"
waveform, sample_rate = torchaudio.load(file_path)
torchaudio.save(first_wav_path, waveform, sample_rate)

In [None]:
waveform0, sample_rate0 = torchaudio.load(r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\WavFiles\file_00.wav")
sample_rate0

16000

In [None]:
# Re-save every clean-test FLAC file as WAV at the sample rate torchaudio reports.
# The row index is appended without zero-padding (file_00, file_01, ..., file_010),
# so the resulting names do not sort numerically in a directory listing.
wav_prefix = r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\WavFiles\audio\file_0"
for i in range(FullData.shape[0]):
    file_path = PurePath(Tempu['file'][i])
    waveform, sample_rate = torchaudio.load(file_path)
    torchaudio.save(f"{wav_prefix}{i}.wav", waveform, sample_rate)

In [None]:
Tempu['text'].to_csv(r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\WavFiles\text.csv")

In [None]:
import os.path
import pandas as pd


# Directory holding the converted clean-test WAV files.
path = r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\WavFiles\audio"

# One row per directory entry, in whatever order os.listdir returns them.
Filenames = os.listdir(path)
df = pd.DataFrame({"wav_filename": Filenames})



In [None]:
df['wav_filesize'] = "UNUSED"

In [None]:
df['transcript'] = "UNUSED"

In [None]:
df

Unnamed: 0,wav_filename,wav_filesize,transcript
0,file_00.wav,UNUSED,UNUSED
1,file_01.wav,UNUSED,UNUSED
2,file_010.wav,UNUSED,UNUSED
3,file_0100.wav,UNUSED,UNUSED
4,file_01000.wav,UNUSED,UNUSED
...,...,...,...
2615,file_0995.wav,UNUSED,UNUSED
2616,file_0996.wav,UNUSED,UNUSED
2617,file_0997.wav,UNUSED,UNUSED
2618,file_0998.wav,UNUSED,UNUSED


In [None]:
# saving the dataframe
df.to_csv(r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\WavFiles\test.csv",index=False)

# For Converting the Other flac files to .wav and storing information in .csv

In [None]:
from datasets import load_dataset, load_metric

# LibriSpeech "other" test split, loaded fresh for the WAV export below.
librispeech_eval = load_dataset(path="librispeech_asr", name="other", split="test")

Reusing dataset librispeech_asr (C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\other\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896)


In [None]:
import pandas as pd
FullData = pd.DataFrame(librispeech_eval)

In [None]:
FullData

Unnamed: 0,file,audio,text,speaker_id,chapter_id,id
0,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,THERE'S IRON THEY SAY IN ALL OUR BLOOD AND A G...,1688,142285,1688-142285-0000
1,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,MARGARET SAID MISTER HALE AS HE RETURNED FROM ...,1688,142285,1688-142285-0001
2,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,YOU DON'T MEAN THAT YOU THOUGHT ME SO SILLY,1688,142285,1688-142285-0002
3,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,I REALLY LIKED THAT ACCOUNT OF HIMSELF BETTER ...,1688,142285,1688-142285-0003
4,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,HIS STATEMENT OF HAVING BEEN A SHOP BOY WAS TH...,1688,142285,1688-142285-0004
...,...,...,...,...,...,...
2934,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,POOR ISAAC WAS HURRIED OFF ACCORDINGLY AND EXP...,8461,281231,8461-281231-0034
2935,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,THE ASSURANCE THAT SHE POSSESSED SOME FRIEND I...,8461,281231,8461-281231-0035
2936,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,SHE GAZED ACCORDINGLY UPON A SCENE WHICH MIGHT...,8461,281231,8461-281231-0036
2937,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,AT HIS FEET WAS PLACED A TABLE OCCUPIED BY TWO...,8461,281231,8461-281231-0037


In [None]:
FullData.to_csv("C:/Users/phvpa/Desktop/Speech_FinalProject/14-12-2021/OtherWavFiles/DataFrame/FullData.csv")

In [None]:
Tempu = pd.read_csv("C:/Users/phvpa/Desktop/Speech_FinalProject/14-12-2021/OtherWavFiles/DataFrame/FullData.csv")

In [None]:
Tempu

Unnamed: 0.1,Unnamed: 0,file,audio,text,speaker_id,chapter_id,id
0,0,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,THERE'S IRON THEY SAY IN ALL OUR BLOOD AND A G...,1688,142285,1688-142285-0000
1,1,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,MARGARET SAID MISTER HALE AS HE RETURNED FROM ...,1688,142285,1688-142285-0001
2,2,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,YOU DON'T MEAN THAT YOU THOUGHT ME SO SILLY,1688,142285,1688-142285-0002
3,3,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,I REALLY LIKED THAT ACCOUNT OF HIMSELF BETTER ...,1688,142285,1688-142285-0003
4,4,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,HIS STATEMENT OF HAVING BEEN A SHOP BOY WAS TH...,1688,142285,1688-142285-0004
...,...,...,...,...,...,...,...
2934,2934,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,POOR ISAAC WAS HURRIED OFF ACCORDINGLY AND EXP...,8461,281231,8461-281231-0034
2935,2935,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,THE ASSURANCE THAT SHE POSSESSED SOME FRIEND I...,8461,281231,8461-281231-0035
2936,2936,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,SHE GAZED ACCORDINGLY UPON A SCENE WHICH MIGHT...,8461,281231,8461-281231-0036
2937,2937,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,AT HIS FEET WAS PLACED A TABLE OCCUPIED BY TWO...,8461,281231,8461-281231-0037


In [None]:
from pathlib import PurePath

In [None]:
import torchaudio

In [None]:
# Re-save every other-test FLAC file as WAV at the sample rate torchaudio reports.
# The row index is appended without zero-padding, so names do not sort numerically.
wav_prefix = r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\OtherWavFiles\audio\file_0"
for i in range(FullData.shape[0]):
    file_path = PurePath(Tempu['file'][i])
    waveform, sample_rate = torchaudio.load(file_path)
    torchaudio.save(f"{wav_prefix}{i}.wav", waveform, sample_rate)

In [None]:
Tempu['text'].to_csv(r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\OtherWavFiles\text.csv")

In [None]:
import os.path
import pandas as pd

# Directory holding the converted other-test WAV files.
path = r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\OtherWavFiles\audio"

# One row per directory entry, in whatever order os.listdir returns them.
Filenames = os.listdir(path)
df = pd.DataFrame({"wav_filename": Filenames})

In [None]:
df['wav_filesize'] = "UNUSED"

In [None]:
df['transcript'] = "UNUSED"

In [None]:
df.to_csv(r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\OtherWavFiles\test.csv",index=False)

In [None]:
df

Unnamed: 0,wav_filename,wav_filesize,transcript
0,file_00.wav,UNUSED,UNUSED
1,file_01.wav,UNUSED,UNUSED
2,file_010.wav,UNUSED,UNUSED
3,file_0100.wav,UNUSED,UNUSED
4,file_01000.wav,UNUSED,UNUSED
...,...,...,...
2934,file_0995.wav,UNUSED,UNUSED
2935,file_0996.wav,UNUSED,UNUSED
2936,file_0997.wav,UNUSED,UNUSED
2937,file_0998.wav,UNUSED,UNUSED


# Rough Work

## Taking 10 other samples

In [None]:
from datasets import load_dataset, load_metric

# LibriSpeech "other" test split; only its first 10 files are converted below.
librispeech_eval = load_dataset(path="librispeech_asr", name="other", split="test")

Reusing dataset librispeech_asr (C:\Users\phvpa\.cache\huggingface\datasets\librispeech_asr\other\2.1.0\f58a17d82b157feddbc407342a657fee7389b87c33d8331312edf7a604392896)


In [None]:
import pandas as pd
FullData = pd.DataFrame(librispeech_eval)

In [None]:
FullData.to_csv("C:/Users/phvpa/Desktop/Speech_FinalProject/14-12-2021/10OtherWavFiles/DataFrame/FullData.csv")

In [None]:
Tempu = pd.read_csv("C:/Users/phvpa/Desktop/Speech_FinalProject/14-12-2021/10OtherWavFiles/DataFrame/FullData.csv")

In [None]:
from pathlib import PurePath

In [None]:
# This section's own `import torchaudio` cell only comes after this one, so
# import here to keep the cell runnable on its own.
import torchaudio

# Module-level switch, set before any audio is loaded in this section.
torchaudio.USE_SOUNDFILE_LEGACY_INTERFACE = False

In [None]:
import torchaudio

In [None]:
# Re-save the first 10 other-test FLAC files as WAV.
wav_prefix = r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\10OtherWavFiles\audio\file_0"
for i in range(10):
    file_path = PurePath(Tempu['file'][i])
    waveform, sample_rate = torchaudio.load(file_path)
    torchaudio.save(f"{wav_prefix}{i}.wav", waveform, sample_rate)

In [None]:
import os.path
import pandas as pd

# Directory holding the 10 converted other-test WAV files.
path = r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\10OtherWavFiles\audio"

# One row per directory entry, in whatever order os.listdir returns them.
Filenames = os.listdir(path)
df = pd.DataFrame({"wav_filename": Filenames})

## Rough Work 2

In [None]:
import pandas as pd
from datasets import load_dataset

# The frame displayed below holds the *clean* test split (2620 rows), but the
# nearest preceding cell binds librispeech_eval to the "other" split. Load the
# clean split here so the cell reproduces its output on a top-to-bottom run.
librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
FullData = pd.DataFrame(librispeech_eval)

In [None]:
FullData

Unnamed: 0,file,audio,text,speaker_id,chapter_id,id
0,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,HE HOPED THERE WOULD BE STEW FOR DINNER TURNIP...,1089,134686,1089-134686-0000
1,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,STUFF IT INTO YOU HIS BELLY COUNSELLED HIM,1089,134686,1089-134686-0001
2,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,AFTER EARLY NIGHTFALL THE YELLOW LAMPS WOULD L...,1089,134686,1089-134686-0002
3,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,HELLO BERTIE ANY GOOD IN YOUR MIND,1089,134686,1089-134686-0003
4,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD ...,1089,134686,1089-134686-0004
...,...,...,...,...,...,...
2615,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,OH TO SHOOT MY SOUL'S FULL MEANING INTO FUTURE...,908,31957,908-31957-0021
2616,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,THEN I LONG TRIED BY NATURAL ILLS RECEIVED THE...,908,31957,908-31957-0022
2617,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,I LOVE THEE FREELY AS MEN STRIVE FOR RIGHT I L...,908,31957,908-31957-0023
2618,C:\Users\phvpa\.cache\huggingface\datasets\dow...,C:\Users\phvpa\.cache\huggingface\datasets\dow...,I LOVE THEE WITH THE PASSION PUT TO USE IN MY ...,908,31957,908-31957-0024


In [None]:
FullData.to_csv('../14-12-2021/Codes/FullData.csv')

In [None]:
Tempu = pd.read_csv('../14-12-2021/Codes/FullData.csv') 

In [None]:
import librosa
import IPython.display as ipd

In [None]:
import soundfile as sf

In [None]:
from pathlib import PurePath

In [None]:
FullData.shape[0]

2620

In [None]:
# No `sr` argument is given, so librosa.load resamples to its default rate;
# the value shown is therefore librosa's default, not necessarily the file's
# native rate (torchaudio reports the native rate further down).
x , sr = librosa.load(Tempu['file'][0])
sr

22050

In [None]:
# Export every utterance as WAV. librosa.load resamples to 22050 Hz unless
# sr=None is passed; keep the file's native rate so the WAVs match the
# 16 kHz input the speech model is fed elsewhere in this notebook.
for i in range(FullData.shape[0]):
    file_path = PurePath(Tempu['file'][i])
    x , sr = librosa.load(file_path, sr=None)
    sf.write('C:/Users/phvpa/Desktop/Speech_FinalProject/14-12-2021/file_0'+str(i)+'.wav', x, sr)

In [None]:
from scipy.io.wavfile import read as read_wav
import os

In [None]:
sampling_rate, data=read_wav(r"C:\Users\phvpa\Desktop\Speech_FinalProject\14-12-2021\file_00.wav") # enter your filename

In [None]:
sampling_rate

22050

In [None]:
import torchaudio

In [None]:
torchaudio.set_audio_backend("soundfile")  # switch backend



In [None]:
waveform, sample_rate = torchaudio.load(Tempu['file'][0])  # load tensor from file, as usual

In [None]:
sample_rate

16000

In [None]:
torchaudio.save('test1.wav', waveform, sample_rate)

In [None]:
file_path1 = PurePath(Tempu['file'][0])

In [None]:
ls

 Volume in drive C is Acer
 Volume Serial Number is 3818-7542

 Directory of C:\Users\phvpa\Desktop\Speech_FinalProject\FinalWorkingCodes

14-12-2021  23:34    <DIR>          .
14-12-2021  23:05    <DIR>          ..
22-11-2021  07:01    <DIR>          .ipynb_checkpoints
14-12-2021  23:34           482,633 FAIRSEQ_S2T.ipynb
14-12-2021  22:56           411,078 Jasper.ipynb
14-12-2021  23:13             6,734 Top10.csv
               3 File(s)        900,445 bytes
               3 Dir(s)  122,891,563,008 bytes free
