## Libraries and data loading

In [8]:
from IPython.display import Audio
from scipy.io import wavfile
import pandas as pd
from sklearn.decomposition import PCA
import numpy as np
import librosa

# Raw string literal: the Windows path contains backslashes followed by
# letters (\M, \p, \d, \S). In a normal string these are invalid escape
# sequences, which Python warns about and plans to reject. The raw string
# yields exactly the same path value.
audio_file_path = r'C:\MBA\project\data\Speech_4.wav'

# librosa.load is called with its defaults; per the librosa documentation
# that means mono audio resampled to 22050 Hz. `samplerate` is the rate of
# the returned samples and is reused by the cells below.
speech, samplerate = librosa.load(audio_file_path)



## PCA Transform

In [9]:

def pca_reduce(signal, n_components, random_state=0):
    """Fit a PCA on ``signal`` and return its projection and reconstruction.

    The same array is used both to fit the model and to be transformed, so
    the result describes how well ``signal`` itself survives the reduction.

    Parameters
    ----------
    signal : 2-D array
        Data handed to ``PCA.fit`` / ``PCA.transform`` unchanged.
    n_components : int
        Forwarded to ``PCA(n_components=...)``.
    random_state : int or None, optional
        Forwarded to ``PCA(random_state=...)``. scikit-learn may choose a
        randomized SVD solver depending on the input size (see the PCA
        documentation for ``svd_solver``); a fixed seed keeps repeated runs
        of the notebook identical. Pass ``None`` for the unseeded behaviour.

    Returns
    -------
    tuple
        ``(transformed, reconstructed)`` where ``transformed`` is
        ``pca.transform(signal)`` and ``reconstructed`` is
        ``pca.inverse_transform(transformed)``.
    """
    pca = PCA(n_components=n_components, random_state=random_state)
    pca.fit(signal)

    transformed = pca.transform(signal)
    reconstructed = pca.inverse_transform(transformed)
    return transformed, reconstructed

### Convert to MB

In [11]:
def raw_estimate(signal, bytes_per_sample=None):
    """Return the size of ``signal`` in megabytes (units of 2**20 bytes).

    Parameters
    ----------
    signal : numpy.ndarray
        Array whose storage footprint is estimated.
    bytes_per_sample : int or None, optional
        When ``None`` (the default) the size is the array's actual in-memory
        size, ``signal.nbytes``, and therefore depends on the array's dtype.
        When given, the size is ``signal.size * bytes_per_sample`` instead,
        e.g. ``bytes_per_sample=2`` to estimate storage as 16-bit samples.

    Returns
    -------
    float
        Size in megabytes.
    """
    if bytes_per_sample is None:
        # Measure what the array really occupies rather than assuming a
        # fixed sample width.
        total_bytes = signal.nbytes
    else:
        total_bytes = signal.size * bytes_per_sample

    return total_bytes / (2**20)

### Iteration: (PCA components, MFCC count) pairs to try

In [17]:
# Settings to evaluate. Each tuple is (number of PCA components, number of
# MFCCs); the tuples are unpacked in that order when the estimates are built.
compression_attempts = [
    # 16 MFCCs
    (4, 16), (8, 16), (16, 16),
    # 64 MFCCs
    (16, 64), (32, 64), (64, 64),
    # 128 MFCCs
    (32, 128), (64, 128), (128, 128),
]

### Main Pipeline

In [21]:
def build_estimates(signal, pca, mfcc, sr=None):
    """Estimate storage sizes for one (PCA components, MFCC count) setting.

    MFCCs are computed from ``signal``, reduced with ``pca_reduce``, and the
    sizes of the PCA-transformed array and of the MFCC array are measured
    with ``raw_estimate``.

    Parameters
    ----------
    signal : array
        Audio samples, passed to ``librosa.feature.mfcc`` as ``y``.
    pca : int
        Number of PCA components (forwarded to ``pca_reduce``).
    mfcc : int
        Number of MFCCs (forwarded to ``librosa.feature.mfcc`` as ``n_mfcc``).
    sr : number or None, optional
        Sampling rate of ``signal``. Defaults to the notebook-level
        ``samplerate`` so existing three-argument calls keep working.

    Returns
    -------
    tuple
        ``(transform_size, mfcc_size)``, both in megabytes as reported by
        ``raw_estimate``.
    """
    if sr is None:
        # Fall back to the rate produced when the audio was loaded.
        sr = samplerate

    mfcc_holder = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=mfcc)

    # Only the projection is measured here; the reconstruction is not needed.
    transformed, _ = pca_reduce(mfcc_holder, pca)

    transform_size = raw_estimate(transformed)
    mfcc_size = raw_estimate(mfcc_holder)
    return transform_size, mfcc_size

In [22]:
# One row of size estimates per (PCA components, MFCC count) setting, in the
# same order as compression_attempts.
pca_compression_results = pd.DataFrame(
    [
        build_estimates(speech, n_components, n_mfcc)
        for n_components, n_mfcc in compression_attempts
    ]
)

In [23]:
# Rows were produced by iterating compression_attempts, so the same list
# labels them with their (PCA components, MFCC count) setting.
pca_compression_results.index = compression_attempts
# Column order follows the tuple returned by build_estimates.
pca_compression_results.columns = ["Raw_PCA_Transformed", "Raw_mfcc"]
pca_compression_results

Unnamed: 0,Raw_PCA_Transformed,Raw_mfcc
"(4, 16)",0.000244,0.043396
"(8, 16)",0.000488,0.043396
"(16, 16)",0.000977,0.043396
"(16, 64)",0.003906,0.173584
"(32, 64)",0.007812,0.173584
"(64, 64)",0.015625,0.173584
"(32, 128)",0.015625,0.347168
"(64, 128)",0.03125,0.347168
"(128, 128)",0.0625,0.347168


### Re-Create Signal

In [24]:
def build_signal(signal, pca, mfcc, sr=None):
    """Return the PCA-reconstructed MFCC array for ``signal``.

    MFCCs are computed from ``signal`` and passed through ``pca_reduce``;
    only the reconstruction (the inverse-transformed projection) is returned.

    Parameters
    ----------
    signal : array
        Audio samples, passed to ``librosa.feature.mfcc`` as ``y``.
    pca : int
        Number of PCA components (forwarded to ``pca_reduce``).
    mfcc : int
        Number of MFCCs (forwarded to ``librosa.feature.mfcc`` as ``n_mfcc``).
    sr : number or None, optional
        Sampling rate of ``signal``. Defaults to the notebook-level
        ``samplerate`` so existing three-argument calls keep working.

    Returns
    -------
    array
        The reconstructed MFCC array returned by ``pca_reduce``.
    """
    if sr is None:
        # Fall back to the rate produced when the audio was loaded.
        sr = samplerate

    mfcc_holder = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=mfcc)

    # The low-dimensional projection itself is not used here.
    _, recon = pca_reduce(mfcc_holder, pca)
    return recon

In [30]:
# 64 MFCCs reduced with 64 PCA components, then mapped back to MFCC space.
mfcc_64_64 = build_signal(speech, pca=64, mfcc=64)

In [29]:
# n_mels has to be the number of mel bands used by the forward transform,
# not the number of MFCCs. build_signal calls librosa.feature.mfcc without
# n_mels, so the mel spectrogram has librosa's default of 128 bands and only
# the first 64 cepstral coefficients are kept; the inverse DCT must expand
# back to those 128 bands. sr is passed explicitly so the reconstruction
# uses the same rate as the loaded audio instead of a library default.
mfccre_64 = librosa.feature.inverse.mfcc_to_audio(
    mfcc_64_64, n_mels=128, sr=samplerate
)

In [31]:
# Play back the audio reconstructed from the 64-MFCC / 64-component setting.
Audio(mfccre_64, rate=samplerate)

In [32]:
# 128 MFCCs reduced with 128 PCA components, then mapped back to MFCC space.
mfcc_128_128 = build_signal(speech, pca=128, mfcc=128)

In [33]:
# Invert the reconstructed MFCCs back to a waveform. sr is passed explicitly
# so the inversion uses the same rate as the loaded audio rather than relying
# on librosa's default happening to match it.
mfcc_re_128_128 = librosa.feature.inverse.mfcc_to_audio(
    mfcc_128_128, n_mels=128, sr=samplerate
)

In [34]:
# Play back the audio reconstructed from the 128-MFCC / 128-component setting.
Audio(mfcc_re_128_128, rate=samplerate)

In [35]:
# 32 MFCCs reduced with 32 PCA components, then mapped back to MFCC space.
mfcc_32_32 = build_signal(speech, pca=32, mfcc=32)

In [38]:
# n_mels has to be the number of mel bands used by the forward transform,
# not the number of MFCCs. build_signal calls librosa.feature.mfcc without
# n_mels, so the mel spectrogram has librosa's default of 128 bands and only
# the first 32 cepstral coefficients are kept; the inverse DCT must expand
# back to those 128 bands. sr is passed explicitly so the reconstruction
# uses the same rate as the loaded audio instead of a library default.
mfccre_32_32 = librosa.feature.inverse.mfcc_to_audio(
    mfcc_32_32, n_mels=128, sr=samplerate
)

In [39]:
# Play back the audio reconstructed from the 32-MFCC / 32-component setting.
Audio(mfccre_32_32, rate=samplerate)