In [1]:
# Uncomment the lines below to delete previously created audio/MFCC folders
# !rm -r /content/real_audios
# !rm -r /content/fake_audios
# !rm -r /content/real_mfcc
# !rm -r /content/fake_mfcc

Install Required Libraries

In [None]:
# Colab setup: upgrade the Google API client and auth packages imported by the cells below.
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib

Authenticate and Set Up Google Drive API

In [3]:
from google.colab import auth
# Authenticate the current Colab user; this runs before the Drive client is built below.
auth.authenticate_user()

from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from googleapiclient.errors import HttpError
import io

# Set up the Google Drive API
def setup_drive_service():
    """Build and return a Google Drive v3 API client.

    Credentials are obtained from ``google.auth.default()``; the project id
    that call also returns is not needed and is discarded.
    """
    from googleapiclient.discovery import build
    from google.auth import default

    credentials, _unused_project = default()
    return build('drive', 'v3', credentials=credentials)

# Module-level Drive client used by the listing and download helpers defined in later cells.
drive_service = setup_drive_service()

In [4]:
import os

def list_files_in_folder(folder_id, service=None):
    """Return every non-folder file directly inside a Google Drive folder.

    The Drive ``files.list`` endpoint is paginated, so this follows
    ``nextPageToken`` until the listing is exhausted instead of returning
    only the first page.

    Args:
        folder_id: Drive id of the parent folder.
        service: Optional Drive v3 client. Defaults to the module-level
            ``drive_service`` when omitted.

    Returns:
        A list of dicts, each with the keys ``id`` and ``name``.
    """
    if service is None:
        service = drive_service

    query = f"'{folder_id}' in parents and mimeType != 'application/vnd.google-apps.folder'"
    files = []
    page_token = None
    while True:
        response = service.files().list(
            q=query,
            # nextPageToken must be requested explicitly once `fields` is set.
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return files

def download_file(file_id, destination):
    """Download one Drive file to a local path, printing progress per chunk.

    Args:
        file_id: Drive id of the file to fetch.
        destination: Local path the content is written to (opened in ``'wb'``
            mode, so an existing file is overwritten).

    Uses the module-level ``drive_service`` client.
    """
    request = drive_service.files().get_media(fileId=file_id)
    # The context manager guarantees the handle is flushed and closed even if
    # a chunk request raises part-way through the download.
    with io.FileIO(destination, mode='wb') as fh:
        downloader = MediaIoBaseDownload(fh, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(f"Download {int(status.progress() * 100)}%.")

def download_files_from_folder(folder_id, destination_folder):
    """Download every file of a Drive folder into a local directory.

    Args:
        folder_id: Drive id of the folder whose files are listed via
            ``list_files_in_folder``.
        destination_folder: Local directory receiving the files; it is
            created if it does not exist yet.
    """
    os.makedirs(destination_folder, exist_ok=True)

    for file in list_files_in_folder(folder_id):
        file_name = file['name']
        # Drive names may contain path separators; neutralise them so the
        # name stays a single component inside destination_folder.
        safe_name = file_name.replace('/', '_').replace(os.sep, '_')
        destination = os.path.join(destination_folder, safe_name)
        print(f"Downloading {file_name}...")
        download_file(file['id'], destination)
        print(f"Saved to {destination}")

In [None]:
# Real audio files: Drive folder id and local download directory
folder_id_real = '1G-m8xRyHEIhsjWydgUObdjsECPfpU_vP'
destination_folder_real = '/content/real_audios'

# Fake audio files: Drive folder id and local download directory
folder_id_fake = '1b-c9h0y0SE2hbsF5gDIs07ykkdMdZVLQ'
destination_folder_fake = '/content/fake_audios'

# Create the destination folders if they don't exist
os.makedirs(destination_folder_real, exist_ok=True)
os.makedirs(destination_folder_fake, exist_ok=True)

# Download the real and fake files separately, each into its own folder
download_files_from_folder(folder_id_real, destination_folder_real)
download_files_from_folder(folder_id_fake, destination_folder_fake)

Audio to MFCC

In [6]:
import numpy as np
import os
from scipy.io import wavfile
from scipy.fft import fft
from scipy.signal import get_window
from scipy.fftpack import dct
import matplotlib.pyplot as plt
import librosa.display

# Define the function to normalize audio
def normalize_audio(audio):
    """Normalizes the audio signal to the range [-1, 1].

    The samples are converted to float64 before the peak is measured:
    taking ``np.abs`` of integer PCM data overflows for the most negative
    value (e.g. -32768 in int16) and would yield a wrong peak.

    Args:
        audio: Array-like of samples (integer or floating point).

    Returns:
        A float64 array scaled so that its largest absolute value is 1.
        Empty or all-zero input is returned as float64 zeros/empty instead
        of raising or producing NaNs from a 0/0 division.
    """
    samples = np.asarray(audio, dtype=np.float64)
    if samples.size == 0:
        return samples.copy()

    peak = np.max(np.abs(samples))
    if peak == 0:
        # Pure silence: nothing to scale.
        return np.zeros_like(samples)
    return samples / peak

# Define the function to frame the audio
def frame_audio(audio, FFT_size, hop_size, sample_rate):
    """Splits the audio signal into overlapping frames.

    Frame ``i`` starts at sample ``i * hop_size`` and is ``FFT_size`` samples
    long. Enough frames are produced to cover every input sample; the tail of
    the last frame is zero-padded when the signal does not fill it. A signal
    shorter than (or exactly) ``FFT_size`` yields a single frame.

    Args:
        audio: 1-D sequence of samples.
        FFT_size: Number of samples per frame.
        hop_size: Step in samples between the starts of consecutive frames.
        sample_rate: Unused; kept so existing callers keep working.

    Returns:
        Array of shape ``(frame_num, FFT_size)``.
    """
    audio_len = len(audio)
    # Samples beyond the first frame that still have to be covered.
    remaining = max(audio_len - FFT_size, 0)
    frame_num = 1 + int(np.ceil(remaining / hop_size))

    pad_audio_len = (frame_num - 1) * hop_size + FFT_size
    pad_audio = np.append(audio, np.zeros(pad_audio_len - audio_len))

    # Broadcast frame start offsets against the in-frame sample offsets.
    starts = np.arange(frame_num) * hop_size
    indices = starts[:, np.newaxis] + np.arange(FFT_size)[np.newaxis, :]
    return pad_audio[indices]

# Function to get Mel filter points
def get_filter_points(fmin, fmax, mel_filter_num, FFT_size, sample_rate):
    """Computes the band edges of a Mel filter bank.

    ``mel_filter_num + 2`` points are spaced evenly on the mel scale between
    ``fmin`` and ``fmax`` (both in Hz) and converted back to Hz.

    Returns:
        A tuple ``(bin_edges, hz_edges)``: the edges as integer FFT bin
        indices, and the same edges as frequencies in Hz.
    """
    # Hz -> mel: mel = 2595 * log10(1 + hz / 700)
    mel_low = 2595 * np.log10(1 + fmin / 700)
    mel_high = 2595 * np.log10(1 + fmax / 700)

    mel_edges = np.linspace(mel_low, mel_high, num=mel_filter_num + 2)

    # mel -> Hz: hz = 700 * (10 ** (mel / 2595) - 1)
    hz_edges = 700 * (10 ** (mel_edges / 2595) - 1)

    bin_edges = np.floor((FFT_size + 1) / sample_rate * hz_edges).astype(int)
    return bin_edges, hz_edges

# Function to create Mel filters
def get_filters(filter_points, FFT_size):
    """Builds triangular Mel filters from consecutive band-edge bins.

    Each filter uses three neighbouring entries of ``filter_points``: it
    ramps from 0 to 1 between the first and second, and from 1 to 0 between
    the second and third.

    Returns:
        Array of shape ``(len(filter_points) - 2, int(FFT_size / 2 + 1))``.
    """
    bank = np.zeros((len(filter_points) - 2, int(FFT_size / 2 + 1)))
    edge_triples = zip(filter_points[:-2], filter_points[1:-1], filter_points[2:])
    for row, (left, centre, right) in zip(bank, edge_triples):
        # `row` is a view into `bank`, so slice assignment fills it in place.
        row[left:centre] = np.linspace(0, 1, centre - left)
        row[centre:right] = np.linspace(1, 0, right - centre)
    return bank

def save_mfccs_as_image(mfccs, output_path, file_id, sample_rate=16000, hop_length=15):
    """Saves MFCCs as a PNG image with labeled axes and a color bar.

    Only the first 10 coefficients (rows) are plotted.

    Args:
        mfccs: 2-D array, one row per coefficient and one column per frame.
        output_path: Directory for the image; created if missing.
        file_id: Name used in the plot title and in the file name
            ``{file_id}_mfccs.png``.
        sample_rate: Sample rate in Hz used to scale the time axis.
        hop_length: Hop between frames in samples, used for the time axis.
    """
    # Plot only the first 10 coefficients (fewer if the matrix is smaller).
    mfccs_10 = mfccs[:10, :]
    coef_count = mfccs_10.shape[0]

    os.makedirs(output_path, exist_ok=True)
    image_path = os.path.join(output_path, f"{file_id}_mfccs.png")

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        mesh = librosa.display.specshow(
            mfccs_10, x_axis='time', sr=sample_rate, hop_length=hop_length, ax=ax
        )
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')

        # Labels
        ax.set_yticks(np.arange(coef_count))
        ax.set_yticklabels([f'Coef {i+1}' for i in range(coef_count)])
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("MFCC Coefficient Index")
        ax.set_title(f'MFCCs for {file_id}')

        fig.savefig(image_path)
    finally:
        # Always release the figure; this runs once per file in a batch.
        plt.close(fig)
    print(f"MFCC image saved to {image_path}")


def _power_spectrum(frames, window, chunk_size=2048):
    """Returns the one-sided power spectrum of each windowed frame, shape (frames, bins)."""
    frame_count, fft_size = frames.shape
    power = np.empty((frame_count, fft_size // 2 + 1))
    # Work in chunks so the complex spectrum of all frames is never held at once.
    for start in range(0, frame_count, chunk_size):
        stop = start + chunk_size
        spectrum = np.fft.rfft(frames[start:stop] * window, axis=1)
        power[start:stop] = np.square(np.abs(spectrum))
    return power


def extract_mfccs(input_folder, output_folder, mel_filter_num=40, dct_filter_num=40, FFT_size=2048, hop_size=15):
    """Extracts MFCCs from the input .wav files and saves them as PNG images.

    For every ``.wav`` file in ``input_folder`` (extension matched
    case-insensitively, files processed in sorted order) the signal is
    normalized, framed, windowed (Hann), converted to a power spectrum,
    passed through an area-normalized Mel filter bank, log-scaled and
    transformed with an orthonormal DCT-II. The image is written to
    ``output_folder``.

    Args:
        input_folder: Directory containing the .wav files.
        output_folder: Directory for the PNG images; created if missing.
        mel_filter_num: Number of Mel filters.
        dct_filter_num: Number of cepstral coefficients kept after the DCT.
        FFT_size: Frame length in samples.
        hop_size: Step between consecutive frames in samples.
    """
    os.makedirs(output_folder, exist_ok=True)

    # The analysis window depends only on FFT_size, so build it once.
    window = get_window("hann", FFT_size, fftbins=True)

    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith('.wav'):
            continue
        wav_file = os.path.join(input_folder, filename)

        # Loading the audio file
        sample_rate, audio = wavfile.read(wav_file)

        # Work in float64 and fold multi-channel audio down to mono, since
        # the framing step expects a 1-D signal.
        audio = np.asarray(audio, dtype=np.float64)
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        if audio.size == 0 or not np.any(audio):
            print(f"Skipping {filename}: file contains no audio signal")
            continue

        # Normalizing the audio
        audio = normalize_audio(audio)

        # Framing the audio
        audio_framed = frame_audio(audio, FFT_size=FFT_size, hop_size=hop_size, sample_rate=sample_rate)
        if audio_framed.shape[0] == 0:
            print(f"Skipping {filename}: too short to produce a frame")
            continue

        # Windowing, FFT and power spectrum
        audio_power = _power_spectrum(audio_framed, window)

        # Get filter points and filters (they depend on the file's sample rate)
        freq_min = 0
        freq_high = sample_rate / 2
        filter_points, mel_freqs = get_filter_points(freq_min, freq_high, mel_filter_num, FFT_size, sample_rate)
        filters = get_filters(filter_points, FFT_size)

        # Normalize each filter by the width of its band
        enorm = 2.0 / (mel_freqs[2:mel_filter_num + 2] - mel_freqs[:mel_filter_num])
        filters *= enorm[:, np.newaxis]

        # Applying Mel filter bank
        audio_filtered = np.dot(filters, audio_power.T)

        # Converting to log scale; the floor keeps silent frames from
        # producing -inf, which would turn the coefficients into inf/NaN.
        audio_log = 10.0 * np.log10(np.maximum(audio_filtered, np.finfo(float).eps))

        # DCT-II along the Mel axis. Applying dct() to the data directly
        # avoids the transposed matrix that dct(np.eye(n)) yields with the
        # default axis, which amounts to an inverse DCT.
        mfccs = dct(audio_log, type=2, axis=0, norm='ortho')[:dct_filter_num]

        # Saving the MFCCs as image, with the time axis scaled by the real
        # sample rate and hop size of this run
        file_id = os.path.splitext(filename)[0]
        save_mfccs_as_image(mfccs, output_folder, file_id, sample_rate=sample_rate, hop_length=hop_size)


Compute MFCC and save images to Google Drive

In [7]:
from google.colab import drive
# Mount Google Drive at /content/drive; the MFCC output folders used later live under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Real audio: read the downloaded .wav files and write their MFCC images to Drive
input_folder_real = '/content/real_audios'
output_folder_real = '/content/drive/MyDrive/SampleRun/Computed Images/MFCC/real'

extract_mfccs(input_folder_real, output_folder_real)

# Fake audio: same processing, written to a separate output folder
input_folder_fake = '/content/fake_audios'
output_folder_fake = '/content/drive/MyDrive/SampleRun/Computed Images/MFCC/fake'

extract_mfccs(input_folder_fake, output_folder_fake)