In [3]:
import numpy as np
import tensorflow as tf
import librosa
import numpy as np
import pandas as pd 
import tensorflow_io as tfio
from pydub import AudioSegment
from pydub.silence import split_on_silence

In [4]:
import librosa

def extract_audio_features(segment, sr):
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=segment, sr=sr))
    rms = np.mean(librosa.feature.rms(y=segment))
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=segment, sr=sr)[0])
    spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=segment, sr=sr)[0])
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=segment, sr=sr)[0])
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=segment)[0])
    mfccs = librosa.feature.mfcc(y=segment, sr=sr, n_mfcc=20)
    spectrogram = np.abs(librosa.stft(segment))

    features = {
        'chroma_stft': chroma_stft,
        'rms': rms,
        'spectral_centroid': spectral_centroid,
        'spectral_bandwidth': spectral_bandwidth,
        'rolloff': rolloff,
        'zero_crossing_rate': zero_crossing_rate
    }

    for i in range(1, 21):
        features[f'mfcc{i}'] = np.mean(mfccs[i-1])

    spectrogram_mean = np.mean(spectrogram, axis=1)
    for i in range(len(spectrogram_mean)):
        features[f'spectrogram_{i}'] = spectrogram_mean[i]

    return features


In [10]:
import os
import librosa
import pandas as pd

audio_dir = "data/for-norm/for-norm/validation/real"

data = []

for filename in os.listdir(audio_dir):
    if filename.endswith(".wav"):  
        filepath = os.path.join(audio_dir, filename)
        print(f"Processing {filepath}")

        y, sr = librosa.load(filepath, sr=None)

        
        features = extract_audio_features(y, sr)

        label = "real" 
        features['label'] = label

        data.append(features)

df = pd.DataFrame(data)

df.to_csv("validation_real.csv", index=False)


Processing data/for-norm/for-norm/validation/real\file10004.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file10014.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file10016.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file10022.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file10034.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file1005.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file1006.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file10063.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file10067.wav_16k.wav_norm.wav_mono.wav_silence.wav
Processing data/for-norm/for-norm/validation/real\file10071.wav_16k.wav_norm.wav_mon

In [3]:
import pandas as pd
import os

csv_files = ['data/testing_fake.csv', 'data/testing_real.csv', 'data/training_fake.csv', 'data/training_real.csv', 'data/validation_fake.csv', 'data/validation_real.csv']

dfs = []

for file in csv_files:
    print(f"Processing {file}")
    df = pd.read_csv(file)
    dfs.append(df)

combined_df = pd.concat(dfs)

print("resampling the rows ")
resample = combined_df.sample(frac=1)

save_dir = "data"

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

print("saving data ....")
save_path = os.path.join(save_dir, "resampled_data.csv")

resample.to_csv(save_path, index=False) 


Processing data/testing_fake.csv
Processing data/testing_real.csv
Processing data/training_fake.csv
Processing data/training_real.csv
Processing data/validation_fake.csv
Processing data/validation_real.csv
resampling the rows 
saving data ....


In [4]:
import pandas as pd

data = pd.read_csv("data/resampled_data.csv")

missing_rows = data[data.isna().any(axis=1)]

print("Rows with missing values:")
print(missing_rows)

clean_data = data.dropna()

clean_data.to_csv("data/resampled_data.csv", index=False)

print(f"Removed {len(data) - len(clean_data)} rows with missing values.")


Rows with missing values:
       chroma_stft  rms  spectral_centroid  spectral_bandwidth  rolloff  \
22884          NaN  NaN                NaN                 NaN      NaN   
61913          NaN  NaN                NaN                 NaN      NaN   

       zero_crossing_rate  mfcc1  mfcc2  mfcc3  mfcc4  ...  spectrogram_1016  \
22884                 NaN    NaN    NaN    NaN    NaN  ...               NaN   
61913                 NaN    NaN    NaN    NaN    NaN  ...               NaN   

       spectrogram_1017  spectrogram_1018  spectrogram_1019  spectrogram_1020  \
22884               NaN               NaN               NaN               NaN   
61913               NaN               NaN               NaN               NaN   

       spectrogram_1021  spectrogram_1022  spectrogram_1023  spectrogram_1024  \
22884               NaN               NaN               NaN               NaN   
61913               NaN               NaN               NaN               NaN   

       label  
2288

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

data = pd.read_csv("data/resampled_data.csv")

X = data.drop(columns=['label'])  
y = data['label']

label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [17]:
import numpy as np
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten

# Define your CNN-LSTM model
def create_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))  
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

n_folds = 5

kf = KFold(n_splits=n_folds, shuffle=True)

acc_per_fold = []


fold_no = 1
for train_index, test_index in kf.split(X):
    
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    
    X_train = np.expand_dims(X_train.values, axis=2)
    X_test = np.expand_dims(X_test.values, axis=2)

    
    model = create_model(input_shape=(X_train.shape[1], X_train.shape[2]))

    
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

    
    scores = model.evaluate(X_test, y_test, verbose=0)

    
    print(f'Score for fold {fold_no}: {model.metrics_names[1]} of {scores[1]*100}%')

   
    acc_per_fold.append(scores[1] * 100)

   
    fold_no += 1


print('Average accuracy across all folds:', np.mean(acc_per_fold))


Score for fold 1: accuracy of 99.02597665786743%
Score for fold 2: accuracy of 98.99711608886719%
Score for fold 3: accuracy of 99.25685524940491%
Score for fold 4: accuracy of 99.36503171920776%
Score for fold 5: accuracy of 99.14856553077698%
Average accuracy across all folds: 99.15870904922485


In [18]:
import numpy as np
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten


def create_model(input_shape):
    model = Sequential()
    model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))  
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


n_folds = 10


kf = KFold(n_splits=n_folds, shuffle=True)


acc_per_fold = []


fold_no = 1
for train_index, test_index in kf.split(X):
    
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]

    
    X_train = np.expand_dims(X_train.values, axis=2)
    X_test = np.expand_dims(X_test.values, axis=2)

    
    model = create_model(input_shape=(X_train.shape[1], X_train.shape[2]))

    
    model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=0)

   
    scores = model.evaluate(X_test, y_test, verbose=0)

   
    print(f'Score for fold {fold_no}: {model.metrics_names[1]} of {scores[1]*100}%')

    
    acc_per_fold.append(scores[1] * 100)

   
    fold_no += 1


print('Average accuracy across all folds:', np.mean(acc_per_fold))


Score for fold 1: accuracy of 99.22077655792236%
Score for fold 2: accuracy of 98.86003136634827%
Score for fold 3: accuracy of 99.04761910438538%
Score for fold 4: accuracy of 50.59162974357605%
Score for fold 5: accuracy of 99.40837025642395%
Score for fold 6: accuracy of 98.91774654388428%
Score for fold 7: accuracy of 49.32178854942322%
Score for fold 8: accuracy of 98.91774654388428%
Score for fold 9: accuracy of 99.19180274009705%
Score for fold 10: accuracy of 99.10520911216736%
Average accuracy across all folds: 89.25827205181122
