<a href="https://colab.research.google.com/github/nancyshukla6756/gold-price-predict/blob/main/male_female_audio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import urllib.request
import tarfile
import pandas as pd
import os


In [12]:
# Local folder the archive is unpacked into; the download step also uses
# this name (plus '.tar.gz') for the temporary archive file.
data_dir = 'cv_corpus_v1'

# Remote location of the English dataset archive
url = 'https://common-voice-data-download.s3.amazonaws.com/cv_corpus_v1.tar.gz'


In [13]:
# Make sure the data directory exists.
# `exist_ok=True` makes this safe to re-run and avoids the check-then-create
# race of testing os.path.exists() before calling os.mkdir().
os.makedirs(data_dir, exist_ok=True)

In [14]:
# Download the dataset archive, unpack it into the data directory, then
# delete the archive once extraction has succeeded.
archive_path = f'{data_dir}.tar.gz'

print('Downloading dataset...')
urllib.request.urlretrieve(url, archive_path)

print('Extracting dataset...')
# The context manager closes the archive even if extraction fails part-way.
with tarfile.open(archive_path, 'r:gz') as tar:
    # NOTE(review): the archive comes from the network. Where the running
    # Python provides extraction filters, use the 'data' filter so archive
    # members cannot be written outside data_dir; older interpreters fall
    # back to the unfiltered behaviour.
    if hasattr(tarfile, 'data_filter'):
        tar.extractall(data_dir, filter='data')
    else:
        tar.extractall(data_dir)

# Only reached when extraction succeeded, so a failed run keeps the archive.
os.remove(archive_path)


Downloading dataset...
Extracting dataset...


In [None]:
# Read the corpus metadata CSV into a DataFrame
metadata_file = f'{data_dir}/en/metadata.csv'
metadata = pd.read_csv(metadata_file)

# Keep only the rows whose gender is labelled 'male' or 'female'
metadata = metadata[metadata['gender'].isin(['male', 'female'])]

# Summarise the filtered data: clip count and distinct speakers per gender
male_rows = metadata[metadata['gender'] == 'male']
female_rows = metadata[metadata['gender'] == 'female']
print(f'Number of audio files: {len(metadata)}')
print(f'Number of male speakers: {male_rows.speaker.nunique()}')
print(f'Number of female speakers: {female_rows.speaker.nunique()}')

# Placeholder pass over the clips: only builds each file path for now.
# NOTE(review): this joins the path onto data_dir directly, whereas the
# feature-extraction cell reads clips from 'cv_corpus_v1/en/clips' - confirm
# which layout is correct before adding real processing here.
for i, row in metadata.iterrows():
    audio_file = f'{data_dir}/{row.path}'
    # ... process audio_file here (e.g. extract features) ...

In [None]:
import librosa
import pandas as pd
import numpy as np
import os

# Directory where the audio files are stored.
# NOTE(review): this rebinds `data_dir`, which earlier cells use for the
# corpus root ('cv_corpus_v1'); re-running those cells after this one will
# see the clips path instead.
data_dir = 'cv_corpus_v1/en/clips'

# Directory where one .npy feature file per clip is written
feat_dir = 'cv_corpus_v1/en/features'

# Create the feature directory (and any missing parents); safe to re-run
os.makedirs(feat_dir, exist_ok=True)

# Load the metadata file into a Pandas dataframe
metadata_file = 'cv_corpus_v1/en/metadata.csv'
metadata = pd.read_csv(metadata_file)

# Keep only the rows labelled 'male' or 'female'
metadata = metadata[metadata['gender'].isin(['male', 'female'])]

# Number of MFCC coefficients per clip (length of each saved feature vector)
feat_size = 20

# Extract one time-averaged MFCC vector per clip and save it to disk.
# The progress counter comes from enumerate(): after filtering, the
# DataFrame index labels are no longer contiguous, so they cannot be used
# as "file k of N".
total_files = metadata.shape[0]
for count, (_, row) in enumerate(metadata.iterrows(), start=1):
    # Load the clip at its native sample rate (sr=None disables resampling)
    audio_file = os.path.join(data_dir, row['path'])
    y, sr = librosa.load(audio_file, sr=None)

    # MFCC matrix is (feat_size, n_frames); average over frames to get a
    # fixed-length vector regardless of clip duration
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=feat_size)
    feat = np.mean(mfcc, axis=1)

    # Output name: the clip path with its last four characters (assumed to
    # be a three-letter extension such as '.mp3') replaced by '.npy'.
    feat_file = os.path.join(feat_dir, row['path'][:-4] + '.npy')
    # The clip path may contain sub-directories; make sure the matching
    # folder exists under feat_dir before saving.
    os.makedirs(os.path.dirname(feat_file), exist_ok=True)
    np.save(feat_file, feat)

    # Print progress
    print(f'Processed file {count} of {total_files}: {row["path"]}')


In [None]:
import pandas as pd
import numpy as np
import os

# Directory where the per-clip .npy feature files are stored
feat_dir = 'cv_corpus_v1/en/features'

# Load the metadata file into a Pandas dataframe
metadata_file = 'cv_corpus_v1/en/metadata.csv'
metadata = pd.read_csv(metadata_file)

# Keep only the rows labelled 'male' or 'female'
metadata = metadata[metadata['gender'].isin(['male', 'female'])]

# Length of each saved feature vector
feat_size = 20

# Column layout of the output table: filename, mfcc_0..mfcc_{n-1}, gender.
# The training cell selects features and label by position, so the order
# (filename first, gender last) must be kept.
feature_columns = ['filename'] + [f'mfcc_{k}' for k in range(feat_size)] + ['gender']

# Collect one row per clip in a plain list and build the DataFrame once at
# the end; assigning rows one by one with .loc re-allocates the frame on
# every iteration. Carrying the gender in the same row also keeps label and
# features aligned without relying on positional order afterwards.
feature_rows = []
total_files = metadata.shape[0]
for count, (_, row) in enumerate(metadata.iterrows(), start=1):
    # Feature file name: clip path with its last four characters (assumed to
    # be a three-letter extension such as '.mp3') replaced by '.npy'
    feat_file = os.path.join(feat_dir, row['path'][:-4] + '.npy')
    feat = np.load(feat_file)
    feature_rows.append([row['path'], *feat.tolist(), row['gender']])

    # Progress counter from enumerate(): the filtered index labels are not
    # contiguous, so they cannot serve as "file k of N"
    print(f'Processed file {count} of {total_files}: {row["path"]}')

# Raises if any feature vector does not have exactly feat_size entries
features = pd.DataFrame(feature_rows, columns=feature_columns)

# Save the dataframe to a CSV file
features.to_csv('cv_corpus_v1_en_features.csv', index=False)


In [None]:
import os
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the feature table (first column: filename, last column: gender,
# everything in between: MFCC features)
features_file = 'cv_corpus_v1_en_features.csv'
features = pd.read_csv(features_file)

# Split the data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(features.iloc[:, 1:-1], features.iloc[:, -1], test_size=0.2, random_state=42)

# Scale the features to zero mean and unit variance; the scaler is fitted on
# the training split only and then applied to the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a support vector machine (SVM) classifier
svm = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)
svm.fit(X_train_scaled, y_train)

# Make predictions on the test set and calculate accuracy
y_pred = svm.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Test accuracy: {accuracy:.4f}')

# Persist the fitted classifier and scaler. The inference cell loads
# 'models/svm_model.pkl' and 'models/scaler.pkl', which were never written
# anywhere in the notebook before.
model_dir = 'models'
os.makedirs(model_dir, exist_ok=True)
with open(os.path.join(model_dir, 'svm_model.pkl'), 'wb') as model_fh:
    pickle.dump(svm, model_fh)
with open(os.path.join(model_dir, 'scaler.pkl'), 'wb') as scaler_fh:
    pickle.dump(scaler, scaler_fh)


In [None]:
import pickle
import pyaudio
import numpy as np
import librosa
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Load the fitted SVM classifier and scaler from disk.
# SVC has no `load` method, so both objects are restored with pickle.
# NOTE(review): unpickling runs arbitrary code from the file - only load
# model files that this notebook (or another trusted source) produced.
model_dir = 'models'
svm_file = 'svm_model.pkl'
scaler_file = 'scaler.pkl'
with open(model_dir + '/' + svm_file, 'rb') as model_fh:
    svm = pickle.load(model_fh)
with open(model_dir + '/' + scaler_file, 'rb') as scaler_fh:
    scaler = pickle.load(scaler_fh)

# Define the audio recording parameters
duration = 3  # seconds
sample_rate = 16000
num_channels = 1
chunk_size = 1024

# Record from the microphone. The nested try/finally blocks make sure the
# stream and the PyAudio instance are released even if opening or reading
# the stream fails.
audio = pyaudio.PyAudio()
try:
    stream = audio.open(format=pyaudio.paFloat32, channels=num_channels, rate=sample_rate, input=True, frames_per_buffer=chunk_size)
    try:
        print('Recording...')
        # Number of whole chunks needed to cover `duration` seconds
        num_chunks = int(sample_rate / chunk_size * duration)
        frames = [stream.read(chunk_size) for _ in range(num_chunks)]
    finally:
        stream.stop_stream()
        stream.close()
finally:
    audio.terminate()

# Convert the recorded bytes to a float32 numpy array (matches paFloat32)
audio_data = np.frombuffer(b''.join(frames), dtype=np.float32)

# Extract the MFCC features from the audio data. `y` is passed by keyword,
# as in the feature-extraction cell; recent librosa releases do not accept
# it positionally.
# NOTE(review): training clips were loaded with sr=None (their native rate)
# while this recording is 16 kHz - confirm the rates are compatible.
mfcc = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=20)

# Average over time frames to get the same 20-value vector the model was
# trained on, then shape it as a single sample (1, 20). Flattening the whole
# (20, n_frames) matrix would not match the scaler's feature count.
mfcc_mean = np.mean(mfcc, axis=1).reshape(1, -1)

# Scale the MFCC features using the saved scaler
mfcc_scaled = scaler.transform(mfcc_mean)

# Predict the gender label of the audio using the saved SVM classifier
gender_label = svm.predict(mfcc_scaled)[0]

# Print the predicted gender label
if gender_label == 'male':
    print('The audio is male.')
elif gender_label == 'female':
    print('The audio is female.')
else:
    print('Error: Unknown gender label.')
