In [None]:
%cd /content/drive/MyDrive/research/PROJ201 20221/Gender Identification

In [None]:
import pandas as pd

In [None]:
!pip install pytube

In [None]:
from pytube import YouTube

In [None]:
# Working directory (relative to the current directory) that holds the
# downloaded audio ('x.wav') and the numbered clips cut from it; its contents
# are removed by delete_download_files().
SAVE_PATH = 'download'

In [None]:
import os

In [None]:
# Download the audio from Youtube
def download_audio(link):
  """Download the highest-bitrate audio-only stream of a YouTube video.

  The stream is saved as ``SAVE_PATH/x.wav``. Videos of 75 minutes or longer
  and videos without an audio-only stream are treated as not downloadable.

  Args:
    link: URL of the YouTube video.

  Returns:
    dict with ``audio_downloadable`` and ``audio_downloadable_2`` both set to
    1 on success or 0 on any failure, plus ``length`` (duration in seconds)
    whenever the duration could be read before the failure occurred.
  """
  os.makedirs(SAVE_PATH, exist_ok=True)
  download_details = {}
  try:
    # Building the YouTube object can itself raise on a malformed link, so it
    # belongs inside the try: one bad URL must not abort a whole batch run.
    yt = YouTube(link)
    download_details['length'] = int(yt.length)
    if download_details['length'] >= 75*60:
      raise ValueError('video is 75 minutes or longer')
    audio_streams = yt.streams.filter(only_audio=True).order_by('abr').desc()
    if len(audio_streams) == 0:
      raise ValueError('no audio-only stream available')
    # NOTE(review): the file is only *named* .wav; presumably it keeps the
    # container of the chosen stream -- confirm downstream loaders cope.
    audio_streams[0].download(SAVE_PATH, filename='x.wav')
    downloadable = 1
  except Exception as err:
    # Exception (not a bare except) so KeyboardInterrupt still stops the run.
    print('download_audio failed for', link, ':', repr(err))
    downloadable = 0
  download_details['audio_downloadable'] = downloadable
  download_details['audio_downloadable_2'] = downloadable
  return download_details

In [None]:
import glob

In [None]:
# Delete Downloaded files
def delete_download_files():
  """Remove every entry directly under SAVE_PATH on a best-effort basis.

  Entries that os.remove cannot delete are left in place and skipped.
  """
  for path in glob.glob(SAVE_PATH+"/*"):
    try:
      os.remove(path)
    except OSError:
      # Only filesystem errors are ignored, so KeyboardInterrupt and genuine
      # programming errors still surface instead of being swallowed.
      continue

In [None]:
!pip install librosa

In [None]:
import librosa
import soundfile as sf

In [None]:
def sampling(audio_path, audio_length, clip_seconds=10, stride_seconds=60):
    """Cut short clips out of an audio file at a regular stride and save them.

    Starting at second 0, a window of ``clip_seconds`` is taken every
    ``stride_seconds``. The last window is shortened so that it ends at
    ``audio_length``. Each window is written to ``SAVE_PATH/<index>.wav``.

    Args:
        audio_path: Path of the audio file to load with librosa.
        audio_length: Duration in seconds used to place the windows; the
            decoded audio itself is not measured.
        clip_seconds: Length of each clip in seconds (default 10).
        stride_seconds: Distance between clip starts in seconds (default 60).

    Returns:
        List of ``(start, end)`` tuples in seconds, one per clip written.

    Raises:
        ValueError: If ``stride_seconds`` is not positive (the window loop
            would never terminate).
    """
    if stride_seconds <= 0:
        raise ValueError('stride_seconds must be positive')

    samples = []
    start = 0
    end = start + clip_seconds
    while end < audio_length:
        samples.append((start, end))
        start = start + stride_seconds
        end = start + clip_seconds
    if start < audio_length:
        # Trailing partial window: keep whatever is left of the audio.
        samples.append((start, audio_length))

    if not samples:
        # Nothing to cut, so skip decoding the whole file.
        return samples

    X, sample_rate = librosa.core.load(audio_path)
    for i, (clip_start, clip_end) in enumerate(samples):
        # Convert seconds to sample indices; int() keeps the slice valid even
        # when fractional clip or stride lengths are passed in.
        first = int(clip_start*sample_rate)
        last = int(clip_end*sample_rate)
        sf.write(SAVE_PATH+'/'+str(i)+'.wav', X[first:last], sample_rate)
    return samples

In [None]:
!pip install inaSpeechSegmenter

In [None]:
from PyVoiceGR.utils import load_data, split_data, create_model
from PyVoiceGR.test import extract_feature
from inaSpeechSegmenter import Segmenter
import numpy as np

In [None]:
# Single inaSpeechSegmenter instance, created once and reused for every clip
# inside audio_gender_recognition().
seg = Segmenter()

In [None]:
# Build the PyVoiceGR model and load its weights from the bundled results
# file; audio_gender_recognition() uses it as the "second model".
model = create_model()
model.load_weights("PyVoiceGR/results/model.h5")

In [None]:
def audio_gender_recognition(audio_split):
    """Estimate the share of male / female speech in the sampled clips.

    For every clip ``SAVE_PATH/<i>.wav`` (``i`` in ``range(len(audio_split))``)
    the segmenter ``seg`` is run. Each segment is read as
    ``(label, start_seconds, end_seconds)``. Segments labelled 'male' or
    'female' that last longer than one second are counted for the first model
    using the segmenter's own label, and are also classified by ``model``
    (the second model) from features of the corresponding audio slice.

    Args:
        audio_split: Sequence with one entry per clip; only its length is used.

    Returns:
        Tuple ``(male_ratio, female_ratio, no_speech_ratio,
        male_ratio_second_model, female_ratio_second_model,
        no_speech_ratio_second_model)``. Ratios are relative to the summed
        duration of all segments; all gender ratios are 0 when nothing was
        segmented.
    """
    male_sum = 0
    female_sum = 0
    male_sum_second_model = 0
    female_sum_second_model = 0
    total_time = 0

    for i in range(len(audio_split)):
        clip_path = SAVE_PATH+'/'+str(i)+'.wav'
        try:
            segmentation = seg(clip_path)
            X, sample_rate = librosa.core.load(clip_path)
            for s in segmentation:
                seg_label, seg_start, seg_end = s[0], s[1], s[2]
                duration = seg_end - seg_start
                # NOTE(review): every segment counts toward the denominator,
                # so speech segments of <= 1 s end up in the no-speech share.
                total_time += duration
                if seg_label not in ('male', 'female') or not duration > 1:
                    continue

                # First model: trust the segmenter's own gender label.
                if seg_label == 'male':
                    male_sum += duration
                else:
                    female_sum += duration

                # Second model: classify features of the segment's audio.
                # NOTE(review): the slice is rounded outward to whole seconds,
                # so it can include audio from neighbouring segments.
                chunk = X[int(seg_start)*sample_rate:int(seg_end+1)*sample_rate]
                features = extract_feature(chunk, sample_rate, mel=True).reshape(1, -1)
                male_prob = model.predict(features)[0][0]
                female_prob = 1 - male_prob
                # argmax picks index 0 on a tie, i.e. 'male'.
                if np.argmax([male_prob, female_prob]) == 0:
                    male_sum_second_model += duration
                else:
                    female_sum_second_model += duration
        except Exception as err:
            # Best effort: a broken clip is skipped (durations accumulated
            # before the failure are kept), but the failure is reported and
            # KeyboardInterrupt is no longer swallowed.
            print('audio_gender_recognition skipped', clip_path, ':', repr(err))
            continue

    if total_time > 0:
        male_ratio = male_sum/total_time
        female_ratio = female_sum/total_time
        male_ratio_second_model = male_sum_second_model/total_time
        female_ratio_second_model = female_sum_second_model/total_time
    else:
        male_ratio = 0
        female_ratio = 0
        male_ratio_second_model = 0
        female_ratio_second_model = 0
    no_speech_ratio = 1 - (male_ratio+female_ratio)
    no_speech_ratio_second_model = 1 - (male_ratio_second_model+female_ratio_second_model)
    return male_ratio, female_ratio, no_speech_ratio, male_ratio_second_model, female_ratio_second_model, no_speech_ratio_second_model


In [None]:
def prediction(male_ratio, female_ratio, no_speech_ratio):
  """Turn speech ratios into a label: 'NA', 'Neutral', 'Male' or 'Female'.

  'NA' is returned when at least 90% is non-speech. 'Neutral' is returned when
  both genders are present and their ratios differ by less than 0.25.
  Otherwise the larger ratio decides, with ties going to 'Female'.
  """
  # Too little speech to say anything.
  if no_speech_ratio >= 0.9:
    return 'NA'

  both_genders_heard = (male_ratio > 0) and (female_ratio > 0)
  if both_genders_heard and abs(male_ratio - female_ratio) < 0.25:
    return 'Neutral'

  return 'Male' if male_ratio > female_ratio else 'Female'

In [None]:
def analyze(link):
    """Download a video's audio, sample it and predict the speaker gender.

    Args:
        link: URL of the YouTube video.

    Returns:
        The dict produced by download_audio(), extended with ``male_ratio``,
        ``female_ratio``, ``no_speech_ratio``, ``pred`` and the matching
        ``*_second_model`` keys. When the audio could not be downloaded or
        yielded no clips, the ratios are 0 and both predictions are 'NA'.
    """
    sample = download_audio(link)
    try:
        audio_split = []
        # Only cut clips when the download succeeded.
        if (sample['audio_downloadable']==1) or (sample['audio_downloadable_2']==1):
            audio_split = sampling(SAVE_PATH+'/x.wav', sample['length'])

        if len(audio_split) != 0:
            # Gender recognition with both models, then one label per model.
            sample['male_ratio'], sample['female_ratio'], sample['no_speech_ratio'], sample['male_ratio_second_model'], sample['female_ratio_second_model'], sample['no_speech_ratio_second_model'] = audio_gender_recognition(audio_split)
            sample['pred'] = prediction(sample['male_ratio'], sample['female_ratio'], sample['no_speech_ratio'])
            sample['pred_second_model'] = prediction(sample['male_ratio_second_model'], sample['female_ratio_second_model'], sample['no_speech_ratio_second_model'])
        else:
            # Nothing to analyse: same placeholder values for both models.
            sample.update({
                'male_ratio': 0,
                'female_ratio': 0,
                'no_speech_ratio': 0,
                'pred': 'NA',
                'male_ratio_second_model': 0,
                'female_ratio_second_model': 0,
                'no_speech_ratio_second_model': 0,
                'pred_second_model': 'NA',
            })
    finally:
        # Always clear the working directory, even if sampling or recognition
        # raised, so files from a failed video are not left behind.
        delete_download_files()
    return sample

In [None]:
import numpy as np
import time

In [None]:
# Batch driver: analyse every not-yet-processed video listed in the STEM CSVs
# and write the results back into the same CSV after each video.

# Pauses between videos and between files.
# NOTE(review): presumably these avoid being throttled by YouTube -- confirm.
PAUSE_AFTER_VIDEO_SECONDS = 2*60
PAUSE_AFTER_FILE_SECONDS = 10*60

files = glob.glob('data/STEM/*/*.csv')
for file in files:
  change = False
  file_name = os.path.basename(file)
  data = pd.read_csv(file)
  data['Gender'] = data['Gender'].fillna('NA')
  for i in data.index:
    # A row is done once both download flags exist and are filled in.
    # pd.notna is used because np.isnan raises TypeError on non-float cells.
    already_done = all(
        (col in data.columns) and pd.notna(data.loc[i, col])
        for col in ('audio_downloadable', 'audio_downloadable_2')
    )
    if already_done:
      continue
    change = True
    link = data.loc[i,'URL']
    sample = analyze(link)
    for k in sample:
      data.loc[i,k] = sample[k]
    # Save after every video so an interrupted run can resume where it stopped.
    data.to_csv(file, index=False)
    time.sleep(PAUSE_AFTER_VIDEO_SECONDS)
  print(file_name,'is done................................................................................................')
  if change:
    # Longer pause only when this file actually triggered downloads.
    time.sleep(PAUSE_AFTER_FILE_SECONDS)