<a href="https://colab.research.google.com/github/mfatmam/voice/blob/main/reg.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns


In [None]:
# Read the voice dataset CSV from /content into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/content/voice.csv")


In [None]:
!pip install pyaudio
import librosa
import numpy as np
from scipy.stats import skew, kurtosis
from scipy.signal import find_peaks
from google.colab import files


def extract_voice_features(signal, sample_rate):
    """Compute a dictionary of 20 summary statistics for an audio signal.

    The key names presumably mirror the columns of the voice dataset
    (TODO confirm), but note what each value actually is in this
    implementation:

    * ``meanfreq``, ``sd``, ``median``: mean, standard deviation and median
      of the STFT magnitude matrix (magnitudes, not frequencies).
    * ``Q25``, ``Q75``, ``IQR``, ``skew``, ``kurt``: percentiles, skewness
      and kurtosis of the raw samples.
    * ``sp.ent``: Shannon entropy (base 2) of the STFT magnitudes normalised
      to sum to 1; ``0.0`` when the spectrum is entirely zero.
    * ``sfm``: mean spectral flatness.
    * ``mode``: mean of all MFCC coefficients.
    * ``centroid``: mean spectral centroid.
    * ``meanfun``: mean RMS energy; ``minfun`` / ``maxfun``: smallest and
      largest sample value.
    * ``meandom``, ``mindom``, ``maxdom``, ``dfrange``: mean, minimum,
      maximum and range of the sample *indices* at which ``|signal|`` has a
      local peak (all 0 when there is no peak).
    * ``modindx``: mean absolute sample-to-sample difference divided by the
      amplitude range; ``0.0`` when the signal is constant.

    No ``peakf`` entry is produced.

    Args:
        signal: Audio samples, as returned by ``librosa.load``
            (assumed to be a 1-D float array — TODO confirm).
        sample_rate: Sampling rate of ``signal``; forwarded to the MFCC and
            spectral-centroid computations.

    Returns:
        dict mapping feature name to a scalar value.
    """
    # One STFT magnitude matrix serves every spectrum-based statistic below
    # (the transform is deterministic, so recomputing it adds nothing).
    magnitude_spectrum = np.abs(librosa.core.stft(signal))

    meanfreq = np.mean(magnitude_spectrum)
    sd = np.std(magnitude_spectrum)
    median = np.median(magnitude_spectrum)

    # Quartiles and inter-quartile range of the raw samples.
    q25, q75 = np.percentile(signal, [25, 75])
    iqr = q75 - q25

    # Shape statistics of the raw sample distribution.
    skewness = skew(signal)
    kurtosis_value = kurtosis(signal)

    # Spectral entropy. A silent signal has an all-zero spectrum; dividing by
    # its sum would yield NaN, so report zero entropy in that case.
    spectrum_total = np.sum(magnitude_spectrum)
    if spectrum_total > 0:
        normalized_spectrum = magnitude_spectrum / spectrum_total
        # The small epsilon keeps log2 finite for zero-valued bins.
        spectral_entropy = -np.sum(
            normalized_spectrum * np.log2(normalized_spectrum + 1e-12)
        )
    else:
        spectral_entropy = 0.0

    sfm = np.mean(librosa.feature.spectral_flatness(y=signal))
    mode = np.mean(librosa.feature.mfcc(y=signal, sr=sample_rate))
    centroid = np.mean(librosa.feature.spectral_centroid(y=signal, sr=sample_rate))

    mean_fun = np.mean(librosa.feature.rms(y=signal))
    min_fun = np.min(signal)
    max_fun = np.max(signal)

    # find_peaks returns the indices of local maxima of |signal|; the
    # "dominant frequency" statistics are taken over those indices.
    peaks, _ = find_peaks(np.abs(signal))
    if len(peaks) > 0:
        mean_dom = np.mean(peaks)
        min_dom = np.min(peaks)
        max_dom = np.max(peaks)
    else:
        mean_dom = min_dom = max_dom = 0
    dfrange = max_dom - min_dom

    # Modulation index; a constant signal has zero amplitude range, so avoid
    # the division by zero and report 0.
    amplitude_range = max_fun - min_fun
    if amplitude_range > 0:
        mod_indx = np.mean(np.abs(np.diff(signal))) / amplitude_range
    else:
        mod_indx = 0.0

    features = {
        'meanfreq': meanfreq,
        'sd': sd,
        'median': median,
        'Q25': q25,
        'Q75': q75,
        'IQR': iqr,
        'skew': skewness,
        'kurt': kurtosis_value,
        'sp.ent': spectral_entropy,
        'sfm': sfm,
        'mode': mode,
        'centroid': centroid,
        'meanfun': mean_fun,
        'minfun': min_fun,
        'maxfun': max_fun,
        'meandom': mean_dom,
        'mindom': min_dom,
        'maxdom': max_dom,
        'dfrange': dfrange,
        'modindx': mod_indx,
    }

    return features


# Example usage: pick a recording through the Colab upload dialog.
uploaded = files.upload()
# files.upload() returns a dict keyed by the saved file names, so take the
# name from there instead of assuming the file is called 'audio.wav'
# (that name is kept as a fallback when nothing was uploaded).
audio_path = next(iter(uploaded or {}), 'audio.wav')

# Load the audio (sr=None keeps the file's native sampling rate) and
# extract its features.
signal, sample_rate = librosa.load(audio_path, sr=None)
voice_features = extract_voice_features(signal, sample_rate)
# Allowed interval for each feature, stored as {'min': low, 'max': high}.
# NOTE(review): presumably the observed min/max of each dataset column —
# confirm against voice.csv.
feature_ranges = {
    name: {'min': low, 'max': high}
    for name, (low, high) in {
        'meanfreq': (0.039363, 0.251124),
        'sd': (0.018363, 0.115273),
        'median': (0.010975, 0.261224),
        'Q25': (0.000229, 0.247347),
        'Q75': (0.042946, 0.273469),
        'IQR': (0.014558, 0.252225),
        'skew': (0.141735, 34.725453),
        'kurt': (2.068455, 1309.612887),
        'sp.ent': (0.738651, 0.981997),
        'sfm': (0.036876, 0.842936),
        'mode': (0.0, 0.28),
        'centroid': (0.039363, 0.251124),
        'peakf': (0.103093, 0.279114),
        'meanfun': (0.055565, 0.237636),
        'minfun': (0.009775, 0.204082),
        'maxfun': (0.103093, 0.279114),
        'meandom': (0.007812, 2.957682),
        'mindom': (0.004883, 0.458984),
        'maxdom': (0.007812, 21.867188),
        'dfrange': (0.0, 21.84375),
        'modindx': (0.0, 0.932374),
    }.items()
}

# Clamp every extracted feature into its [min, max] interval from
# feature_ranges; values already inside the interval are left unchanged.
# NOTE(review): in the recorded output nearly every feature ends up pinned
# to one of its bounds, which suggests the extractor's scales do not match
# these ranges — confirm before trusting downstream predictions.
for name, value in list(voice_features.items()):
    bounds = feature_ranges[name]
    voice_features[name] = min(max(value, bounds['min']), bounds['max'])

# Show the features after clamping.
print(voice_features)

Collecting pyaudio
  Downloading PyAudio-0.2.14.tar.gz (47 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/47.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyaudio
  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for pyaudio [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Building wheel for pyaudio (pyproject.toml) ... [?25l[?25herror
[31m  ERROR: Failed building wheel for pyaudio[0m

Saving audio.wav to audio.wav
{'meanfreq': 0.251124, 'sd': 0.115273, 'median': 0.010975, 'Q25': 0.000229, 'Q75': 0.042946, 'IQR': 0.014558, 'skew': 0.141735, 'kurt': 9.244614422999266, 'sp.ent': 0.981997, 'sfm': 0.036876, 'mode': 0.0, 'centroid': 0.251124, 'meanfun': 0.055565, 'minfun': 0.009775, 'maxfun': 0.279114, 'meandom': 2.957682, 'mindom': 0.458984, 'maxdom': 21.867188, 'dfrange': 21.84375, 'modindx': 0.0049039572}


In [None]:
# Flatten the feature dict into a NumPy vector, keeping the dict's
# insertion order, and display it.
feature_values = [voice_features[name] for name in voice_features]
valeurs_array = np.array(feature_values)
print(valeurs_array)

[2.51124000e-01 1.15273000e-01 1.09750000e-02 2.29000000e-04
 4.29460000e-02 1.45580000e-02 1.41735000e-01 9.24461442e+00
 9.81997000e-01 3.68760000e-02 0.00000000e+00 2.51124000e-01
 5.55650000e-02 9.77500000e-03 2.79114000e-01 2.95768200e+00
 4.58984000e-01 2.18671880e+01 2.18437500e+01 4.90395725e-03]


Prédiction de genre : [0.251124, 0.115273, 0.010975, 0.000229, 0.042946, 0.014558, 0.141735, 9.244614422999266, 0.981997, 0.036876, 0.0, 0.251124, 0.055565, 0.009775, 0.279114, 2.957682, 0.458984, 21.867188, 21.84375, 0.00490395724773407, 'Male']


In [None]:
# RF
!pip install joblib
!pip install pydub gtts
!apt-get install -y ffmpeg
!pip install pydub
from IPython.display import Audio
from pydub import AudioSegment
from pydub.playback import play
from gtts import gTTS
import pandas as pd
import pygame
import os
from google.colab import files
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# NOTE(review): X (feature matrix) and y (labels) are not defined in the
# visible cells; they must be created by an earlier cell before this runs.

# Hold out 20% of the samples for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a 100-tree random forest on the training split.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate on the held-out split.
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

# Persist the trained forest to disk with joblib.
from joblib import dump

dump(model, 'random_forest_model.joblib')

# Classify the features extracted from the uploaded voice.
nouvelles_caracteristiques = valeurs_array.tolist()
nouvelle_prediction = model.predict([nouvelles_caracteristiques])

welcome_message = "Welcome!"
outsider_message = "You are an outsider!"

# predict() returns one label per input row; inspect the single element
# explicitly instead of relying on the truthiness of a one-element array.
# Assumes label 1 means "the voice belongs to the database" — TODO confirm.
if nouvelle_prediction[0] == 1:
    print("La voix appartient à la base de données.")
    message_kind, spoken_text = "welcome", welcome_message
else:
    print("La voix n'appartient pas à la base de données.")
    message_kind, spoken_text = "outsider", outsider_message

# Both outcomes synthesise the message, save it under /content/ and embed an
# auto-playing audio widget; only the text and file name differ.
mp3_path = f"/content/{message_kind}_message.mp3"
print(f"Playing {message_kind} message...")
tts = gTTS(spoken_text)
tts.save(mp3_path)
display(Audio(mp3_path, autoplay=True))
print(f"{message_kind.capitalize()} message played.")


Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Collecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Installing collected packages: pydub, gtts
Successfully installed gtts-2.5.1 pydub-0.25.1
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
pygame 2.5.2 (SDL 2.28.2, Python 3.10.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
Accuracy: 0.9842271293375394
La voix appartient à la base de données.
Playing welcome message...




Welcome message played.


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat
