In [1]:
import numpy as np
import librosa
import librosa.display
import os
from scipy.fft import *
import matplotlib.pyplot as plt
from scipy.signal import find_peaks

# **Frequency**

In [3]:
# Load the audio file
def calculate_average_frequency(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            # Perform Short-Time Fourier Transform (STFT)
            S = np.abs(librosa.stft(audio_data))

            # Get the corresponding frequencies for the rows in the STFT matrix
            frequencies = librosa.fft_frequencies(sr=sr)

            # Compute the magnitude of frequencies (sum of the power of each frequency)
            magnitude = np.mean(S, axis=1)

            # Calculate the weighted average frequency
            average_frequency = np.sum(magnitude * frequencies) / np.sum(magnitude)

            print(f"{average_frequency: .2f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [5]:
calculate_average_frequency('training/real')

 2498.87
 1297.78
 2952.14
 2926.94
 1741.95
 1687.70
 2223.65
 1399.88
 1922.16
 2222.14
 1896.06
 1750.07
 1594.76
 1889.99
 1707.72
 1124.92
 1457.17
 821.11
 1785.70
 2642.41
 1155.36
 2531.88
 2594.60
 1250.30
 1726.12
 1372.84
 1775.05
 2670.64
 2130.85
 1487.77
 1596.05
 2035.10
 1025.12
 1176.03
 1470.91
 1301.89
 1839.92
 1502.11
 1545.57
 1928.87
 1967.70
 1785.18
 2261.56
 2425.85
 2578.50
 1786.67
 1454.43
 2160.53
 1857.55
 2718.80
 2805.17
 2287.04
 3034.54
 2588.73
 1270.17
 1066.36
 2157.30
 2050.28
 1888.58
 2629.91
 1140.11
 2100.36
 2526.29
 1941.99
 1746.61
 828.41
 2084.47
 1522.46
 847.61
 1794.67
 2470.20
 1188.05
 1961.06
 2746.50
 2648.33
 1879.23
 2034.50
 2954.33
 1995.14
 1442.65
 1396.66
 2320.77
 2432.66
 1789.75
 2119.53
 1946.26
 306.55
 1368.86
 2213.20
 2005.89
 606.37
 1845.96
 2125.04
 1547.80
 2517.27
 1448.85
 2835.04
 2016.55
 2852.55
 2510.75
 1858.25
 1701.02
 1534.25
 1031.99
 1567.68
 1661.73
 1030.02
 2441.82
 2317.75
 1843.06
 2120.28
 2085.

# **Amplitude**

In [7]:
# Load the audio file
def amplitude(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            if len(audio_data.shape) > 1:
              audio_data = audio_data[:, 0]

            rms_amplitude = np.sqrt(np.mean(np.square(audio_data)))
            print(f"{rms_amplitude: .4f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [9]:
amplitude('training/real')

 0.0917
 0.2240
 0.1314
 0.1050
 0.1309
 0.1253
 0.0947
 0.1449
 0.1453
 0.1227
 0.1609
 0.1401
 0.2049
 0.1067
 0.1688
 0.1906
 0.1174
 0.1049
 0.1315
 0.0807
 0.1649
 0.1723
 0.1314
 0.1154
 0.1825
 0.1340
 0.0913
 0.1581
 0.1806
 0.1063
 0.2153
 0.1439
 0.2050
 0.1679
 0.1855
 0.2381
 0.2765
 0.1130
 0.1342
 0.1427
 0.1071
 0.1283
 0.1210
 0.1004
 0.1126
 0.2014
 0.1602
 0.2371
 0.1503
 0.1161
 0.1620
 0.0967
 0.1554
 0.1448
 0.2430
 0.1727
 0.1287
 0.2451
 0.1689
 0.1670
 0.1996
 0.1033
 0.1003
 0.2338
 0.1549
 0.1768
 0.0567
 0.1904
 0.1434
 0.1243
 0.1187
 0.1362
 0.0954
 0.1312
 0.3556
 0.0676
 0.1954
 0.0577
 0.1310
 0.1748
 0.1688
 0.0715
 0.1623
 0.2229
 0.1751
 0.1827
 0.4573
 0.2527
 0.1144
 0.1592
 0.2390
 0.1387
 0.1625
 0.1502
 0.1823
 0.1141
 0.1482
 0.1934
 0.0632
 0.1189
 0.1529
 0.1888
 0.2280
 0.2434
 0.2657
 0.1167
 0.2248
 0.1782
 0.1442
 0.1558
 0.1343
 0.1334
 0.1467
 0.1526
 0.1243
 0.1088
 0.1330
 0.1441
 0.1151
 0.0740
 0.1162
 0.1335
 0.1444
 0.1120
 0.1382


# **Pitch Variability**

In [16]:
# Load the audio file
def pitch_variability(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            pitches, magnitudes = librosa.core.piptrack(y=audio_data, sr=sr)
            pitch_values = []
            for t in range(pitches.shape[1]):
              index = magnitudes[:, t].argmax()
              pitch = pitches[index, t]
              if pitch > 0:
                pitch_values.append(pitch)

            pitch_values = np.array(pitch_values)
            if pitch_values.size ==0: #Handle the case where there are no pitch values
                print(f"No pitch values found for {filename}")
            else:
                pitch_variability = np.std(pitch_values)
                print(f"{pitch_variability:.2f}")
        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [None]:
pitch_variability('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

584.20
586.80
256.85
427.63
1085.17
908.40
185.96
784.63
579.97
796.06
684.76
875.66
811.88
379.72
785.68
415.15
395.52
500.75
1058.05
194.18
815.60
748.61
786.06
501.88
1196.17
1156.40
1207.29
163.01
396.51
169.16
1017.96
190.02
455.74
1083.42
329.20
673.06
544.91
498.51
700.02
130.10
1359.29
424.26
968.88
827.11
704.03
970.54
427.88
1149.77
206.64
581.48
901.31
706.91
140.72
343.25
1039.11
1215.13
905.41
476.46
113.78
200.14
1048.75
410.46
575.85
178.64
760.83
727.55
610.45
242.48
1126.94
880.14
907.94
642.81
164.24
361.60
1169.01
578.04
447.83
995.97
426.45
1012.20
872.34
912.46
745.31
1102.01
872.30
0.02
426.12
619.83
605.26
300.47
510.88
677.89
369.54
715.77
175.56
851.24
939.94
678.92
768.92
479.07
586.21
762.26
1210.21
1165.59
466.42
528.58
702.77
712.81
111.93
788.56
760.19
456.01
954.86
867.43
758.57
935.88
1170.54
1302.45
938.11
994.24
1082.44
937.12
943.49
294.76
96.48
1034.53
783.63
238.47
580.25
527.65
599.02
1080.43
384.07
573.65
231.73
615.03
1161.11
1078.91
1405.58
760.

# **Speech Rate**

In [4]:
# Load the audio file
def speech_rate(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            zero_crossings = librosa.zero_crossings(audio_data, pad=False)
            speech_rate = sum(zero_crossings) / len(audio_data) * sr

            print(f"{speech_rate:.2f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [5]:
speech_rate('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

4300.50
2487.50
2150.50
907.00
1685.00
1778.00
2794.00
2224.50
3007.50
3695.00
4309.50
2253.00
2216.50
3457.00
863.00
2326.00
1671.00
893.50
5454.00
736.00
3164.50
1495.00
1413.50
1013.50
4383.00
2024.50
4322.50
1776.00
3545.00
3273.50
2631.50
2279.50
3677.50
2811.00
2831.00
1741.50
2771.50
1891.00
1057.00
2430.00
3502.00
3075.00
2789.00
3187.50
4010.50
3915.00
2195.00
1627.50
4068.00
2832.00
2351.50
535.00
453.50
2322.00
4022.00
1340.00
1840.50
1197.00
343.00
2927.00
2320.50
1578.00
1864.50
1639.50
3749.00
1720.00
3282.50
2135.50
2753.00
2973.50
1315.00
3121.00
2445.00
1980.00
3301.50
2526.00
3359.00
2012.50
1769.50
2201.50
2374.50
3633.00
2568.00
4264.50
1568.50
0.00
1513.50
3588.00
2284.00
3663.00
2214.00
4749.50
1053.50
3762.00
1130.00
1418.50
3012.50
1868.50
2453.00
2898.50
2726.00
1918.50
3156.50
1520.00
2560.00
1957.00
3930.50
3344.00
2191.00
2289.00
1305.00
1690.50
2886.00
2226.50
1997.50
1785.00
4103.50
2717.00
2280.00
2507.00
2065.50
2263.00
1500.50
2466.00
1782.50
2326.50
27

# **Average Zero Crossing Rate**

In [10]:
# Load the audio file
def ZCR(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            # Calculate the Zero-Crossing Rate
            zcr = librosa.feature.zero_crossing_rate(audio_data)

            # Calculate the average ZCR
            average_zcr = zcr.mean()

            print(f"{average_zcr:.3f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [9]:
ZCR('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

0.193
0.111
0.095
0.040
0.075
0.079
0.125
0.097
0.133
0.164
0.193
0.101
0.099
0.155
0.039
0.103
0.075
0.040
0.244
0.033
0.142
0.067
0.063
0.045
0.196
0.091
0.192
0.079
0.159
0.147
0.117
0.102
0.165
0.126
0.127
0.078
0.124
0.084
0.047
0.109
0.157
0.138
0.124
0.141
0.179
0.175
0.098
0.072
0.182
0.126
0.105
0.024
0.020
0.101
0.179
0.060
0.082
0.053
0.015
0.131
0.103
0.070
0.083
0.073
0.168
0.077
0.146
0.095
0.123
0.133
0.058
0.138
0.109
0.088
0.148
0.113
0.150
0.089
0.079
0.098
0.105
0.162
0.115
0.191
0.070
0.000
0.067
0.159
0.101
0.164
0.098
0.212
0.047
0.166
0.050
0.063
0.135
0.084
0.110
0.128
0.122
0.085
0.140
0.068
0.114
0.087
0.175
0.150
0.098
0.101
0.058
0.076
0.128
0.100
0.089
0.079
0.184
0.122
0.100
0.111
0.091
0.101
0.067
0.110
0.080
0.103
0.122
0.060
0.094
0.107
0.184
0.109
0.058
0.083
0.071
0.084
0.155
0.081
0.125
0.049
0.061
0.106
0.129
0.162
0.083
0.116
0.059
0.194
0.159
0.108
0.029
0.013
0.107
0.132
0.176
0.000
0.212
0.124
0.151
0.110
0.165
0.060
0.028
0.129
0.109
0.084
0.16

# **Temporal Centroid**

In [17]:
# Load the audio file
def compute_temporal_centroid(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)

            # Calculate the time frame for each sample
            time_frames = np.arange(len(audio_data)) / sr

            # Compute the energy (squared amplitude)
            energy = audio_data ** 2

            # Calculate the temporal centroid
            temporal_centroid = np.sum(time_frames * energy) / np.sum(energy)

            print(f"{temporal_centroid:.3f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [18]:
compute_temporal_centroid('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

0.603
0.967
0.745
1.299
0.590
0.752
0.728
0.829
0.858
0.931
1.029
0.812
0.733
0.699
1.249
0.644
0.877
0.833
0.739
0.890
0.777
0.895
0.763
0.838
0.699
1.145
0.477
0.755
0.869
0.985
0.717
0.629
0.947
1.074
0.921
0.976
0.997
1.122
0.943
1.006
0.914
0.913
0.938
0.437
0.736
0.850
1.192
0.715
0.986
0.904
0.713
1.152
1.175
0.645
0.638
1.286
0.895
0.901
0.696
0.818
0.827
0.748
0.982
0.981
0.836
0.704
0.688
0.734
0.897
0.838
0.784
1.115
0.844
0.660
0.511
0.837
0.988
0.809
0.756
0.887
0.855
0.754
0.630
1.013
0.871
1.004
0.928
0.942
0.558
0.825
1.140
0.814
1.011
0.844
0.888
1.090
0.678
0.886
0.836
0.826
0.934
0.645
0.990
1.306
1.015
1.075
0.622
0.977
0.716
0.615
0.902
0.515
0.876
0.698
0.649
0.742
0.467
0.937
0.820
0.834
0.892
0.819
0.929
0.897
0.946
0.980
0.919
0.844
1.029
0.452
1.049
0.567
1.093
0.748
0.773
0.858
0.717
0.639
0.509
0.915
0.887
0.588
0.682
0.785
0.894
0.368
0.822
0.885
1.028
0.872
0.774
1.096
0.836
0.830
0.860
1.000
0.855
0.785
0.766
0.902
0.858
0.742
1.443
0.696
0.923
0.896
0.80

# **Spectral Centroid**

In [21]:
# Load the audio file
def spec_centroid(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)

            spectral_centroid = librosa.feature.spectral_centroid(y=audio_data, sr=sr)[0]
            print(f"{np.mean(spectral_centroid):.2f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [22]:
spec_centroid('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

2655.64
1696.20
1826.80
1164.20
1379.57
1472.14
2024.96
1650.48
2174.01
2501.32
2605.70
1668.44
2004.11
2208.14
1189.49
1888.43
1240.98
968.27
3155.20
909.53
1920.47
1780.44
1248.72
965.70
2616.88
1819.17
2843.24
1411.97
2163.05
2126.41
2009.71
1885.92
2416.08
2040.90
2061.62
1580.53
1816.00
1519.06
1127.96
1950.22
2428.26
1914.27
2084.78
2153.34
2422.31
2506.77
1708.27
1526.75
2443.29
1993.08
1821.20
896.41
786.65
1823.47
2639.23
1661.22
1444.28
1350.87
1040.18
1847.24
2162.55
1606.05
1521.97
1517.77
2465.19
1651.55
2249.15
1753.65
2046.56
1875.78
1330.76
2044.14
1917.82
1758.14
2219.60
2159.53
2290.13
1606.49
1385.76
1872.82
1634.66
2378.60
1856.88
2889.91
1390.06
225.90
1447.44
2316.48
1736.09
2290.70
1904.42
2803.74
1001.66
2362.61
1204.33
1335.57
1974.42
1517.82
1788.53
2207.41
2190.77
1657.13
2082.54
1708.31
1951.13
1702.05
2429.87
2250.87
1791.91
1707.63
1374.06
1327.81
2047.67
1746.29
1685.75
1556.73
2367.41
1889.28
1642.45
2145.56
1809.45
1662.49
1650.71
1795.11
1415.88
1520.9

# **Spectral Bandwidth**

In [27]:
# Load the audio file
def spec_bandwidth(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio_data, sr=sr)
            print(f"{np.mean(spectral_bandwidth):.2f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [28]:
spec_bandwidth('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

1656.43
1478.08
1569.31
1266.04
1213.43
1326.03
1735.98
1453.84
1641.40
1712.56
1663.29
1366.92
1787.13
1538.99
1435.36
1474.73
1171.03
1248.39
1710.60
1299.67
1474.16
1701.92
1246.62
1066.33
1669.65
1541.41
1871.78
1368.30
1471.58
1505.86
1488.46
1572.37
1634.32
1590.14
1668.43
1411.08
1521.17
1396.18
1432.19
1673.71
1640.46
1361.65
1638.44
1592.43
1529.00
1637.50
1667.95
1381.01
1612.97
1583.63
1464.92
1306.76
1263.91
1667.19
1673.98
1499.62
1363.12
1529.68
1642.63
1443.59
2027.75
1446.14
1402.20
1595.24
1505.95
1422.41
1662.79
1564.64
1569.65
1079.84
1279.45
1535.47
1588.36
1665.15
1597.12
1797.49
1539.66
1603.63
1240.77
1556.03
1281.41
1610.06
1528.92
1873.75
1375.29
765.88
1240.52
1512.70
1448.52
1647.21
1552.22
1654.57
1293.73
1529.85
1383.88
1438.39
1540.25
1526.59
1478.46
1627.47
1776.44
1663.47
1646.16
1530.43
1676.70
1593.19
1629.08
1667.30
1527.83
1516.59
1518.58
1399.59
1603.80
1581.35
1540.24
1287.76
1350.15
1475.73
1388.26
1607.70
1627.55
1475.25
1730.23
1614.73
1414.52
1

# **Spectral Contrast**

In [35]:
# Load the audio file
def spec_contrast(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            spectral_contrast = librosa.feature.spectral_contrast(y=audio_data, sr=sr)
            print(f"{np.mean(spectral_contrast):.2f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [36]:
spec_contrast('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

26.60
25.17
27.53
27.05
23.52
23.82
27.90
25.45
27.43
26.91
27.36
24.07
23.96
26.44
23.80
26.07
26.10
23.33
26.25
23.52
25.03
25.17
24.32
26.27
26.63
22.63
22.95
23.64
27.22
26.41
26.91
27.04
26.48
25.71
25.73
27.40
25.77
24.45
23.52
26.59
26.43
26.80
26.98
27.40
26.01
27.62
24.71
23.61
26.57
27.59
26.30
22.90
22.61
25.91
26.74
25.19
25.67
25.21
24.25
26.74
22.80
26.54
27.68
27.20
26.86
25.24
25.61
27.88
23.91
23.48
21.07
26.22
27.01
26.03
26.87
28.15
27.94
23.99
26.26
23.16
26.86
26.18
26.97
26.27
24.50
24.69
26.48
27.11
27.95
26.07
25.97
26.31
23.75
27.44
25.19
23.52
26.21
24.60
27.07
27.79
27.84
23.42
25.21
23.21
26.72
28.51
26.08
26.74
27.31
24.98
26.55
22.03
25.11
24.22
27.67
23.47
22.58
24.18
25.34
24.72
25.79
24.87
24.91
26.21
25.35
26.30
25.83
24.32
22.80
23.22
26.55
28.20
23.48
26.74
23.85
24.34
24.47
22.11
23.20
24.17
22.51
26.92
26.73
25.91
24.04
25.92
25.30
26.39
25.48
26.86
22.75
23.00
26.44
23.72
24.87
23.15
27.57
25.44
23.95
27.44
25.09
25.26
22.14
25.90
26.71
27.34
26.9

# **Spectral Flatness**

In [39]:
# Load the audio file
def spec_flatness(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            spectral_flatness = librosa.feature.spectral_flatness(y=audio_data)
            print(f"{np.mean(spectral_flatness):.4f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [40]:
spec_flatness('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

0.0003
0.0002
0.0006
0.0001
0.0002
0.0002
0.0003
0.0011
0.0001
0.0002
0.0002
0.0002
0.0004
0.0002
0.0001
0.0003
0.0001
0.0001
0.0002
0.0001
0.0002
0.0004
0.0001
0.0002
0.0004
0.0001
0.0019
0.0002
0.0002
0.0002
0.0009
0.0002
0.0001
0.0002
0.0004
0.0003
0.0005
0.0004
0.0001
0.0002
0.0003
0.0002
0.0018
0.0005
0.0004
0.0005
0.0003
0.0007
0.0006
0.0008
0.0003
0.0001
0.0001
0.0014
0.0002
0.0001
0.0001
0.0001
0.0001
0.0001
0.0009
0.0002
0.0002
0.0004
0.0001
0.0003
0.0007
0.0002
0.0003
0.0003
0.0003
0.0009
0.0002
0.0004
0.0002
0.0003
0.0002
0.0005
0.0004
0.0003
0.0014
0.0006
0.0004
0.0004
0.0006
0.0000
0.0001
0.0006
0.0012
0.0004
0.0006
0.0007
0.0001
0.0004
0.0001
0.0001
0.0001
0.0002
0.0001
0.0009
0.0002
0.0003
0.0009
0.0003
0.0003
0.0007
0.0010
0.0002
0.0001
0.0012
0.0001
0.0002
0.0004
0.0002
0.0002
0.0006
0.0003
0.0001
0.0014
0.0004
0.0006
0.0002
0.0001
0.0004
0.0001
0.0004
0.0003
0.0001
0.0007
0.0004
0.0002
0.0003
0.0000
0.0003
0.0002
0.0002
0.0006
0.0001
0.0002
0.0001
0.0001
0.0005
0.0004

# **MFCCs**

In [43]:
# Load the audio file
def calculate_mfccs(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)
            # Compute MFCCs
            n_mfcc = 13  # Number of MFCCs to extract
            mfccs = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=n_mfcc)

            # Compute the mean of each MFCC coefficient across all frames
            mfcc_means = np.mean(mfccs, axis=1)

            # Compute a single value (e.g., mean of the means) for comparison
            single_value = np.mean(mfcc_means)


            # Print the single value
            print(f"{single_value: .2f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [44]:
calculate_mfccs('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

-19.60
-10.48
-19.77
-13.00
-6.43
 0.61
-21.49
-14.97
-18.83
-20.75
-20.85
-1.67
-6.65
-19.51
-8.24
-19.35
-19.37
 1.49
-23.36
-2.17
-9.87
-0.97
-4.67
-9.37
-22.52
-3.61
-18.33
-11.06
-20.23
-20.78
-21.47
-20.04
-19.88
-10.45
-14.82
-22.15
-27.33
-10.27
-5.76
-20.53
-11.81
-22.16
-20.90
-22.97
-21.67
-19.59
-20.93
-4.98
-26.04
-20.55
-20.53
-3.55
 4.23
-15.47
-21.52
-12.26
-7.40
-2.59
-8.99
-21.92
 1.47
-6.79
-17.67
-11.63
-20.69
-4.38
-19.26
-20.03
-6.37
-23.55
-1.71
-22.00
-18.51
-23.00
-23.85
-19.66
-20.74
-2.19
-16.33
-1.28
-8.56
-19.14
-23.32
-21.60
-2.99
-11.43
-3.61
-19.91
-23.79
-20.83
-15.75
-19.77
-5.34
-23.75
-5.00
-0.91
-10.18
-5.91
-12.19
-12.72
-16.93
-1.29
-5.49
-5.71
-20.38
-18.24
-18.68
-20.85
-19.47
-11.30
-5.86
-8.40
-6.75
-4.17
-11.41
-7.26
-21.87
-2.24
-9.98
-9.56
-8.20
-5.49
-6.95
-9.22
-7.55
-11.16
-11.30
-8.81
 1.59
-8.53
-23.95
-10.27
-10.28
-21.49
-11.08
-0.31
-11.72
-4.85
-11.38
-0.58
-5.09
-22.53
-20.54
-18.93
-5.31
-23.82
-5.28
-18.45
-20.81
-22.72
-3.07
-6

# **Chroma Vector**

In [11]:
# Load the audio file
def compute_mean_chroma_vector(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            audio_data, sr = librosa.load(audio_file)

            chroma_feature = librosa.feature.chroma_stft(y=audio_data, sr=sr)

            # Calculate the mean chroma vector across all frames
            mean_chroma_vector = np.mean(chroma_feature, axis=1)

            # Calculate a single mean value from the mean chroma vector
            single_value = np.mean(mean_chroma_vector)

            print(f"{single_value: .4f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [13]:
compute_mean_chroma_vector('training/real')

 0.3811
 0.3520
 0.2859
 0.3241
 0.4554
 0.2928
 0.3476
 0.3431
 0.3551
 0.3876
 0.3049
 0.3477
 0.2512
 0.3472
 0.3481
 0.3528
 0.4006
 0.6799
 0.5600
 0.4738
 0.5478
 0.2919
 0.3384
 0.5633
 0.2911
 0.3129
 0.3884
 0.3221
 0.4020
 0.3673
 0.2897
 0.3656
 0.4034
 0.4415
 0.3076
 0.3523
 0.2982
 0.4045
 0.3998
 0.3079
 0.7776
 0.4180
 0.3052
 0.2676
 0.3414
 0.3156
 0.3874
 0.2686
 0.3680
 0.3082
 0.3467
 0.3553
 0.3395
 0.2735
 0.2663
 0.2930
 0.3464
 0.3362
 0.2675
 0.2654
 0.4063
 0.2949
 0.3612
 0.3448
 0.2541
 0.3658
 0.4017
 0.4761
 0.6390
 0.2990
 0.3538
 0.5271
 0.3180
 0.2708
 0.3039
 0.4362
 0.2841
 0.2924
 0.3801
 0.4162
 0.4243
 0.5633
 0.2537
 0.3347
 0.4124
 0.3369
 0.6015
 0.2509
 0.4423
 0.4277
 0.7277
 0.2496
 0.3960
 0.3174


  return pitch_tuning(


 0.3871
 0.3801
 0.3368
 0.3776
 0.3212
 0.4231
 0.3663
 0.3273
 0.2761
 0.3088
 0.2827
 0.4827
 0.3997
 0.4268
 0.2765
 0.3890
 0.3972
 0.3296
 0.4045
 0.3718
 0.3112
 0.3444
 0.3578
 0.2821
 0.2963
 0.2984
 0.2827
 0.3036
 0.3714
 0.3781
 0.2621
 0.2846
 0.3507
 0.3241
 0.4021
 0.3135
 0.2868
 0.3552
 0.3173
 0.2685
 0.3381
 0.3089
 0.2998
 0.3180
 0.3224
 0.3109
 0.3365
 0.3112
 0.3932
 0.4608
 0.2913
 0.3451
 0.3388
 0.3834
 0.4127
 0.2711
 0.3436
 0.4821
 0.3184
 0.4198
 0.4753
 0.4419
 0.3342
 0.4110
 0.3390
 0.4813
 0.2385
 0.3417
 0.2958
 0.3797
 0.4047
 0.4449
 0.4177
 0.3098
 0.4539
 0.3181
 0.4798
 0.3741
 0.4287
 0.2892
 0.3719
 0.2551
 0.3131
 0.4216
 0.3082
 0.3514
 0.2669
 0.4348
 0.3420
 0.2771
 0.3271
 0.2808
 0.3059
 0.2669
 0.3095
 0.3071
 0.2702
 0.2988
 0.3204
 0.3060
 0.2905
 0.3861
 0.2278
 0.3484
 0.3038
 0.4134
 0.2694
 0.3062
 0.3620
 0.3575
 0.2526
 0.2859
 0.3143
 0.3509
 0.3843
 0.3281
 0.4187
 0.4665
 0.5421
 0.2882
 0.2862
 0.4097
 0.3273
 0.2624
 0.3626


# **HNR**

In [64]:
# Load the audio file
def calculate_hnr(file, frame_length=2048, hop_length=512):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            y, sr = librosa.load(audio_file)

            # Perform Short-Time Fourier Transform (STFT)
            stft = np.abs(librosa.stft(y, n_fft=frame_length, hop_length=hop_length))

            # Calculate the fundamental frequency using librosa's pitch detection
            pitches, magnitudes = librosa.piptrack(y=y, sr=sr, n_fft=frame_length, hop_length=hop_length)

            # Extract the strongest pitch
            pitch = []
            for t in range(pitches.shape[1]):
                index = magnitudes[:, t].argmax()
                pitch_freq = pitches[index, t]
                if pitch_freq > 0:
                   pitch.append(pitch_freq)
                else:
                    pitch.append(np.nan)

            # Estimate the harmonic components
            harmonic_energy = []
            noise_energy = []
            for t, f0 in enumerate(pitch):
                if np.isnan(f0):
                    harmonic_energy.append(0)
                    noise_energy.append(0)
                    continue

                # Find peaks near the fundamental frequency
                peaks, _ = find_peaks(stft[:, t], height=0.1 * np.max(stft[:, t]))

                # Harmonic energy is the sum of the energies at the fundamental frequency and its harmonics
                harmonic_freqs = [f0 * (i+1) for i in range(1, 4)]  # Consider first 3 harmonics
                harmonic_amplitude = 0

                for hf in harmonic_freqs:
                    # Find the nearest frequency bin
                    harmonic_bin = int(np.round(librosa.core.hz_to_mel(hf)))
                    if 0 <= harmonic_bin < stft.shape[0]:
                        harmonic_amplitude += stft[harmonic_bin, t]

                harmonic_energy.append(harmonic_amplitude)

                # Noise energy is the total energy minus harmonic energy
                total_energy = np.sum(stft[:, t])
                noise_energy_value = total_energy - harmonic_amplitude

                # Ensure noise energy is not negative
                noise_energy_value = max(noise_energy_value, 1e-10)  # Avoid division by zero or negative energy
                noise_energy.append(noise_energy_value)

            # Calculate the HNR in decibels
            harmonic_energy = np.array(harmonic_energy)
            noise_energy = np.array(noise_energy)

            # Avoid division by zero in HNR calculation
            hnr = 10 * np.log10(np.where(noise_energy > 0, harmonic_energy / noise_energy, np.nan))  # Set undefined HNR to NaN

            # Calculate average HNR, ignoring NaNs
            avg_hnr = np.nanmean(hnr)

            # Print the average HNR
            print(f"{avg_hnr:.3f} ")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [65]:
calculate_hnr('/content/drive/MyDrive/Large Audio Folder/Real Audio Datasets')

  hnr = 10 * np.log10(np.where(noise_energy > 0, harmonic_energy / noise_energy, np.nan))  # Set undefined HNR to NaN


-20.819 
-22.321 
-21.049 
-24.987 
-15.565 
-17.679 
-19.876 
-19.679 
-19.998 
-21.927 
-22.458 
-16.411 
-18.547 
-21.277 
-17.212 
-20.536 
-20.222 
-10.304 
-22.149 
-12.114 
-20.187 
-17.074 
-17.132 
-25.503 
-20.228 
-17.741 
-21.312 
-17.010 
-22.205 
-18.209 
-20.424 
-22.403 
-21.939 
-18.862 
-21.147 
-19.881 
-18.177 
-19.435 
-10.255 
-19.633 
-27.983 
-22.040 
-23.726 
-19.970 
-21.088 
-22.305 
-18.554 
-18.698 
-22.437 
-22.561 
-22.436 
-17.836 
-11.677 
-17.976 
-20.227 
-21.742 
-15.932 
-14.591 
-14.213 
-20.171 
-17.345 
-16.148 
-19.006 
-16.976 
-22.349 
-14.605 
-22.784 
-20.482 
-18.582 
-23.220 
-14.336 
-22.217 
-20.509 
-20.033 
-23.903 
-21.411 
-19.951 
-17.048 
-21.210 
-16.698 
-25.801 
-20.254 
-21.269 
-19.770 
-15.984 
-14.545 
-17.834 
-19.388 
-19.983 
-21.623 
-19.859 
-21.850 
-15.154 
-18.102 
-21.201 
-12.629 
-26.158 
-17.875 
-18.894 
-21.502 
-18.908 
-14.402 
-19.180 
-22.704 
-22.293 
-18.016 
-20.547 
-21.383 
-17.850 
-18.050 
-15.744 
-

  avg_hnr = np.nanmean(hnr)


-17.549 
-14.226 
-19.298 
-26.405 
-18.969 
-20.175 
-17.572 
-21.107 
-27.707 
-20.496 
-20.136 
-23.212 
-21.511 
-18.231 
-16.062 
-20.643 
-14.849 
-17.419 
-15.253 
-16.963 
-20.898 
-21.617 
-27.326 
-20.818 
-15.104 
-18.727 
-17.116 
-19.991 
-23.525 
-17.171 
-18.665 
-21.156 
-25.074 
-20.745 
-16.955 
-20.187 
-20.900 
-17.642 
-16.623 
-22.548 
-14.166 
-20.399 
-15.027 
-18.525 
-12.272 
-17.612 
-22.087 
-10.542 
-19.373 
-11.842 
-20.237 
-22.935 
-15.648 
-22.583 
-17.651 
-9.988 
-16.379 
-16.458 
-21.209 
-19.758 
-16.978 
-19.018 
-21.150 
-17.409 
-20.492 
-25.700 
-25.186 
-19.801 
-18.211 
-21.918 
-20.710 
-24.628 
-22.932 
-10.961 
-22.503 
-19.734 
-16.071 
-18.346 
-20.891 
-21.093 
-23.208 
-18.153 
-21.445 
-17.986 
-17.447 
-9.442 
-18.200 
-15.732 
-21.089 
-18.908 
-18.932 
-24.197 
-20.898 
-22.298 
-17.623 
-21.275 
-14.418 
-21.579 
-15.895 
-21.664 
-19.588 
-16.599 
-16.501 
-21.875 
-19.836 
-15.579 
-20.444 
-23.148 
-21.769 
-14.292 
-15.352 
-15

# **Tonal Centroid**

In [19]:
# Load the audio file
def calculate_tonal_centroid(file):
  # Check if the path is a directory
  if os.path.isdir(file):
  # If it's a directory, iterate through the files in the directory
    for filename in os.listdir(file):
    # Construct the full path to the audio file
      audio_file = os.path.join(file, filename)
      # Check if the file is an audio file
      if audio_file.endswith(('.wav', '.mp3', '.ogg')):  # Add more audio extensions if needed
        # Load the audio file using librosa
        try: #Try loading the audio file. If it fails, print the filename and the exception
            y, sr = librosa.load(audio_file)
            # Extract harmonic component using Harmonic-Percussive Source Separation (HPSS)
            y_harmonic, _ = librosa.effects.hpss(y)

            # Compute the tonal centroid features (tonnetz)
            tonal_centroid = librosa.feature.tonnetz(y=y_harmonic, sr=sr)
            mean_tonal = np.mean(np.mean(tonal_centroid))

            print(f"{mean_tonal: .4f}")

        except Exception as e:
            print(f"Could not load {filename}: {e}")

In [21]:
calculate_tonal_centroid('training/real')



 0.0162
 0.0071
-0.0071
 0.0085
-0.0208
-0.0306
 0.0075
-0.0428
-0.0272
-0.0011
 0.0165
-0.0121
-0.0197
-0.0019
 0.0051
-0.0105
-0.0053
 0.0679
-0.0012
-0.0047
-0.0002
-0.0203
-0.0053
-0.0076
 0.0504
-0.0049
 0.0112
-0.0025
-0.0064
 0.0164
-0.0024
-0.0103
-0.0011
-0.0170
 0.0109
-0.0008
-0.0024
 0.0233
 0.0269
-0.0535


  return pitch_tuning(


 0.0106
-0.0118
-0.0046
 0.0157
 0.0059
 0.0208
 0.0173
-0.0089
 0.0081
 0.0203
 0.0088
 0.0131
 0.0071
-0.0162
 0.0380
-0.0036
 0.0404
-0.0611
-0.0060
 0.0041
-0.0671
-0.0236
-0.0067
-0.0033
-0.0231
-0.0016
-0.0006
-0.0207
 0.0219
-0.0146
-0.0067
-0.0104
 0.0023
-0.0249
 0.0075
-0.0180
 0.0374
-0.0102
-0.0190
-0.0237
 0.0125
 0.0032
 0.0465
 0.0114
 0.0051
-0.0445
-0.0203
 0.0380
 0.0047
 0.0395
 0.0051
 0.0448
 0.0115
 0.0123
 0.0140
-0.0215
-0.0157
 0.0061
-0.0061
 0.0130
-0.0010
 0.0097
 0.0049
-0.0118
-0.0174
 0.0104
-0.0269
 0.0087
-0.0329
-0.0230
-0.0134
 0.0088
-0.0133
-0.0175
 0.0034
-0.0016
 0.0042
-0.0159
-0.0141
 0.0073
 0.0360
-0.0142
-0.0078
-0.0251
 0.0285
 0.0011
-0.0097
-0.0021
-0.0034
-0.0261
 0.0062
-0.0257
 0.0021
-0.0097
-0.0527
-0.0428
-0.0068
 0.0008
 0.0033
 0.0022
-0.0061
 0.0054
-0.0078
-0.0092
 0.0110
-0.0309
-0.0102
-0.0294
-0.0308
-0.0042
-0.0062
-0.0315
-0.0067
-0.0148
-0.0041
 0.0208
 0.0083
 0.0052
 0.0055
-0.0341
 0.0375
-0.0109
 0.0176
-0.0140
-0.0048
