## API Feature Extraction And Training Random Forest Model

#### Features Extraction 

#### Note that it is assumed that training clips exist under the working directory. Specifically, the directory should be named /clips, and each category of food should be in a separate directory. Follow the hierarchy below.


In [12]:
'''
.
└── main.ipynb
└── clip_sound.ipynb
└── clips
    └── apple
    └── cabbage
    └── carrot
    └── chips
    └── noise                                    
    └── talk
'''

'\n.\n└── main.ipynb\n└── clip_sound.ipynb\n└── clips\n    └── apple\n    └── cabbage\n    └── carrot\n    └── chips\n    └── noise                                    \n    └── talk\n'

#### OpenSmile Library to Extract Features

In [1]:
import opensmile

smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

In [72]:
import os
data_path = os.getcwd() + '\\clips\\'
classes = os.listdir(data_path)


In [3]:
classes

['apple', 'cabbage', 'carrot', 'chips', 'noise', 'talk']

#### Load all the samples within each category

In [4]:
from os.path import join

l_files = [[join(data_path, cls, file), cls]
           for cls in classes
           for file in os.listdir(join(data_path, cls))]

#### Generate dataset

In [5]:
import pandas as pd

df=pd.DataFrame()

for row in l_files:
    clip = smile.process_file(row[0])
    clip['label']=row[1]
    df = df._append(clip,ignore_index=True)



In [13]:
df

Unnamed: 0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope,label
0,4.146712,0.383420,0.000000,0.278954,0.692023,1.166771,0.413069,0.474747,0.887816,0.004263,...,0.530642,1.754268,1.721982,18.166348,0.470521,100.325256,39.081284,104.495758,49.829739,apple
1,1.635273,0.041451,0.419689,0.326165,0.459716,0.707416,0.133551,0.247700,0.381250,0.234789,...,0.497149,1.532504,1.580927,-16.362225,0.558361,82.063141,36.654072,80.257469,47.865520,apple
2,4.764294,0.476684,0.000000,0.364536,0.570255,1.523106,0.205720,0.952850,1.158570,0.231642,...,0.492075,1.566053,1.611472,-16.693504,0.498666,91.317863,36.077229,94.182121,49.295776,apple
3,4.032410,0.626943,0.880829,0.423672,0.765254,1.833425,0.341582,1.068171,1.409753,0.299757,...,0.516594,2.234223,2.263981,-18.908016,0.532912,111.293198,72.786560,102.444008,41.708553,apple
4,1.848544,0.772021,0.383420,0.382550,0.494377,0.705097,0.111827,0.210720,0.322547,0.302564,...,0.506272,1.182863,1.196855,-19.118280,0.423255,67.207764,26.318726,61.748287,21.283089,apple
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,1.867906,0.559586,0.103627,0.621086,0.971575,1.259455,0.350489,0.287880,0.638369,0.370813,...,0.611954,2.339883,2.292671,17.908691,0.453995,126.060707,65.503006,104.469643,67.313560,talk
451,1.737248,0.725389,0.445596,0.531628,0.883879,1.269750,0.352251,0.385872,0.738123,0.206899,...,0.547214,2.761574,2.863292,-15.730406,0.600494,121.676697,57.757362,106.556587,59.180660,talk
452,5.206242,0.150259,0.000000,0.877150,1.507742,2.295779,0.630592,0.788038,1.418630,0.363353,...,0.433117,2.262348,2.283690,-19.435894,0.594270,120.431229,66.395851,116.126167,62.382973,talk
453,4.935063,0.860104,0.777202,0.510421,0.899855,1.850936,0.389435,0.951081,1.340515,0.280748,...,0.477278,2.664474,2.670159,-19.994375,0.582239,129.779968,57.332817,131.815033,70.832466,talk


In [21]:
#Shuffle data

df = df.sample(frac = 1)


In [22]:
df

Unnamed: 0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope,label
415,3.249789,0.668394,0.901554,1.037592,2.201094,2.994588,1.163502,0.793494,1.956996,0.711616,...,0.524599,1.782418,1.818559,-17.894350,0.399293,86.170418,39.180508,67.332642,35.452393,talk
88,2.962915,0.098446,0.010363,0.448573,0.841947,1.330069,0.393374,0.488123,0.881497,0.121137,...,0.398935,2.085534,2.076853,19.932121,0.573576,96.679756,36.628468,101.956993,51.898682,cabbage
331,1.849956,0.637306,0.279793,0.710866,0.937511,1.193502,0.226645,0.255992,0.482637,0.588183,...,0.439322,1.678308,1.713694,-17.778141,0.540275,88.399109,42.547398,73.524422,41.403149,noise
79,0.353969,0.331606,0.000000,0.027326,0.041432,0.072131,0.014106,0.030698,0.044804,0.018139,...,0.652279,2.066111,2.041272,19.091450,0.509912,103.130219,40.268349,116.189949,52.552940,cabbage
111,3.489231,0.621762,0.321244,0.322266,0.494496,1.190937,0.172230,0.696440,0.868671,0.216816,...,0.586573,1.929817,1.890919,17.911690,0.424338,98.695564,51.383106,97.559731,46.988724,cabbage
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,4.764294,0.476684,0.000000,0.364536,0.570255,1.523106,0.205720,0.952850,1.158570,0.231642,...,0.492075,1.566053,1.611472,-16.693504,0.498666,91.317863,36.077229,94.182121,49.295776,apple
168,2.248766,0.259067,0.000000,0.088508,0.099376,0.273717,0.010869,0.174340,0.185209,0.075936,...,0.451545,2.120134,2.119263,20.000000,0.475268,92.304649,61.017414,100.480675,48.053158,carrot
185,1.214614,0.901554,0.000000,0.132577,0.152395,0.238292,0.019819,0.085897,0.105716,0.108370,...,0.442718,1.907333,1.964050,-16.598528,0.558763,102.379860,44.142109,103.006821,48.203632,carrot
150,1.262224,0.796875,0.000000,0.062098,0.066505,0.091254,0.004407,0.024749,0.029156,0.046633,...,0.613274,1.694882,1.682521,19.682655,0.408589,89.110344,38.379448,91.764160,38.959110,carrot


In [23]:
df.to_csv('./datasets/dataset.csv', encoding='utf-8')


## Training Random Forest


In [1]:
import opensmile

smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.ComParE_2016,
    feature_level=opensmile.FeatureLevel.Functionals,
)

In [2]:
import pandas as pd

df=pd.read_csv('./datasets/dataset.csv')


In [3]:
df.drop(columns=df.columns[0], axis=1,  inplace=True)
df

Unnamed: 0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope,label
0,3.249789,0.668394,0.901554,1.037592,2.201094,2.994588,1.163502,0.793494,1.956996,0.711616,...,0.524599,1.782418,1.818559,-17.894350,0.399293,86.170420,39.180508,67.332640,35.452393,talk
1,2.962915,0.098446,0.010363,0.448573,0.841947,1.330069,0.393374,0.488123,0.881497,0.121137,...,0.398935,2.085534,2.076853,19.932121,0.573576,96.679756,36.628468,101.956990,51.898680,cabbage
2,1.849956,0.637306,0.279793,0.710866,0.937511,1.193502,0.226645,0.255992,0.482637,0.588183,...,0.439322,1.678308,1.713694,-17.778141,0.540274,88.399110,42.547398,73.524420,41.403150,noise
3,0.353969,0.331606,0.000000,0.027326,0.041432,0.072131,0.014106,0.030698,0.044804,0.018139,...,0.652280,2.066111,2.041272,19.091450,0.509912,103.130220,40.268350,116.189950,52.552940,cabbage
4,3.489231,0.621762,0.321244,0.322266,0.494496,1.190937,0.172230,0.696440,0.868671,0.216816,...,0.586573,1.929817,1.890919,17.911690,0.424338,98.695564,51.383106,97.559730,46.988724,cabbage
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,4.764294,0.476684,0.000000,0.364536,0.570255,1.523106,0.205720,0.952850,1.158570,0.231642,...,0.492075,1.566053,1.611472,-16.693504,0.498666,91.317860,36.077230,94.182120,49.295776,apple
451,2.248766,0.259067,0.000000,0.088508,0.099376,0.273717,0.010869,0.174340,0.185209,0.075936,...,0.451545,2.120134,2.119263,20.000000,0.475268,92.304650,61.017414,100.480675,48.053158,carrot
452,1.214614,0.901554,0.000000,0.132577,0.152395,0.238292,0.019819,0.085897,0.105716,0.108370,...,0.442718,1.907333,1.964050,-16.598528,0.558763,102.379860,44.142110,103.006820,48.203632,carrot
453,1.262224,0.796875,0.000000,0.062098,0.066505,0.091254,0.004407,0.024749,0.029156,0.046633,...,0.613274,1.694882,1.682521,19.682655,0.408589,89.110344,38.379448,91.764160,38.959110,carrot


In [4]:
X = df.drop('label', axis=1)  # Features
y = df['label']  # Labels

In [5]:
X

Unnamed: 0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,3.249789,0.668394,0.901554,1.037592,2.201094,2.994588,1.163502,0.793494,1.956996,0.711616,...,4.927357,0.524599,1.782418,1.818559,-17.894350,0.399293,86.170420,39.180508,67.332640,35.452393
1,2.962915,0.098446,0.010363,0.448573,0.841947,1.330069,0.393374,0.488123,0.881497,0.121137,...,3.239463,0.398935,2.085534,2.076853,19.932121,0.573576,96.679756,36.628468,101.956990,51.898680
2,1.849956,0.637306,0.279793,0.710866,0.937511,1.193502,0.226645,0.255992,0.482637,0.588183,...,3.775739,0.439322,1.678308,1.713694,-17.778141,0.540274,88.399110,42.547398,73.524420,41.403150
3,0.353969,0.331606,0.000000,0.027326,0.041432,0.072131,0.014106,0.030698,0.044804,0.018139,...,4.781018,0.652280,2.066111,2.041272,19.091450,0.509912,103.130220,40.268350,116.189950,52.552940
4,3.489231,0.621762,0.321244,0.322266,0.494496,1.190937,0.172230,0.696440,0.868671,0.216816,...,5.678109,0.586573,1.929817,1.890919,17.911690,0.424338,98.695564,51.383106,97.559730,46.988724
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,4.764294,0.476684,0.000000,0.364536,0.570255,1.523106,0.205720,0.952850,1.158570,0.231642,...,3.701786,0.492075,1.566053,1.611472,-16.693504,0.498666,91.317860,36.077230,94.182120,49.295776
451,2.248766,0.259067,0.000000,0.088508,0.099376,0.273717,0.010869,0.174340,0.185209,0.075936,...,4.965257,0.451545,2.120134,2.119263,20.000000,0.475268,92.304650,61.017414,100.480675,48.053158
452,1.214614,0.901554,0.000000,0.132577,0.152395,0.238292,0.019819,0.085897,0.105716,0.108370,...,3.643732,0.442718,1.907333,1.964050,-16.598528,0.558763,102.379860,44.142110,103.006820,48.203632
453,1.262224,0.796875,0.000000,0.062098,0.066505,0.091254,0.004407,0.024749,0.029156,0.046633,...,3.968391,0.613274,1.694882,1.682521,19.682655,0.408589,89.110344,38.379448,91.764160,38.959110


In [6]:
y

0         talk
1      cabbage
2        noise
3      cabbage
4      cabbage
        ...   
450      apple
451     carrot
452     carrot
453     carrot
454     carrot
Name: label, Length: 455, dtype: object

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=73,shuffle=True)


In [9]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=73,class_weight='balanced')


In [10]:
rf_classifier.fit(X_train, y_train)



In [11]:
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.3f}")

Accuracy: 0.956


In [74]:
from sklearn.metrics import classification_report

report = classification_report(y_test, y_pred)

print(report)



              precision    recall  f1-score   support

       apple       0.94      0.94      0.94        17
     cabbage       0.90      0.95      0.92        19
      carrot       0.95      0.91      0.93        23
       chips       0.95      0.91      0.93        23
       noise       1.00      1.00      1.00        33
        talk       0.96      1.00      0.98        22

    accuracy                           0.96       137
   macro avg       0.95      0.95      0.95       137
weighted avg       0.96      0.96      0.96       137



## Save Model

In [86]:
import joblib
joblib.dump(rf_classifier, 'rf_model.xz')

['rf_model.xz']