In [None]:
import pandas as pd
# Load the SEP-28k label CSV and preview its first rows.
df = pd.read_csv('../input/sep28k/SEP-28k_labels.csv')
df.head()

In [None]:
df.shape

In [None]:
df.columns

## Adding Name Column

In [None]:
# Build a per-row identifier by joining the first three columns with '_'
# (missing values are skipped). Later cells compare it with clip file names.
id_columns = df.columns[:3]


def _join_non_null(row):
    """Return the row's non-null values as strings joined by underscores."""
    return '_'.join(row.dropna().astype(str))


df['Name'] = df[id_columns].apply(_join_non_null, axis=1)
df.head()

In [None]:
df['Name']

## Removing Empty Audio Files and Their Dataset Entries

In [None]:
import os
os.stat("../input/sep28k/clips/stuttering-clips/clips/HeStutters_0_9.wav").st_size

# empty files have st_size of 44

In [None]:
os.stat("../input/sep28k/clips/stuttering-clips/clips/HeStutters_1_1.wav").st_size

# non empty file

## Put empty filenames in a list and ignore them during feature extraction and training

In [None]:
CLIPS_DIR = "../input/sep28k/clips/stuttering-clips/clips/"
# Size in bytes of a clip that holds no audio (see the os.stat probes earlier).
EMPTY_WAV_SIZE = 44

# File names (with extension) of every non-FluencyBank clip that is empty.
# The list keeps os.listdir order and is reused by the feature-extraction cell.
ignore_list = [
    filename
    for filename in os.listdir(CLIPS_DIR)
    if 'FluencyBank' not in filename
    and os.stat(os.path.join(CLIPS_DIR, filename)).st_size == EMPTY_WAV_SIZE
]

# Drop the label rows of all empty clips in ONE pass instead of re-filtering
# the whole DataFrame once per empty file.
empty_clip_names = {os.path.splitext(filename)[0] for filename in ignore_list}
df = df[~df['Name'].isin(empty_clip_names)]

print(len(ignore_list))

In [None]:
df.shape

# 255 rows removed from df as well

## MFCC Feature Extraction

In [None]:
import librosa
import numpy as np
from tqdm.notebook import tqdm

# Maps clip name (file name without extension) -> mean MFCC vector of the clip.
features = {}
directory = CLIPS_DIR
# Set membership is O(1); scanning ignore_list for every clip was linear.
skipped_files = set(ignore_list)

for filename in tqdm(os.listdir(CLIPS_DIR)):
    clip_name, extension = os.path.splitext(filename)
    # Process .wav clips only, and skip FluencyBank clips and the empty files
    # collected in ignore_list.
    if (
        extension.lower() != '.wav'
        or 'FluencyBank' in clip_name
        or filename in skipped_files
    ):
        continue
    # sr=None loads the clip at its native sampling rate.
    audio, sample_rate = librosa.load(
        os.path.join(CLIPS_DIR, filename), res_type='kaiser_fast', sr=None
    )
    # 13 MFCCs per frame, averaged over the frames -> one 13-value vector.
    mfccs = np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13).T, axis=0)
    features[clip_name] = mfccs

In [None]:
print(len(features))

In [None]:
len(features['HeStutters_1_1'])

## Making Dataset from Features

In [None]:
# The dict keys (clip names) become columns first; transposing then gives
# one row per clip and one column per MFCC coefficient.
df_features = pd.DataFrame(features).T
df_features

In [None]:
# Move the clip names out of the index into a regular column; reset_index
# names that column 'index' by default, which the next cells rely on.
df_features = df_features.reset_index()

In [None]:
df_features

In [None]:
# Order the rows by clip name (held in the 'index' column).
df_features = df_features.sort_values(by='index')
df_features

In [None]:
df_features['index']

In [None]:
df['Name']

## Applying Inner Join on the dataframes

In [None]:
# Label the clip-name column 'Name', matching the key column built in `df`,
# so that the two frames can be merged on it.
df_features = df_features.rename(columns={'index': 'Name'})
df_features

In [None]:
# Inner join on 'Name': only clips that have both a label row and an
# extracted feature vector survive.
df_final = df.merge(df_features, on='Name', how='inner')
df_final

In [None]:
df_final.head()

In [None]:
df_final.info()

In [None]:
# df_final['Stutter'] = df['WordRep'] + df['SoundRep'] + df['Prolongation'] + df['Interjection']
# df_final

In [None]:
# df_final['Stutter'].value_counts()

In [None]:
# df_final = df_final[df_final.Stutter != 0]
# Remove Non-Stuttered Clips

# df_final.loc[df_final['Stutter'] >= 1.0, 'Stutter'] = 1.0
# df_final['Stutter'].value_counts()

In [None]:
# Keep only the rows whose PoorAudioQuality value is 0.
df_final = df_final.loc[df_final['PoorAudioQuality'] == 0]
df_final

In [None]:
# Keep only the rows whose DifficultToUnderstand value is 0.
df_final = df_final.loc[df_final['DifficultToUnderstand'] == 0]
df_final

In [None]:
# Keep only the rows whose Music value is 0.
df_final = df_final.loc[df_final['Music'] == 0]
df_final

In [None]:
# Keep only the rows whose NoSpeech value is 0.
df_final = df_final.loc[df_final['NoSpeech'] == 0]
df_final

In [None]:
# Save the filtered labels + MFCC features to the working directory;
# index=False keeps the DataFrame index out of the file.
df_final.to_csv("sep28k-mfcc.csv",index=False)

## Model Creation & Training for SoundRep Classification

In [1]:
import pandas as pd
# Load the labels + MFCC features CSV from the `sep28kmfcc` input dataset.
df = pd.read_csv("../input/sep28kmfcc/sep28k-mfcc.csv")

In [2]:
# Select the rows used for the SoundRep classifier with one combined mask:
# NoStutteredWords must be non-zero and NaturalPause, Interjection,
# Prolongation, WordRep and Block must all be zero. SoundRep is deliberately
# not filtered; it becomes the target in the following cells.
# NOTE(review): `NoStutteredWords != 0` also constrains the SoundRep-positive
# rows — confirm that this is the intended selection.
row_filter = (
    df['NoStutteredWords'].ne(0)
    & df['NaturalPause'].eq(0)
    & df['Interjection'].eq(0)
    & df['Prolongation'].eq(0)
    & df['WordRep'].eq(0)
    & df['Block'].eq(0)
)
df = df[row_filter]
df

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626716,-5.491358,-18.113768,0.016458,-17.751234,-10.870638,-3.549485,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284819,-20.774286,-5.054771,-8.849820,-4.670221,-6.783212,-8.442829,-9.905901,-7.322884,-6.587358
8,HeStutters,1,4,28351360,28399360,0,0,0,0,0,...,26.878878,-11.660660,3.515312,-27.021893,-11.834179,-7.582844,-7.805755,-12.518568,-1.051659,-7.828530
18,HeStutters,1,105,6492320,6540320,0,0,0,0,2,...,9.991777,6.028349,-7.291534,-25.489399,-14.340752,-14.337445,1.463035,-16.014809,-0.031667,0.534200
19,HeStutters,1,107,6638560,6686560,0,0,0,0,0,...,29.173650,7.216802,5.993933,-13.808914,-6.984492,-11.859511,-2.531782,-16.870178,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,WomenWhoStutter,109,24,16179040,16227040,0,0,0,0,0,...,26.002850,-16.124638,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,WomenWhoStutter,109,25,16224160,16272160,0,0,0,0,0,...,8.484955,-29.523212,-2.754525,-15.226071,-7.238213,1.549138,-8.778445,-10.499597,-2.756171,-6.887869
20860,WomenWhoStutter,109,31,20476320,20524320,0,0,0,0,0,...,16.823689,-23.059584,-6.954723,-10.113501,-13.114474,0.588608,-15.267632,-10.101980,-0.440983,0.197351
20862,WomenWhoStutter,109,33,24747360,24795360,0,0,0,0,0,...,18.849451,-23.086061,2.158868,-8.471071,1.332367,2.440633,-1.968005,-7.462198,0.142006,-4.275125


In [3]:
# Binarise the target: every SoundRep value >= 1 collapses to 1, other values
# are left unchanged. assign + clip returns a new frame, so nothing is written
# through `.loc` into a frame that was produced by boolean filtering (which
# triggers pandas' SettingWithCopyWarning). Dtype and column position are kept.
df = df.assign(SoundRep=df['SoundRep'].clip(upper=1))
df['SoundRep'].value_counts()

0    2471
1     156
Name: SoundRep, dtype: int64

In [4]:
# Feature matrix: the last 13 columns of the frame, selected by position
# (the MFCC columns '0'..'12' in the saved CSV).
X = df.iloc[: , -13:]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
3,-300.60464,52.587803,-1.105800,22.788187,-2.626716,-5.491358,-18.113768,0.016458,-17.751234,-10.870638,-3.549485,-9.037821,-6.599053
4,-322.67563,39.411560,-26.036314,31.284819,-20.774286,-5.054771,-8.849820,-4.670221,-6.783212,-8.442829,-9.905901,-7.322884,-6.587358
8,-418.44520,103.886830,-30.266285,26.878878,-11.660660,3.515312,-27.021893,-11.834179,-7.582844,-7.805755,-12.518568,-1.051659,-7.828530
18,-399.46472,92.754060,-31.619010,9.991777,6.028349,-7.291534,-25.489399,-14.340752,-14.337445,1.463035,-16.014809,-0.031667,0.534200
19,-475.87134,93.217606,3.080705,29.173650,7.216802,5.993933,-13.808914,-6.984492,-11.859511,-2.531782,-16.870178,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,-396.81604,47.699257,-28.230510,26.002850,-16.124638,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,-366.09630,59.089500,-23.645912,8.484955,-29.523212,-2.754525,-15.226071,-7.238213,1.549138,-8.778445,-10.499597,-2.756171,-6.887869
20860,-316.00027,42.245804,-40.056350,16.823689,-23.059584,-6.954723,-10.113501,-13.114474,0.588608,-15.267632,-10.101980,-0.440983,0.197351
20862,-350.42902,27.453857,-35.155910,18.849451,-23.086061,2.158868,-8.471071,1.332367,2.440633,-1.968005,-7.462198,0.142006,-4.275125


In [5]:
# Target vector: the binarised SoundRep label.
y = df['SoundRep']
y

3        0
4        0
8        0
18       1
19       0
        ..
20853    0
20854    0
20860    0
20862    0
20867    0
Name: SoundRep, Length: 2627, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split
# Hold out 40% of the rows for testing; random_state=42 makes the split repeatable.
# NOTE(review): the split is not stratified although the classes are heavily
# imbalanced (2471 vs 156 in the recorded value_counts) — consider stratify=y.
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.4, random_state=42)

In [7]:
from collections import Counter
# Class counts in the training split.
Counter(y_train)

Counter({0: 1480, 1: 96})

In [14]:
from sklearn.tree import DecisionTreeClassifier
# Fit an entropy-criterion decision tree on the training split and report the
# test-set accuracy as a percentage.
# NOTE(review): the recorded test split holds 991 class-0 rows out of 1051
# (~94.3%), so the recorded 89.5% accuracy is below the always-predict-0
# baseline. Accuracy alone is not informative on this imbalance — consider
# precision / recall / F1 or a confusion matrix.
clf = DecisionTreeClassifier(criterion='entropy',random_state=5)
clf.fit(X_train, y_train)
clf.score(X_test,y_test) * 100

89.5337773549001

In [15]:
# No visible earlier cell of this run imports numpy (its first import is in
# the final evaluation cell), so import it here rather than relying on
# leftover kernel state.
import numpy as np

# Compare how many rows the tree assigns to each class with the true counts.
y_pred = np.array(clf.predict(X_test))
y_actual = np.array(y_test)
print(Counter(y_pred))
print(Counter(y_actual))

Counter({0: 981, 1: 70})
Counter({0: 991, 1: 60})


## Model Creation & Training for WordRep Classification

In [16]:
import pandas as pd
# Reload the full labels + MFCC CSV so the WordRep experiment starts from unfiltered rows.
df = pd.read_csv('../input/sep28kmfcc/sep28k-mfcc.csv')

In [17]:
df.columns

Index(['Show', 'EpId', 'ClipId', 'Start', 'Stop', 'Unsure', 'PoorAudioQuality',
       'Prolongation', 'Block', 'SoundRep', 'WordRep', 'DifficultToUnderstand',
       'Interjection', 'NoStutteredWords', 'NaturalPause', 'Music', 'NoSpeech',
       'Name', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11',
       '12'],
      dtype='object')

In [18]:
df.shape

(20868, 31)

In [20]:
# Select the rows used for the WordRep classifier with one combined mask:
# NoStutteredWords must be non-zero and NaturalPause, Interjection,
# Prolongation, SoundRep and Block must all be zero. WordRep is deliberately
# not filtered; it becomes the target in the following cells.
# NOTE(review): `NoStutteredWords != 0` also constrains the WordRep-positive
# rows — confirm that this is the intended selection.
row_filter = (
    df['NoStutteredWords'].ne(0)
    & df['NaturalPause'].eq(0)
    & df['Interjection'].eq(0)
    & df['Prolongation'].eq(0)
    & df['SoundRep'].eq(0)
    & df['Block'].eq(0)
)
df = df[row_filter]
df

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626716,-5.491358,-18.113768,0.016458,-17.751234,-10.870638,-3.549485,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284819,-20.774286,-5.054771,-8.849820,-4.670221,-6.783212,-8.442829,-9.905901,-7.322884,-6.587358
8,HeStutters,1,4,28351360,28399360,0,0,0,0,0,...,26.878878,-11.660660,3.515312,-27.021893,-11.834179,-7.582844,-7.805755,-12.518568,-1.051659,-7.828530
11,HeStutters,1,8,28539840,28587840,0,0,0,0,0,...,13.250941,4.572205,0.690116,-12.168562,-10.612210,-17.491869,-2.735057,-9.947231,-9.058744,-8.354892
19,HeStutters,1,107,6638560,6686560,0,0,0,0,0,...,29.173650,7.216802,5.993933,-13.808914,-6.984492,-11.859511,-2.531782,-16.870178,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,WomenWhoStutter,109,24,16179040,16227040,0,0,0,0,0,...,26.002850,-16.124638,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,WomenWhoStutter,109,25,16224160,16272160,0,0,0,0,0,...,8.484955,-29.523212,-2.754525,-15.226071,-7.238213,1.549138,-8.778445,-10.499597,-2.756171,-6.887869
20860,WomenWhoStutter,109,31,20476320,20524320,0,0,0,0,0,...,16.823689,-23.059584,-6.954723,-10.113501,-13.114474,0.588608,-15.267632,-10.101980,-0.440983,0.197351
20862,WomenWhoStutter,109,33,24747360,24795360,0,0,0,0,0,...,18.849451,-23.086061,2.158868,-8.471071,1.332367,2.440633,-1.968005,-7.462198,0.142006,-4.275125


In [21]:
# Binarise the target: every WordRep value >= 1 collapses to 1, other values
# are left unchanged. assign + clip returns a new frame, so nothing is written
# through `.loc` into a frame that was produced by boolean filtering (which
# triggers pandas' SettingWithCopyWarning). Dtype and column position are kept.
df = df.assign(WordRep=df['WordRep'].clip(upper=1))
df['WordRep'].value_counts()

0    2471
1     192
Name: WordRep, dtype: int64

In [22]:
# Feature matrix: the last 13 columns of the frame, selected by position
# (the MFCC columns '0'..'12' listed by df.columns).
X = df.iloc[: , -13:]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
3,-300.60464,52.587803,-1.105800,22.788187,-2.626716,-5.491358,-18.113768,0.016458,-17.751234,-10.870638,-3.549485,-9.037821,-6.599053
4,-322.67563,39.411560,-26.036314,31.284819,-20.774286,-5.054771,-8.849820,-4.670221,-6.783212,-8.442829,-9.905901,-7.322884,-6.587358
8,-418.44520,103.886830,-30.266285,26.878878,-11.660660,3.515312,-27.021893,-11.834179,-7.582844,-7.805755,-12.518568,-1.051659,-7.828530
11,-494.81573,84.972070,1.017862,13.250941,4.572205,0.690116,-12.168562,-10.612210,-17.491869,-2.735057,-9.947231,-9.058744,-8.354892
19,-475.87134,93.217606,3.080705,29.173650,7.216802,5.993933,-13.808914,-6.984492,-11.859511,-2.531782,-16.870178,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,-396.81604,47.699257,-28.230510,26.002850,-16.124638,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,-366.09630,59.089500,-23.645912,8.484955,-29.523212,-2.754525,-15.226071,-7.238213,1.549138,-8.778445,-10.499597,-2.756171,-6.887869
20860,-316.00027,42.245804,-40.056350,16.823689,-23.059584,-6.954723,-10.113501,-13.114474,0.588608,-15.267632,-10.101980,-0.440983,0.197351
20862,-350.42902,27.453857,-35.155910,18.849451,-23.086061,2.158868,-8.471071,1.332367,2.440633,-1.968005,-7.462198,0.142006,-4.275125


In [23]:
# Target vector: the binarised WordRep label.
y = df['WordRep']
y

3        0
4        0
8        0
11       1
19       0
        ..
20853    0
20854    0
20860    0
20862    0
20867    0
Name: WordRep, Length: 2663, dtype: int64

In [24]:
from sklearn.model_selection import train_test_split
# Hold out 40% of the rows for testing; random_state=42 makes the split repeatable.
# NOTE(review): the split is not stratified although the classes are heavily
# imbalanced (2471 vs 192 in the recorded value_counts) — consider stratify=y.
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.4, random_state=42)

from collections import Counter
# Class counts in the training split.
Counter(y_train)

Counter({0: 1476, 1: 121})

In [25]:
from sklearn.tree import DecisionTreeClassifier
# Fit a gini-criterion decision tree on the training split and report the
# test-set accuracy as a percentage.
# NOTE(review): the recorded test split holds 995 class-0 rows out of 1066
# (~93.3%), so the recorded 86.1% accuracy is below the always-predict-0
# baseline. Accuracy alone is not informative on this imbalance — consider
# precision / recall / F1 or a confusion matrix.
clf = DecisionTreeClassifier(criterion='gini',random_state=5)
# Disabled alternatives that fit on other training sets; X_over / X_smote /
# X_pipe are not defined in any visible cell (presumably resampled data — verify).
# clf.fit(X_over, y_over)
# clf.fit(X_smote, y_smote)
# clf.fit(X_pipe, y_pipe)
clf.fit(X_train, y_train)
clf.score(X_test,y_test) * 100

86.11632270168855

In [26]:
# No visible earlier cell of this run imports numpy (its first import is in
# the final evaluation cell), so import it here rather than relying on
# leftover kernel state.
import numpy as np

# Compare how many rows the tree assigns to each class with the true counts.
y_pred = np.array(clf.predict(X_test))
y_actual = np.array(y_test)
print(Counter(y_pred))
print(Counter(y_actual))

Counter({0: 977, 1: 89})
Counter({0: 995, 1: 71})


## Model Creation & Training for Prolongation Classification

In [27]:
import pandas as pd
# Reload the full labels + MFCC CSV so the Prolongation experiment starts from unfiltered rows.
df = pd.read_csv('../input/sep28kmfcc/sep28k-mfcc.csv')

In [28]:
df.columns

Index(['Show', 'EpId', 'ClipId', 'Start', 'Stop', 'Unsure', 'PoorAudioQuality',
       'Prolongation', 'Block', 'SoundRep', 'WordRep', 'DifficultToUnderstand',
       'Interjection', 'NoStutteredWords', 'NaturalPause', 'Music', 'NoSpeech',
       'Name', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11',
       '12'],
      dtype='object')

In [29]:
df.shape

(20868, 31)

In [30]:
# Select the rows used for the Prolongation classifier with one combined mask:
# NoStutteredWords must be non-zero and NaturalPause, Interjection, WordRep,
# SoundRep and Block must all be zero. Prolongation is deliberately not
# filtered; it becomes the target in the following cells.
# NOTE(review): `NoStutteredWords != 0` also constrains the
# Prolongation-positive rows — confirm that this is the intended selection.
row_filter = (
    df['NoStutteredWords'].ne(0)
    & df['NaturalPause'].eq(0)
    & df['Interjection'].eq(0)
    & df['WordRep'].eq(0)
    & df['SoundRep'].eq(0)
    & df['Block'].eq(0)
)
df = df[row_filter]
df

Unnamed: 0,Show,EpId,ClipId,Start,Stop,Unsure,PoorAudioQuality,Prolongation,Block,SoundRep,...,3,4,5,6,7,8,9,10,11,12
2,HeStutters,0,19,9985280,10033280,0,0,1,0,0,...,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094393,-5.409697
3,HeStutters,0,20,11197600,11245600,0,0,0,0,0,...,22.788187,-2.626716,-5.491358,-18.113768,0.016458,-17.751234,-10.870638,-3.549485,-9.037821,-6.599053
4,HeStutters,0,26,3562240,3610240,0,0,0,0,0,...,31.284819,-20.774286,-5.054771,-8.849820,-4.670221,-6.783212,-8.442829,-9.905901,-7.322884,-6.587358
8,HeStutters,1,4,28351360,28399360,0,0,0,0,0,...,26.878878,-11.660660,3.515312,-27.021893,-11.834179,-7.582844,-7.805755,-12.518568,-1.051659,-7.828530
19,HeStutters,1,107,6638560,6686560,0,0,0,0,0,...,29.173650,7.216802,5.993933,-13.808914,-6.984492,-11.859511,-2.531782,-16.870178,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,WomenWhoStutter,109,24,16179040,16227040,0,0,0,0,0,...,26.002850,-16.124638,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,WomenWhoStutter,109,25,16224160,16272160,0,0,0,0,0,...,8.484955,-29.523212,-2.754525,-15.226071,-7.238213,1.549138,-8.778445,-10.499597,-2.756171,-6.887869
20860,WomenWhoStutter,109,31,20476320,20524320,0,0,0,0,0,...,16.823689,-23.059584,-6.954723,-10.113501,-13.114474,0.588608,-15.267632,-10.101980,-0.440983,0.197351
20862,WomenWhoStutter,109,33,24747360,24795360,0,0,0,0,0,...,18.849451,-23.086061,2.158868,-8.471071,1.332367,2.440633,-1.968005,-7.462198,0.142006,-4.275125


In [31]:
# Binarise the target: every Prolongation value >= 1 collapses to 1, other
# values are left unchanged. assign + clip returns a new frame, so nothing is
# written through `.loc` into a frame that was produced by boolean filtering
# (which triggers pandas' SettingWithCopyWarning). Dtype and column position
# are kept.
df = df.assign(Prolongation=df['Prolongation'].clip(upper=1))
df['Prolongation'].value_counts()

0    2471
1     914
Name: Prolongation, dtype: int64

In [32]:
# Feature matrix: the last 13 columns of the frame, selected by position
# (the MFCC columns '0'..'12' listed by df.columns).
X = df.iloc[: , -13:]
X

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
2,-293.65506,73.833070,1.900997,-2.408337,-4.000989,1.076604,-3.904765,-3.917793,-7.314900,-5.230086,1.173894,-10.094393,-5.409697
3,-300.60464,52.587803,-1.105800,22.788187,-2.626716,-5.491358,-18.113768,0.016458,-17.751234,-10.870638,-3.549485,-9.037821,-6.599053
4,-322.67563,39.411560,-26.036314,31.284819,-20.774286,-5.054771,-8.849820,-4.670221,-6.783212,-8.442829,-9.905901,-7.322884,-6.587358
8,-418.44520,103.886830,-30.266285,26.878878,-11.660660,3.515312,-27.021893,-11.834179,-7.582844,-7.805755,-12.518568,-1.051659,-7.828530
19,-475.87134,93.217606,3.080705,29.173650,7.216802,5.993933,-13.808914,-6.984492,-11.859511,-2.531782,-16.870178,-5.008951,-4.917668
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20853,-396.81604,47.699257,-28.230510,26.002850,-16.124638,-14.775825,-19.643902,-7.232812,-7.771849,-8.517389,1.568446,-6.449149,1.097682
20854,-366.09630,59.089500,-23.645912,8.484955,-29.523212,-2.754525,-15.226071,-7.238213,1.549138,-8.778445,-10.499597,-2.756171,-6.887869
20860,-316.00027,42.245804,-40.056350,16.823689,-23.059584,-6.954723,-10.113501,-13.114474,0.588608,-15.267632,-10.101980,-0.440983,0.197351
20862,-350.42902,27.453857,-35.155910,18.849451,-23.086061,2.158868,-8.471071,1.332367,2.440633,-1.968005,-7.462198,0.142006,-4.275125


In [33]:
# Target vector: the binarised Prolongation label.
y = df['Prolongation']
y

2        1
3        0
4        0
8        0
19       0
        ..
20853    0
20854    0
20860    0
20862    0
20867    0
Name: Prolongation, Length: 3385, dtype: int64

In [34]:
from sklearn.model_selection import train_test_split
# Hold out 40% of the rows for testing; random_state=42 makes the split repeatable.
# NOTE(review): the split is not stratified (recorded class counts: 2471 vs 914)
# — consider stratify=y so both splits keep the same class ratio.
X_train, X_test, y_train, y_test =  train_test_split(X, y, test_size=0.4, random_state=42)

from collections import Counter
# Class counts in the training split.
Counter(y_train)

Counter({0: 1493, 1: 538})

In [37]:
from sklearn.neighbors import KNeighborsClassifier
# Fit a 3-nearest-neighbours classifier on the training split and report the
# test-set accuracy as a percentage.
# NOTE(review): the recorded test split holds 978 class-0 rows out of 1354
# (~72.2%), so the recorded 66.8% accuracy is below the always-predict-0
# baseline. The MFCC features are used unscaled, and KNN is distance-based —
# consider standardising them and reporting precision / recall / F1.
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
knn.score(X_test, y_test) * 100

66.83899556868538

In [38]:
import numpy as np
# Compare how many rows the KNN model assigns to each class with the true
# class counts of the test split.
y_pred = np.array(knn.predict(X_test))
y_actual = np.array(y_test)
print(Counter(y_pred))
print(Counter(y_actual))

Counter({0: 1119, 1: 235})
Counter({0: 978, 1: 376})
