scikit-learn's `StratifiedKFold` cannot handle multi-label data.  
In this notebook I build stratified folds for multi-label data using a randomized algorithm.

In [1]:
# load libraries
import numpy as np
import pandas as pd
import time
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import KFold, StratifiedKFold

In [2]:
# set parameters
NUM_FOLD: int = 5  # number of cross-validation folds to build
SEED: int = 42  # seed passed to the scikit-learn splitters and to NumPy's global RNG

In [3]:
# fix random seed
# Seeds NumPy's global (legacy) RNG, which is the generator used by the
# np.random.choice / np.random.randint draws inside do_optimize.
np.random.seed(SEED)

In [4]:
# load data
df_train = pd.read_csv("../input/train_curated.csv")
df_test = pd.read_csv("../input/sample_submission.csv")

# preprocess labels
# Every column of the sample submission after the first one is used as a class name.
labels = df_test.columns[1:].tolist()
NUM_CLASS = len(labels)

# Split the comma-separated 'labels' string once and test exact membership.
# A plain substring test (`label in x`) would also fire whenever one class
# name happens to be contained inside another class name.
label_lists = df_train['labels'].str.split(',')
for label in labels:
    df_train[label] = label_lists.apply(lambda names: label in names)
print(df_train.shape)
df_train.head()

(4970, 82)


Unnamed: 0,fname,labels,Accelerating_and_revving_and_vroom,Accordion,Acoustic_guitar,Applause,Bark,Bass_drum,Bass_guitar,Bathtub_(filling_or_washing),Bicycle_bell,Burping_and_eructation,Bus,Buzz,Car_passing_by,Cheering,Chewing_and_mastication,Child_speech_and_kid_speaking,Chink_and_clink,Chirp_and_tweet,Church_bell,Clapping,Computer_keyboard,Crackle,Cricket,Crowd,Cupboard_open_or_close,Cutlery_and_silverware,Dishes_and_pots_and_pans,Drawer_open_or_close,Drip,Electric_guitar,Fart,Female_singing,Female_speech_and_woman_speaking,Fill_(with_liquid),Finger_snapping,Frying_(food),Gasp,Glockenspiel,...,Harmonica,Hi-hat,Hiss,Keys_jangling,Knock,Male_singing,Male_speech_and_man_speaking,Marimba_and_xylophone,Mechanical_fan,Meow,Microwave_oven,Motorcycle,Printer,Purr,Race_car_and_auto_racing,Raindrop,Run,Scissors,Screaming,Shatter,Sigh,Sink_(filling_or_washing),Skateboard,Slam,Sneeze,Squeak,Stream,Strum,Tap,Tick-tock,Toilet_flush,Traffic_noise_and_roadway_noise,Trickle_and_dribble,Walk_and_footsteps,Water_tap_and_faucet,Waves_and_surf,Whispering,Writing,Yell,Zipper_(clothing)
0,0006ae4e.wav,Bark,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,0019ef41.wav,Raindrop,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,001ec0ad.wav,Finger_snapping,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,0026c7cb.wav,Run,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,0026f116.wav,Finger_snapping,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [5]:
# try KFold
# Each element of `folds` is a (train_indices, validation_indices) pair.
folds = list(KFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED).split(np.arange(len(df_train))))
df_train['fold'] = 0
for i in range(NUM_FOLD):
    # Write through a single .loc call. The chained form
    # `df_train['fold'][idx] = i` may assign into a temporary object and is a
    # silent no-op under pandas copy-on-write.
    # df_train still carries the default RangeIndex from read_csv, so the
    # positional indices returned by KFold are also valid row labels.
    df_train.loc[folds[i][1], 'fold'] = i

In [6]:
# Check how well the folds are stratified.
# One row per label: positives in each fold's validation split, then the overall total.
print("fold                                         1    2    3    4    5   total")
print("==========================================================================")
val_indices = [folds[k][1] for k in range(NUM_FOLD)]
for label in labels:
    column = df_train[label]
    counts = [column[idx].sum() for idx in val_indices]
    counts.append(column.sum())
    print(label.ljust(40) + ": " + "".join("{:4d} ".format(c) for c in counts))
# Final row: number of samples per fold and in total.
sizes = [df_train.iloc[idx].shape[0] for idx in val_indices]
sizes.append(df_train.shape[0])
print("total".ljust(40) + ": " + "".join("{:4d} ".format(s) for s in sizes))

fold                                         1    2    3    4    5   total
Accelerating_and_revving_and_vroom      :   14   12   15   17   17   75 
Accordion                               :   16   11    7    8    5   47 
Acoustic_guitar                         :   17   14   11   12   21   75 
Applause                                :   16   15   16   14   14   75 
Bark                                    :   16   13   15   17   14   75 
Bass_drum                               :   18   13   16   14   14   75 
Bass_guitar                             :   13   16   17   17   12   75 
Bathtub_(filling_or_washing)            :   14   11   21   15   14   75 
Bicycle_bell                            :    8   14   18   11   16   67 
Burping_and_eructation                  :   21   19   11   11   13   75 
Bus                                     :   11   19   13   15   17   75 
Buzz                                    :   13    7   16   10   10   56 
Car_passing_by                          :   11   

As you can see, plain KFold does not stratify the labels well.  
Let's try a more sophisticated split.

In [7]:
# calculate number of positive label for each sample
# Row-wise sum over the boolean one-hot label columns.
df_train['num_labels'] = np.sum(df_train[labels].values, axis=1)
df_train[['labels', 'num_labels']].head(10)

Unnamed: 0,labels,num_labels
0,Bark,1
1,Raindrop,1
2,Finger_snapping,1
3,Run,1
4,Finger_snapping,1
5,Whispering,1
6,"Acoustic_guitar,Strum",2
7,Hi-hat,1
8,Bass_drum,1
9,"Crowd,Cheering",2


In [8]:
# extract data sample with single label and do StratifiedKFold

df_train_single = df_train[df_train['num_labels']==1].reset_index(drop=True)
# With exactly one positive label per row, argmax over the label columns
# gives that label's column index, which is used as the stratification target.
single_folds = list(StratifiedKFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED).split(
    np.arange(len(df_train_single)), df_train_single[labels].values.argmax(axis=1)))
df_train_single['fold'] = 0
for i in range(NUM_FOLD):
    # Single .loc assignment instead of chained indexing
    # (`df['fold'][idx] = i`), which may write to a temporary object and does
    # nothing under pandas copy-on-write. The index was just reset, so the
    # positional indices from the splitter are valid row labels.
    df_train_single.loc[single_folds[i][1], 'fold'] = i

In [9]:
# Check how well the folds are stratified.
# One row per label: positives inside each fold, then the overall total.
print("fold                                         1    2    3    4    5   total")
print("==========================================================================")
fold_masks = [df_train_single['fold'] == k for k in range(NUM_FOLD)]
for label in labels:
    column = df_train_single[label]
    counts = [column[mask].sum() for mask in fold_masks] + [column.sum()]
    print(label.ljust(40) + ": " + "".join("{:4d} ".format(c) for c in counts))
# Final row: number of samples per fold and in total.
sizes = [df_train_single[mask].shape[0] for mask in fold_masks]
sizes.append(df_train_single.shape[0])
print("total".ljust(40) + ": " + "".join("{:4d} ".format(s) for s in sizes))

fold                                         1    2    3    4    5   total
Accelerating_and_revving_and_vroom      :    7    7    6    6    6   32 
Accordion                               :   10   10    9    9    9   47 
Acoustic_guitar                         :    1    0    1    1    0    3 
Applause                                :    0    0    0    0    0    0 
Bark                                    :   14   14   14   14   13   69 
Bass_drum                               :   14   14   14   13   13   68 
Bass_guitar                             :   13   13   13   13   12   64 
Bathtub_(filling_or_washing)            :   12   11   11   11   11   56 
Bicycle_bell                            :   14   14   13   13   13   67 
Burping_and_eructation                  :   15   15   15   15   14   74 
Bus                                     :   14   14   14   14   14   70 
Buzz                                    :   11   11   10   10   10   52 
Car_passing_by                          :   13   

In [10]:
# extract data sample with multi labels
# (every row whose positive-label count is not exactly one)
df_train_multi = df_train.loc[df_train['num_labels'] != 1].reset_index(drop=True)

# count each label
# label_counts[k] is the number of positives of labels[k] in the multi-label subset.
label_counts = []
for idx, name in enumerate(labels):
    count = df_train_multi[name].sum()
    label_counts.append(count)
    print("{:2d} {} {}".format(idx, name.ljust(40), count))

 0 Accelerating_and_revving_and_vroom       43
 1 Accordion                                0
 2 Acoustic_guitar                          72
 3 Applause                                 75
 4 Bark                                     6
 5 Bass_drum                                7
 6 Bass_guitar                              11
 7 Bathtub_(filling_or_washing)             19
 8 Bicycle_bell                             0
 9 Burping_and_eructation                   1
10 Bus                                      5
11 Buzz                                     4
12 Car_passing_by                           13
13 Cheering                                 75
14 Chewing_and_mastication                  1
15 Child_speech_and_kid_speaking            3
16 Chink_and_clink                          37
17 Chirp_and_tweet                          9
18 Church_bell                              2
19 Clapping                                 23
20 Computer_keyboard                        0
21 Crackle               

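A multi-label sample cannot be stratified on all of its labels at once.  
Labels with higher counts matter more, so they should be the ones that get stratified.  
Therefore, for each sample let's keep only its most frequent label (by count in the multi-label subset) and ignore the others.  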

In [11]:
# Collapse each multi-label row to a single class index for stratification.
# Classes are visited from the rarest to the most frequent, so later (more
# frequent) classes overwrite earlier ones and every row ends up tagged with
# the most frequent of its labels.
# NOTE(review): a row with no positive label keeps the initial value 0 and is
# therefore grouped with class 0 — confirm such rows do not occur.
order_by_count = np.argsort(label_counts)  # ascending; sorted once instead of once per iteration
# int64 rather than uint8 so class indices >= 256 can never wrap around
reduced_label = np.zeros(len(df_train_multi), np.int64)
for target_idx in order_by_count:
    reduced_label[df_train_multi[labels[target_idx]].values == 1] = target_idx

In [12]:
# Do StratifiedKFold using reduced label
multi_folds = list(StratifiedKFold(n_splits=NUM_FOLD, shuffle=True, random_state=SEED).split(
    np.arange(len(df_train_multi)), reduced_label))
# df_train_multi was sliced from df_train, so reset 'fold' before filling it;
# otherwise any fold values already present on df_train would carry over.
df_train_multi['fold'] = 0
for i in range(NUM_FOLD):
    # Single .loc assignment instead of chained indexing
    # (`df['fold'][idx] = i`), which may write to a temporary object and does
    # nothing under pandas copy-on-write. The index was reset when the subset
    # was created, so positional indices are valid row labels.
    df_train_multi.loc[multi_folds[i][1], 'fold'] = i

In [14]:
# Check how well the folds are stratified.
# One row per label: positives inside each fold, then the overall total.
print("fold                                         1    2    3    4    5   total")
print("==========================================================================")
fold_of_row = df_train_multi['fold']
for label in labels:
    column = df_train_multi[label]
    cells = ["{:4d} ".format(column[fold_of_row == k].sum()) for k in range(NUM_FOLD)]
    cells.append("{:4d} ".format(column.sum()))
    print(label.ljust(40) + ": " + "".join(cells))
# Final row: number of samples per fold and in total.
cells = ["{:4d} ".format(df_train_multi[fold_of_row == k].shape[0]) for k in range(NUM_FOLD)]
cells.append("{:4d} ".format(df_train_multi.shape[0]))
print("total".ljust(40) + ": " + "".join(cells))

fold                                         1    2    3    4    5   total
Accelerating_and_revving_and_vroom      :    8    9    9    8    9   43 
Accordion                               :    0    0    0    0    0    0 
Acoustic_guitar                         :   12   15   15   16   14   72 
Applause                                :   16   15   15   15   14   75 
Bark                                    :    2    0    2    1    1    6 
Bass_drum                               :    2    0    1    3    1    7 
Bass_guitar                             :    2    3    2    2    2   11 
Bathtub_(filling_or_washing)            :    4    3    4    5    3   19 
Bicycle_bell                            :    0    0    0    0    0    0 
Burping_and_eructation                  :    0    0    0    1    0    1 
Bus                                     :    1    1    1    1    1    5 
Buzz                                    :    3    1    0    0    0    4 
Car_passing_by                          :    1   

In [15]:
# concatenate single-label data and multi-label data
# (rows are renumbered 0..n-1 in the combined frame)
df_train2 = pd.concat([df_train_single, df_train_multi], ignore_index=True)

In [16]:
# Check how well the folds are stratified.
# One row per label: positives inside each fold, then the overall total.
print("fold                                         1    2    3    4    5   total")
print("==========================================================================")
in_fold = [df_train2['fold'] == k for k in range(NUM_FOLD)]
for label in labels:
    row = ": "
    for mask in in_fold:
        row += "{:4d} ".format(df_train2[label][mask].sum())
    row += "{:4d} ".format(df_train2[label].sum())
    print(label.ljust(40) + row)
# Final row: number of samples per fold and in total.
row = ": "
for mask in in_fold:
    row += "{:4d} ".format(df_train2[mask].shape[0])
row += "{:4d} ".format(df_train2.shape[0])
print("total".ljust(40) + row)

fold                                         1    2    3    4    5   total
Accelerating_and_revving_and_vroom      :   15   16   15   14   15   75 
Accordion                               :   10   10    9    9    9   47 
Acoustic_guitar                         :   13   15   16   17   14   75 
Applause                                :   16   15   15   15   14   75 
Bark                                    :   16   14   16   15   14   75 
Bass_drum                               :   16   14   15   16   14   75 
Bass_guitar                             :   15   16   15   15   14   75 
Bathtub_(filling_or_washing)            :   16   14   15   16   14   75 
Bicycle_bell                            :   14   14   13   13   13   67 
Burping_and_eructation                  :   15   15   15   16   14   75 
Bus                                     :   15   15   15   15   15   75 
Buzz                                    :   14   12   10   10   10   56 
Car_passing_by                          :   14   

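You can see the folds are now better stratified, but they are still not perfect.  
So let's apply a randomized algorithm.  
  
If the folds are well stratified, the count of each label varies little from fold to fold, i.e. its standard deviation across folds is small.  
So let's define the score to minimize as the mean, over all labels, of the standard deviation across folds of that label's count.  
The total number of labels per fold (the sum of `num_labels`) should also vary little, so it is included in the mean as one more column.  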

In [23]:
def calc_score(df):
    """
    Score how evenly the labels are spread across folds (lower is better).

    For every fold, the positives of each label column and the 'num_labels'
    column are summed. The score is the standard deviation of each of those
    sums across folds, averaged over all columns.

    df: dataframe with a 'fold' column, one column per entry of `labels`,
        and a 'num_labels' column
    returns: the score as a float
    """
    columns = labels + ['num_labels']
    # Use NUM_FOLD rather than a literal 5 so the score follows the configured fold count.
    score = np.zeros([NUM_FOLD, NUM_CLASS+1])
    for i in range(NUM_FOLD):
        score[i] = df.loc[df.fold==i, columns].values.sum(axis=0)
    return score.std(axis=0).mean()
# df_train carries the plain KFold assignment, df_train2 the stratified one.
# Reuse the computed value instead of calling calc_score(df_train) a second time.
score = calc_score(df_train)
print("KFold score: {:.6f}".format(score))
print("StratifiedKFold score: {:.6f}".format(calc_score(df_train2)))

KFold score: 3.292604
StratifiedKFold score: 1.184777


In [28]:
def do_optimize(df, size, steps):
    """
    Randomized hill climbing over the fold assignment.

    Each step moves `size` randomly chosen rows to randomly drawn folds and
    keeps the new assignment only if calc_score strictly decreases.

    df: dataframe to optimize folds
    size: number of data to change fold
    steps: number of for loop
    returns: dataframe with the best fold assignment found; the dataframe
        passed in is not modified (candidates are built on copies)
    """
    starttime = time.time()
    score = calc_score(df)
    fold_col = df.columns.get_loc('fold')
    for i in range(steps):
        # select index to change fold (row positions, drawn without replacement)
        change_idx = np.random.choice(np.arange(df.shape[0]), size, replace=False)
        # change fold randomly
        change_fold = np.random.randint(0, NUM_FOLD, size)
        df_new = df.copy()
        # change_idx holds row positions, so write positionally with .iloc.
        # The chained form `df_new['fold'][change_idx] = ...` may assign into a
        # temporary object and is a silent no-op under pandas copy-on-write,
        # which would leave every candidate identical to the current best.
        df_new.iloc[change_idx, fold_col] = change_fold

        score_new = calc_score(df_new)
        if score_new < score: # if score getting small, folds will be update
            score = score_new
            df = df_new
        if i%100==0:
            print("step: {:4d}, change size: {:2d}, score: {:.6f}, sec: {:.1f}".format(
                i, size, score, time.time()-starttime))
    return df

In [None]:
# Let's do optimization with randomized algorithm.
# Coarse-to-fine schedule: start by moving many rows per step, finish with
# single-row moves. Each entry is (rows changed per step, number of steps).
schedule = [
    (64, 1000),
    (32, 1000),
    (16, 1000),
    (8, 1000),
    (4, 1000),
    (2, 1000),
    (1, 10000),
]
df_train3 = df_train2.copy()
for change_size, num_steps in schedule:
    df_train3 = do_optimize(df_train3, size=change_size, steps=num_steps)

print("StratifiedKFold with randomized algorithm score: {:.6f}".format(calc_score(df_train3)))

step:    0, change size: 64, score: 1.184777, sec: 0.1
step:  100, change size: 64, score: 1.184777, sec: 12.3
step:  200, change size: 64, score: 1.184777, sec: 24.4
step:  300, change size: 64, score: 1.174183, sec: 36.0
step:  400, change size: 64, score: 1.151375, sec: 48.2
step:  500, change size: 64, score: 1.151375, sec: 60.4
step:  600, change size: 64, score: 1.151375, sec: 72.1
step:  700, change size: 64, score: 1.151375, sec: 83.6
step:  800, change size: 64, score: 1.151375, sec: 95.0
step:  900, change size: 64, score: 1.151375, sec: 106.4
step:    0, change size: 32, score: 1.151375, sec: 0.1
step:  100, change size: 32, score: 1.148799, sec: 12.0
step:  200, change size: 32, score: 1.133602, sec: 23.0
step:  300, change size: 32, score: 1.127135, sec: 34.2
step:  400, change size: 32, score: 1.107961, sec: 45.9
step:  500, change size: 32, score: 1.107961, sec: 57.2
step:  600, change size: 32, score: 1.107961, sec: 68.4
step:  700, change size: 32, score: 1.107961, sec

In [30]:
# Check how well the folds are stratified.
# One row per label: positives inside each fold, then the overall total.
print("fold                                         1    2    3    4    5   total")
print("==========================================================================")
for label in labels:
    label_padded = label + " "*(40-len(label))
    dist = ": "
    for i in range(NUM_FOLD):
        dist += "{:4d} ".format(df_train3[label][df_train3['fold']==i].sum())
    dist += "{:4d} ".format(df_train3[label].sum())
    print(label_padded + dist)
# Final row: number of samples per fold and in total.
label_padded = "total" + " "*(40-len("total"))
dist = ": "
# Iterate over NUM_FOLD (not a literal 5) so this row always covers the same
# folds as the per-label rows above.
for i in range(NUM_FOLD):
    dist += "{:4d} ".format(df_train3[df_train3['fold']==i].shape[0])
dist += "{:4d} ".format(df_train3.shape[0])
print(label_padded + dist)

fold                                         1    2    3    4    5   total
Accelerating_and_revving_and_vroom      :   14   16   16   14   15   75 
Accordion                               :    9   10   10    9    9   47 
Acoustic_guitar                         :   12   16   16   16   15   75 
Applause                                :   16   14   14   15   16   75 
Bark                                    :   16   14   15   15   15   75 
Bass_drum                               :   16   14   14   16   15   75 
Bass_guitar                             :   15   16   16   14   14   75 
Bathtub_(filling_or_washing)            :   16   14   15   16   14   75 
Bicycle_bell                            :   14   13   13   12   15   67 
Burping_and_eructation                  :   15   15   15   15   15   75 
Bus                                     :   15   15   15   15   15   75 
Buzz                                    :   14   12   10   10   10   56 
Car_passing_by                          :   14   

OK. Now we've got well-stratified folds.

In [31]:
# save
# `index` is a boolean option; pass False explicitly (instead of relying on
# None being falsy) to leave the row index out of the CSV.
df_train3.to_csv("train_stratified.csv", index=False)

I'm not very familiar with randomized algorithms.  
If you know a smarter way to do this, please let me know.