In [8]:
import pandas as pd
import numpy as np

In [10]:
inFea = pd.read_csv("../Data/features_30_sec.csv")
inFea
features_in = inFea[["filename","label"]]

In [12]:
features_in

Unnamed: 0,filename,label
0,blues.00000.wav,blues
1,blues.00001.wav,blues
2,blues.00002.wav,blues
3,blues.00003.wav,blues
4,blues.00004.wav,blues
...,...,...
995,rock.00095.wav,rock
996,rock.00096.wav,rock
997,rock.00097.wav,rock
998,rock.00098.wav,rock


In [14]:
!pip install -v "librosa==0.9"

Using pip 24.2 from /opt/anaconda3/lib/python3.12/site-packages/pip (python 3.12)


In [15]:
def load(filepath):

    filename = os.path.basename(filepath)

    if 'features' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'echonest' in filename:
        return pd.read_csv(filepath, index_col=0, header=[0, 1, 2])

    if 'genres' in filename:
        return pd.read_csv(filepath, index_col=0)

    if 'tracks' in filename:
        tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])

        COLUMNS = [('track', 'tags'), ('album', 'tags'), ('artist', 'tags'),
                   ('track', 'genres'), ('track', 'genres_all')]
        for column in COLUMNS:
            tracks[column] = tracks[column].map(ast.literal_eval)

        COLUMNS = [('track', 'date_created'), ('track', 'date_recorded'),
                   ('album', 'date_created'), ('album', 'date_released'),
                   ('artist', 'date_created'), ('artist', 'active_year_begin'),
                   ('artist', 'active_year_end')]
        for column in COLUMNS:
            tracks[column] = pd.to_datetime(tracks[column])

        SUBSETS = ('small', 'medium', 'large')
        try:
            tracks['set', 'subset'] = tracks['set', 'subset'].astype(
                    'category', categories=SUBSETS, ordered=True)
        except (ValueError, TypeError):
            # the categories and ordered arguments were removed in pandas 0.25
            tracks['set', 'subset'] = tracks['set', 'subset'].astype(
                     pd.CategoricalDtype(categories=SUBSETS, ordered=True))

        COLUMNS = [('track', 'genre_top'), ('track', 'license'),
                   ('album', 'type'), ('album', 'information'),
                   ('artist', 'bio')]
        for column in COLUMNS:
            tracks[column] = tracks[column].astype('category')

        return tracks


In [18]:
#!/usr/bin/env python3

# FMA: A Dataset For Music Analysis
# Michaël Defferrard, Kirell Benzi, Pierre Vandergheynst, Xavier Bresson, EPFL LTS2.

# All features are extracted using [librosa](https://github.com/librosa/librosa).
# Alternatives:
# * [Essentia](http://essentia.upf.edu) (C++ with Python bindings)
# * [MARSYAS](https://github.com/marsyas/marsyas) (C++ with Python bindings)
# * [RP extract](http://www.ifs.tuwien.ac.at/mir/downloads.html) (Matlab, Java, Python)
# * [jMIR jAudio](http://jmir.sourceforge.net) (Java)
# * [MIRtoolbox](https://www.jyu.fi/hum/laitokset/musiikki/en/research/coe/materials/mirtoolbox) (Matlab)

import os
import multiprocessing
import warnings
import numpy as np
from scipy import stats
import pandas as pd
import librosa
from tqdm import tqdm


def columns():
    feature_sizes = dict(mel_spec=128)
    moments = ('mean', 'std', 'skew', 'kurtosis', 'median', 'min', 'max')

    columns = []
    for name, size in feature_sizes.items():
        for moment in moments:
            it = ((name, moment, '{:02d}'.format(i+1)) for i in range(size))
            columns.extend(it)

    names = ('feature', 'statistics', 'number')
    columns = pd.MultiIndex.from_tuples(columns, names=names)
    # More efficient to slice if indexes are sorted.
    return columns.sort_values()


def compute_features(audioName,genreName):

    features = pd.Series(index=columns(), dtype=np.float32)
    
    # Catch warnings as exceptions (audioread leaks file descriptors).
    warnings.filterwarnings('error', module='librosa')

    def feature_stats(name, values):
        features[name, 'mean'] = np.mean(values, axis=1)
        features[name, 'std'] = np.std(values, axis=1)
        features[name, 'skew'] = stats.skew(values, axis=1)
        features[name, 'kurtosis'] = stats.kurtosis(values, axis=1)
        features[name, 'median'] = np.median(values, axis=1)
        features[name, 'min'] = np.min(values, axis=1)
        features[name, 'max'] = np.max(values, axis=1)

    try:
        filepath = "../Data/genres_original/"+genreName+'/'+audioName
        x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast
        stft = np.abs(librosa.stft(x, n_fft=2048, hop_length=512))
        assert stft.shape[0] == 1 + 2048 // 2
        assert np.ceil(len(x)/512) <= stft.shape[1] <= np.ceil(len(x)/512)+1
        del x
        
        f = librosa.feature.melspectrogram(S=stft)
        feature_stats('mel_spec', f)
        del stft

    except Exception as e:
        print('{}'.format( repr(e)))

    return features



 

In [19]:
feature_val = features_in.apply(lambda row: compute_features(row['filename'], row['label']), axis=1)

  x, sr = librosa.load(filepath, sr=None, mono=True)  # kaiser_fast


NoBackendError()


In [21]:
newFeatureCols = [("track_id",'',''),('genre','','')]+feature_val.columns.to_list()

In [22]:
features_all=pd.concat([features_in,feature_val],axis=1)

In [23]:
features_all.columns = pd.MultiIndex.from_tuples(newFeatureCols)

In [28]:
features_all


Unnamed: 0_level_0,track_id,genre,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec,mel_spec
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,std,std,std,std,std,std,std
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,01,02,03,04,05,06,07,08,...,90,91,92,93,94,95,96,97,98,99
0,blues.00000.wav,blues,4.545462,2.428123,0.971933,1.487167,1.169425,1.625228,0.553048,0.947657,...,0.013656,0.019719,0.019596,0.006628,0.003570,0.002016,0.000734,0.000214,0.000074,0.000054
1,blues.00001.wav,blues,1.877189,0.731742,0.432973,-0.243875,0.962582,0.542744,1.044824,3.405566,...,0.016012,0.017563,0.017143,0.007377,0.002923,0.001276,0.000458,0.000161,0.000100,0.000095
2,blues.00002.wav,blues,3.196195,0.331318,-0.384444,0.061518,1.126822,1.134549,0.905932,3.999423,...,0.013496,0.010367,0.007239,0.004774,0.003003,0.001428,0.000617,0.000229,0.000125,0.000117
3,blues.00003.wav,blues,9.931634,1.057769,0.115139,0.501024,6.040970,2.219354,3.198832,1.194213,...,0.010027,0.008531,0.006047,0.003424,0.001941,0.000859,0.000360,0.000100,0.000023,0.000010
4,blues.00004.wav,blues,40.081970,46.160686,4.799722,4.871468,6.944076,5.145172,1.861009,5.865312,...,0.009556,0.006197,0.003265,0.002155,0.001030,0.000444,0.000171,0.000066,0.000049,0.000048
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,rock.00095.wav,rock,6.234760,0.866811,2.064066,-0.006483,4.181277,-0.310089,1.562398,1.514468,...,0.016380,0.012500,0.009881,0.007557,0.004069,0.001762,0.000684,0.000233,0.000095,0.000083
996,rock.00096.wav,rock,3.122699,2.571311,0.757304,1.755872,1.213300,1.717747,8.646959,1.209470,...,0.011921,0.009854,0.007112,0.004676,0.002511,0.001171,0.000475,0.000156,0.000059,0.000048
997,rock.00097.wav,rock,1.031788,1.298310,2.353392,1.787102,1.351364,0.987491,1.479288,3.693381,...,0.014529,0.012461,0.008469,0.005428,0.003328,0.001589,0.000602,0.000258,0.000201,0.000195
998,rock.00098.wav,rock,1.217965,0.490440,0.412368,0.067220,0.125028,2.976808,1.677172,0.949260,...,0.006513,0.005094,0.003659,0.002649,0.001635,0.000834,0.000328,0.000107,0.000042,0.000037


In [30]:
features_all.to_csv("custom_more_data_mel_spec.csv")

In [849]:
features_all_one_hot_df = features_all['genre'].apply(lambda x: pd.Series({val: 1 for val in x})).fillna(0)

In [220]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestClassifier

In [319]:
model = RandomForestClassifier(n_estimators=100)

In [321]:
X = features_all.drop(["track_id","genre"],axis=1)
y = features_all["genre"]

  X = features_all.drop(["track_id","genre"],axis=1)


In [336]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [338]:
model.fit(x_train,y_train)

In [340]:
y_pred=model.predict(x_test)

In [342]:
accuracy = accuracy_score(y_test,y_pred)

In [344]:
accuracy

0.79

In [362]:
itend = pd.read_csv('itend.csv',index_col=[0],header=[0, 1,2])

In [364]:
itend

feature,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
number,01,02,03,04,05,06,07,08,09,10,...,04,05,06,01,01,01,01,01,01,01
0,-0.140905,-0.213307,-0.416046,-1.174401,-1.175695,0.382804,0.045361,-0.807864,-0.578678,-0.104263,...,0.150906,0.047585,0.058541,39.566994,0.575195,0.05988,0.056152,0.0,4.427992,0.039989


In [368]:
model.predict(itend)

array(['hiphop'], dtype=object)

In [426]:
features = load("../../Genre Prediction/Data/Original Data/fma_metadata/features.csv")

In [428]:
features

feature,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
statistics,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
number,01,02,03,04,05,06,07,08,09,10,...,04,05,06,01,01,01,01,01,01,01
track_id,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2,7.180653,5.230309,0.249321,1.347620,1.482478,0.531371,1.481593,2.691455,0.866868,1.341231,...,0.054125,0.012226,0.012111,5.758890,0.459473,0.085629,0.071289,0.000000,2.089872,0.061448
3,1.888963,0.760539,0.345297,2.295201,1.654031,0.067592,1.366848,1.054094,0.108103,0.619185,...,0.063831,0.014212,0.017740,2.824694,0.466309,0.084578,0.063965,0.000000,1.716724,0.069330
5,0.527563,-0.077654,-0.279610,0.685883,1.937570,0.880839,-0.923192,-0.927232,0.666617,1.038546,...,0.040730,0.012691,0.014759,6.808415,0.375000,0.053114,0.041504,0.000000,2.193303,0.044861
10,3.702245,-0.291193,2.196742,-0.234449,1.367364,0.998411,1.770694,1.604566,0.521217,1.982386,...,0.074358,0.017952,0.013921,21.434212,0.452148,0.077515,0.071777,0.000000,3.542325,0.040800
20,-0.193837,-0.198527,0.201546,0.258556,0.775204,0.084794,-0.289294,-0.816410,0.043851,-0.804761,...,0.095003,0.022492,0.021355,16.669037,0.469727,0.047225,0.040039,0.000977,3.189831,0.030993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155316,-0.490129,0.463834,2.321970,-0.084352,1.662914,2.115189,-0.237794,5.695442,0.830353,1.951819,...,0.128410,0.022547,0.019816,4.448255,0.172852,0.028773,0.028320,0.003906,0.955388,0.012385
155317,-0.461559,-0.229601,-0.496632,-0.422033,0.130612,-0.263825,-0.628103,-0.082687,-0.229483,-0.492753,...,0.132964,0.023548,0.026527,3.270612,0.196289,0.031116,0.027832,0.002441,1.283060,0.019059
155318,0.552473,-0.110498,-0.532014,0.263131,-0.224011,-0.530972,1.713526,1.418444,1.325197,0.120333,...,0.108324,0.017540,0.020471,2.356727,0.212891,0.038450,0.037109,0.003418,0.828569,0.017904
155319,-0.176901,0.187208,-0.050664,0.368843,0.066005,-0.857354,-0.780860,0.626281,-0.630938,-0.787229,...,0.088311,0.018328,0.017936,6.188604,0.167480,0.041480,0.038086,0.004883,1.818740,0.020133


In [380]:
import ast
tracks = load("../../Genre Prediction/Data/Original Data/fma_metadata/tracks.csv")

In [386]:
tracks=tracks['track']

In [390]:
tracks_genre = tracks.reset_index()[['track_id','genre_top']]

In [392]:
tracks_genre

Unnamed: 0,track_id,genre_top
0,2,Hip-Hop
1,3,Hip-Hop
2,5,Hip-Hop
3,10,Pop
4,20,
...,...,...
106569,155316,Rock
106570,155317,Rock
106571,155318,Rock
106572,155319,Rock


In [400]:
col_fea = features.columns

In [430]:
features = features.droplevel(0,axis=1)
features = features.droplevel(0,axis=1)

In [434]:
features.reset_index(inplace=True)

In [436]:
features_genre = pd.merge(tracks_genre,features,on="track_id",how="inner")

In [462]:
features_genre.columns = pd.MultiIndex.from_tuples([("track_id",'',''),("genre",'','')]+col_fea.to_list()[1:])

In [464]:
features_genre.to_csv("feature_gerne_clean.csv")

In [466]:
features_genre

Unnamed: 0_level_0,track_id,genre,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,01,02,03,04,05,06,07,08,...,04,05,06,01,01,01,01,01,01,01
0,2,Hip-Hop,7.180653,5.230309,0.249321,1.347620,1.482478,0.531371,1.481593,2.691455,...,0.054125,0.012226,0.012111,5.758890,0.459473,0.085629,0.071289,0.000000,2.089872,0.061448
1,3,Hip-Hop,1.888963,0.760539,0.345297,2.295201,1.654031,0.067592,1.366848,1.054094,...,0.063831,0.014212,0.017740,2.824694,0.466309,0.084578,0.063965,0.000000,1.716724,0.069330
2,5,Hip-Hop,0.527563,-0.077654,-0.279610,0.685883,1.937570,0.880839,-0.923192,-0.927232,...,0.040730,0.012691,0.014759,6.808415,0.375000,0.053114,0.041504,0.000000,2.193303,0.044861
3,10,Pop,3.702245,-0.291193,2.196742,-0.234449,1.367364,0.998411,1.770694,1.604566,...,0.074358,0.017952,0.013921,21.434212,0.452148,0.077515,0.071777,0.000000,3.542325,0.040800
4,20,,-0.193837,-0.198527,0.201546,0.258556,0.775204,0.084794,-0.289294,-0.816410,...,0.095003,0.022492,0.021355,16.669037,0.469727,0.047225,0.040039,0.000977,3.189831,0.030993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106569,155316,Rock,-0.490129,0.463834,2.321970,-0.084352,1.662914,2.115189,-0.237794,5.695442,...,0.128410,0.022547,0.019816,4.448255,0.172852,0.028773,0.028320,0.003906,0.955388,0.012385
106570,155317,Rock,-0.461559,-0.229601,-0.496632,-0.422033,0.130612,-0.263825,-0.628103,-0.082687,...,0.132964,0.023548,0.026527,3.270612,0.196289,0.031116,0.027832,0.002441,1.283060,0.019059
106571,155318,Rock,0.552473,-0.110498,-0.532014,0.263131,-0.224011,-0.530972,1.713526,1.418444,...,0.108324,0.017540,0.020471,2.356727,0.212891,0.038450,0.037109,0.003418,0.828569,0.017904
106572,155319,Rock,-0.176901,0.187208,-0.050664,0.368843,0.066005,-0.857354,-0.780860,0.626281,...,0.088311,0.018328,0.017936,6.188604,0.167480,0.041480,0.038086,0.004883,1.818740,0.020133


In [460]:
features_all

Unnamed: 0_level_0,track_id,genre,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,chroma_cens,...,tonnetz,tonnetz,tonnetz,zcr,zcr,zcr,zcr,zcr,zcr,zcr
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,kurtosis,...,std,std,std,kurtosis,max,mean,median,min,skew,std
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,01,02,03,04,05,06,07,08,...,04,05,06,01,01,01,01,01,01,01
0,blues.00000.wav,blues,-0.078091,-0.290040,0.142337,0.046511,-1.092855,-0.786504,-0.835958,-0.446122,...,0.138799,0.026220,0.037989,3.902308,0.242676,0.083045,0.078125,0.033203,1.427675,0.027694
1,blues.00001.wav,blues,0.497728,-0.156311,0.986335,0.067993,-0.790453,3.687446,-1.138991,-0.923531,...,0.179517,0.029268,0.041067,9.201668,0.323730,0.056040,0.047852,0.006348,2.363820,0.038046
2,blues.00002.wav,blues,-0.532496,-0.776484,1.007275,0.825708,0.276671,-0.682883,1.406657,0.022634,...,0.073724,0.033821,0.028017,37.734068,0.440918,0.076291,0.073730,0.012695,4.185553,0.031731
3,blues.00003.wav,blues,3.513676,0.430777,0.822450,-1.208157,-0.965539,-1.065736,-0.546926,-1.031764,...,0.132739,0.060522,0.034751,2.021524,0.145020,0.033309,0.030762,0.006348,1.073028,0.020561
4,blues.00004.wav,blues,1.143850,0.123680,-0.372205,-0.437249,-0.086288,0.243630,-0.011765,-0.096848,...,0.089078,0.032872,0.019889,5.706599,0.366211,0.101461,0.092773,0.039551,1.938532,0.044205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,rock.00095.wav,rock,0.464216,-0.515369,-0.988768,-0.729247,-0.215835,-0.199535,-1.169909,-0.421062,...,0.133570,0.056552,0.041105,4.357486,0.318848,0.089227,0.084473,0.021973,1.406124,0.036121
996,rock.00096.wav,rock,-0.859656,1.034301,-0.437038,-0.836010,-0.170238,-0.948016,-1.013720,-0.831747,...,0.084923,0.074198,0.026935,0.690220,0.195312,0.097664,0.098145,0.011719,-0.306379,0.029454
997,rock.00097.wav,rock,0.262118,-0.432864,0.424227,-0.359552,-0.628410,-0.872857,-0.822605,-0.701298,...,0.095067,0.031494,0.034359,2.288512,0.314453,0.121823,0.122559,0.026367,0.811464,0.041192
998,rock.00098.wav,rock,0.515022,-1.052786,3.600510,0.012773,-0.701663,0.112954,-0.750285,-0.647902,...,0.169529,0.050952,0.069180,2.488822,0.193848,0.048724,0.042969,0.008789,1.362758,0.028417


In [471]:
all_data = pd.concat([features_genre,features_all])

In [477]:
X = all_data.dropna().drop(["track_id","genre"],axis=1)
y = all_data.dropna()["genre"]

  X = all_data.dropna().drop(["track_id","genre"],axis=1)


In [780]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [828]:
new_model = RandomForestClassifier(n_estimators=1000)

In [784]:
new_model.fit(x_train,y_train)

In [785]:
new_y_pred=new_model.predict(x_test)

In [786]:
accuracy_new = accuracy_score(y_test,new_y_pred)

In [646]:
from imblearn.under_sampling import RandomUnderSampler

undersampler = RandomUnderSampler(random_state=42)
X_resampled, y_resampled = undersampler.fit_resample(X_train, y_train)

NameError: name 'X_train' is not defined

In [787]:
accuracy_new

0.6228590250329381

In [788]:
from imblearn.under_sampling import RandomUnderSampler

undersampler = RandomUnderSampler()
X_resampled, y_resampled = undersampler.fit_resample(X, y)

In [789]:
y_resampled.value_counts()

genre
Blues                  24
Classical              24
reggae                 24
pop                    24
metal                  24
jazz                   24
hiphop                 24
disco                  24
country                24
classical              24
blues                  24
Spoken                 24
Soul-RnB               24
Rock                   24
Pop                    24
Old-Time / Historic    24
Jazz                   24
International          24
Instrumental           24
Hip-Hop                24
Folk                   24
Experimental           24
Electronic             24
Easy Listening         24
Country                24
rock                   24
Name: count, dtype: int64

In [811]:
x_train, x_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.3)

In [830]:
new_model.fit(X_resampled,y_resampled)

In [832]:
new_y= new_model.predict(x_test)

In [834]:
accuracy_newst = accuracy_score(y_test,new_y)

In [837]:
accuracy_newst

1.0

In [839]:
new_y == y_test

41569    True
1779     True
34427    True
468      True
753      True
         ... 
84135    True
28464    True
84791    True
9711     True
1806     True
Name: genre, Length: 188, dtype: bool

In [841]:
new_model.predict(itend)

array(['hiphop'], dtype=object)

In [853]:
features_all_one_hot_df.columns

Index(['b', 'l', 'u', 'e', 's', 'c', 'a', 'i', 'o', 'n', 't', 'r', 'y', 'd',
       'h', 'p', 'j', 'z', 'm', 'g', 'k'],
      dtype='object')