In [1]:
import numpy as np 


class SVM:

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None


    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        y_ = np.where(y <= 0, -1, 1)
        
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]


    def predict(self, X):
        approx = np.dot(X, self.w) - self.b
        return np.sign(approx)

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [3]:
X, y =  datasets.make_blobs(n_samples=50, n_features=2, centers=2, cluster_std=1.05, random_state=40)
y = np.where(y == 0, -1, 1)

clf = SVM()
clf.fit(X, y)
#predictions = clf.predict(X)
 
print(clf.w, clf.b)

[0.58977016 0.17946483] -0.1520000000000001


In [4]:
def accuracy(y_true, y_pred):
  accuracy = np.sum(y_true == y_pred)/len(y_true)
  return accuracy

In [5]:
y_pred = clf.predict(X)
acc = accuracy(y, y_pred)

In [6]:
acc

1.0

In [7]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"tanvipenumudy","key":"487bfae60e4b65e36e1a308cf7412e23"}'}

In [8]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [9]:
!kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification

Downloading gtzan-dataset-music-genre-classification.zip to /content
100% 1.21G/1.21G [00:31<00:00, 49.6MB/s]
100% 1.21G/1.21G [00:31<00:00, 40.8MB/s]


In [10]:
!ls

gtzan-dataset-music-genre-classification.zip  kaggle.json  sample_data


In [11]:
!unzip "gtzan-dataset-music-genre-classification.zip" -d /tmp

Archive:  gtzan-dataset-music-genre-classification.zip
  inflating: /tmp/Data/features_30_sec.csv  
  inflating: /tmp/Data/features_3_sec.csv  
  inflating: /tmp/Data/genres_original/blues/blues.00000.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00001.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00002.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00003.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00004.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00005.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00006.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00007.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00008.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00009.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00010.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00011.wav  
  inflating: /tmp/Data/genres_original/blues/blues.00012.wav  
  inflating: /tmp/Data/genres_origina

In [12]:
import os
print(len(os.listdir('/tmp/Data/genres_original')))
print(len(os.listdir('/tmp/Data/genres_original/blues')))
print(len(os.listdir('/tmp/Data/genres_original/classical')))

10
100
100


In [13]:
import glob
import librosa
import csv

In [14]:
header = ['filename','chroma_stft','rmse','spectral_centroid','spectral_bandwidth','rolloff','zero_crossing_rate']
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'

In [16]:
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = ['blues','classical']
for g in genres:
    for filename in os.listdir(f'/tmp/Data/genres_original/{g}'):
        songname = f'/tmp/Data/genres_original/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        rmse = librosa.feature.rmse(y=y)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [17]:
import pandas as pd

In [30]:
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00071.wav,0.305262,0.120542,2429.260934,2286.625281,5065.027706,0.134687,-89.357906,93.786055,-14.574538,54.391654,-33.50838,19.420994,-19.761601,13.02632,-13.865957,14.607766,-14.969526,10.585571,-18.026934,6.654247,-10.204747,6.527642,-10.846745,3.319423,-6.189024,2.994841,blues
1,blues.00073.wav,0.341865,0.259714,2094.91942,2158.335963,4482.447895,0.10057,-30.094571,103.732289,-19.75424,56.40477,-12.71099,20.188002,-17.340381,16.292435,-18.4396,8.924175,-11.648016,11.213472,-8.842511,9.090302,-3.579252,4.372714,-2.830807,4.725089,0.485058,1.475955,blues
2,blues.00007.wav,0.307921,0.131785,1451.754147,1577.369917,2955.348796,0.061435,-179.395447,136.459244,-26.656359,39.988027,5.289679,10.924427,-20.561889,8.513764,-11.356908,-3.469077,-8.414554,-6.954827,-3.544535,-8.051242,-8.959537,-8.424337,-10.558885,-10.788159,-4.693749,-8.638613,blues
3,blues.00000.wav,0.349943,0.130225,1784.420446,2002.650192,3806.485316,0.083066,-113.596742,121.557302,-19.158825,42.351029,-6.376457,18.618875,-13.697911,15.34463,-12.285266,10.980491,-8.324323,8.810668,-3.667367,5.75169,-5.162761,0.750947,-1.691937,-0.409954,-2.300208,1.219928,blues
4,blues.00070.wav,0.322734,0.106164,2159.546654,2130.376826,4685.04667,0.098349,-133.015981,105.72269,-22.138973,41.81862,-26.988738,19.12245,-26.566396,19.239805,-15.332086,6.986052,-10.813206,6.791205,-11.727092,4.39739,-6.039441,1.966114,-12.320324,3.253893,-8.809824,-3.37992,blues


In [31]:
data.shape

(200, 28)

In [32]:
data = data.drop(['filename'],axis=1)

In [33]:
data.head()

Unnamed: 0,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,0.305262,0.120542,2429.260934,2286.625281,5065.027706,0.134687,-89.357906,93.786055,-14.574538,54.391654,-33.50838,19.420994,-19.761601,13.02632,-13.865957,14.607766,-14.969526,10.585571,-18.026934,6.654247,-10.204747,6.527642,-10.846745,3.319423,-6.189024,2.994841,blues
1,0.341865,0.259714,2094.91942,2158.335963,4482.447895,0.10057,-30.094571,103.732289,-19.75424,56.40477,-12.71099,20.188002,-17.340381,16.292435,-18.4396,8.924175,-11.648016,11.213472,-8.842511,9.090302,-3.579252,4.372714,-2.830807,4.725089,0.485058,1.475955,blues
2,0.307921,0.131785,1451.754147,1577.369917,2955.348796,0.061435,-179.395447,136.459244,-26.656359,39.988027,5.289679,10.924427,-20.561889,8.513764,-11.356908,-3.469077,-8.414554,-6.954827,-3.544535,-8.051242,-8.959537,-8.424337,-10.558885,-10.788159,-4.693749,-8.638613,blues
3,0.349943,0.130225,1784.420446,2002.650192,3806.485316,0.083066,-113.596742,121.557302,-19.158825,42.351029,-6.376457,18.618875,-13.697911,15.34463,-12.285266,10.980491,-8.324323,8.810668,-3.667367,5.75169,-5.162761,0.750947,-1.691937,-0.409954,-2.300208,1.219928,blues
4,0.322734,0.106164,2159.546654,2130.376826,4685.04667,0.098349,-133.015981,105.72269,-22.138973,41.81862,-26.988738,19.12245,-26.566396,19.239805,-15.332086,6.986052,-10.813206,6.791205,-11.727092,4.39739,-6.039441,1.966114,-12.320324,3.253893,-8.809824,-3.37992,blues


In [34]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [35]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

In [36]:
data.head()

Unnamed: 0,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,0.305262,0.120542,2429.260934,2286.625281,5065.027706,0.134687,-89.357906,93.786055,-14.574538,54.391654,-33.50838,19.420994,-19.761601,13.02632,-13.865957,14.607766,-14.969526,10.585571,-18.026934,6.654247,-10.204747,6.527642,-10.846745,3.319423,-6.189024,2.994841,blues
1,0.341865,0.259714,2094.91942,2158.335963,4482.447895,0.10057,-30.094571,103.732289,-19.75424,56.40477,-12.71099,20.188002,-17.340381,16.292435,-18.4396,8.924175,-11.648016,11.213472,-8.842511,9.090302,-3.579252,4.372714,-2.830807,4.725089,0.485058,1.475955,blues
2,0.307921,0.131785,1451.754147,1577.369917,2955.348796,0.061435,-179.395447,136.459244,-26.656359,39.988027,5.289679,10.924427,-20.561889,8.513764,-11.356908,-3.469077,-8.414554,-6.954827,-3.544535,-8.051242,-8.959537,-8.424337,-10.558885,-10.788159,-4.693749,-8.638613,blues
3,0.349943,0.130225,1784.420446,2002.650192,3806.485316,0.083066,-113.596742,121.557302,-19.158825,42.351029,-6.376457,18.618875,-13.697911,15.34463,-12.285266,10.980491,-8.324323,8.810668,-3.667367,5.75169,-5.162761,0.750947,-1.691937,-0.409954,-2.300208,1.219928,blues
4,0.322734,0.106164,2159.546654,2130.376826,4685.04667,0.098349,-133.015981,105.72269,-22.138973,41.81862,-26.988738,19.12245,-26.566396,19.239805,-15.332086,6.986052,-10.813206,6.791205,-11.727092,4.39739,-6.039441,1.966114,-12.320324,3.253893,-8.809824,-3.37992,blues


In [37]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [38]:
from sklearn.model_selection import train_test_split

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=123)

In [40]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(150, 26) (50, 26) (150,) (50,)


In [41]:
np.unique(y,return_counts=True)

(array([0, 1]), array([100, 100]))

In [42]:
clf = SVM()
clf.fit(X_train, y_train)

In [43]:
y_pred = clf.predict(X_train)
acc = accuracy(y_train, y_pred)
acc

0.87333333333333333

In [44]:
y_pred = clf.predict(X_test)
acc = accuracy(y_test, y_pred)
acc

0.7854295667230126