In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored under it can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#importing libraries
import pandas as pd
import os
from skimage.transform import resize
from skimage.io import imread
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [None]:
# Unpack the archive of cropped mel-spectrograms (noted as shape (96, 1406))
# from Google Drive into a local working directory.
import zipfile

archive_path = "/content/drive/MyDrive/colab_data/cropped_archive_22.zip"
extract_dir = "/content/sample_mel"

with zipfile.ZipFile(archive_path, "r") as archive:
    archive.extractall(extract_dir)

In [None]:
# Load the CSV holding the metadata of all tracks; the first CSV column is
# used as the DataFrame index.
df_all = pd.read_csv('/content/mtg_cleaned.csv', index_col = [0])

In [None]:
# Restrict the metadata to the tracks whose mel-spectrogram is present in the
# extracted sample, and reduce every track to a single genre.

mel_dir = '/content/sample_mel'

# Keep only '*.npy' entries: os.listdir also returns any other file or
# sub-directory that ended up in the folder, and int() would raise on those.
mel_list = [
    file_name
    for file_name in os.listdir(mel_dir)
    if os.path.splitext(file_name)[1] == '.npy'
]

# track ids (as int) of every spectrogram in the sample; the file stem is the id
track_ids = [int(os.path.splitext(file_name)[0]) for file_name in mel_list]

# rows of the metadata that have a spectrogram; .copy() so the genre assignment
# below writes to an independent frame rather than a slice of df_all
df = df_all[df_all['track_id'].isin(track_ids)].copy()

# only keep the first entry of the comma-separated genre string
df['genre'] = df['genre'].str.split(',').str[0]

# only the track id and genre columns are used from here on
df = df[['track_id', 'genre']]

In [None]:
# Keep only the tracks that belong to the n most frequent genres.

n = 10  # num_classes

# genre frequency table for the sample, most frequent genre first
genre_count = (
    df['genre']
    .value_counts()
    .rename_axis('genre')
    .reset_index(name='counts')
)

# names of the n most frequent genres
top_genres = genre_count['genre'].iloc[:n].tolist()

# drop every other genre, then order by track id and renumber the index
is_top_genre = df['genre'].isin(top_genres)
df = df[is_top_genre].sort_values(by='track_id', ascending=True).reset_index(drop=True)

df

Unnamed: 0,track_id,genre
0,6606,dance
1,6607,dance
2,6608,dance
3,6609,dance
4,6610,dance
...,...,...
3096,1420700,chillout
3097,1420704,dance
3098,1420705,dance
3099,1420706,dance


In [None]:
# Track ids of every row kept in df, i.e. tracks that are present in the
# sample and whose genre is one of the top n genres.
train_track_ids = list(df['track_id'].values)
# show the first five ids as a quick sanity check
train_track_ids[:5]

[6606, 6607, 6608, 6609, 6610]

In [None]:
# Map every genre to the list of track ids labelled with it.
# Genres are inserted in the order in which they first appear in df.
genre_dict = {
    genre: df.loc[df['genre'] == genre, 'track_id'].tolist()
    for genre in df['genre'].unique()
}

In [None]:
# Build X (one flattened spectrogram per row) and y (integer genre label).
flat_data_arr = []  # input rows
target_arr = []  # output labels

# flattened length of the first spectrogram; every later one must match it,
# otherwise np.array below cannot form a regular 2-D matrix
expected_size = None

# Iterate over each genre.
# The loop variable is deliberately not named `track_ids`, so the list of all
# sample track ids defined under that name is not overwritten by this cell.
for genre, genre_track_ids in genre_dict.items():
    print(f'loading... genre: {genre}')

    # label = position of the genre in top_genres; identical for every track
    # of the genre, so it is looked up once per genre
    label = top_genres.index(genre)

    # Iterate over each track_id within the genre
    for track_id in genre_track_ids:
        img_array = np.load(os.path.join(mel_dir, str(track_id) + '.npy'))
        features = img_array.flatten()

        if expected_size is None:
            expected_size = features.size
        elif features.size != expected_size:
            raise ValueError(
                f'track {track_id}: flattened size {features.size} '
                f'differs from expected size {expected_size}'
            )

        flat_data_arr.append(features)
        target_arr.append(label)
    print(f'loaded category:{genre} successfully')

flat_data = np.array(flat_data_arr)
target = np.array(target_arr)


loading... genre: dance
loaded category:dance successfully
loading... genre: alternative
loaded category:alternative successfully
loading... genre: pop
loaded category:pop successfully
loading... genre: easylistening
loaded category:easylistening successfully
loading... genre: electronic
loaded category:electronic successfully
loading... genre: folk
loaded category:folk successfully
loading... genre: classical
loaded category:classical successfully
loading... genre: ambient
loaded category:ambient successfully
loading... genre: chillout
loaded category:chillout successfully
loading... genre: soundtrack
loaded category:soundtrack successfully


In [None]:
# Combine features and labels in a single DataFrame: one row per loaded track,
# one column per flattened spectrogram value, plus a trailing 'Target' column
# holding the integer genre label.
df_svc=pd.DataFrame(flat_data)
df_svc['Target']=target
# (number of rows, number of columns)
df_svc.shape

(3101, 134977)

In [None]:
#input data: every column except the last one
x=df_svc.iloc[:,:-1]
#output data: the last column only
y=df_svc.iloc[:,-1]

In [None]:
# Hold out 20% of the samples for testing. The split is stratified on y and
# uses a fixed random_state so it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=77,
    stratify=y,
)

In [None]:
# Hyper-parameter grid explored by the grid search (2 x 2 x 2 = 8 combinations)
param_grid = dict(
    C=[0.1, 1],
    gamma=[0.0001, 0.001],
    kernel=['rbf', 'poly'],
)

# Base estimator: support vector classifier with probability estimates enabled
svc = svm.SVC(probability=True)

# Grid search over param_grid, leaving cross-validation and scoring at the
# library defaults
model = GridSearchCV(estimator=svc, param_grid=param_grid)

In [None]:
# Training the model using the training data: runs the grid search over
# param_grid on the training split.
model.fit(x_train,y_train)

In [None]:
# Predict genre labels for the held-out test split
y_pred = model.predict(x_test)

# Fraction of test samples labelled correctly. accuracy_score is declared as
# accuracy_score(y_true, y_pred), so the ground-truth labels are passed first.
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy of the model
print(f"The model is {accuracy*100}% accurate")

In [None]:
# Reference template, not executed: the triple-quoted string below holds a
# generic image-loading loop (read each file with imread, resize it to
# 150x150x3, flatten it) for a two-class 'cats'/'dogs' folder layout.
# Being the only expression in the cell, the string itself is the cell's value.
'''

Categories=['cats','dogs']
flat_data_arr=[] #input array
target_arr=[] #output array
datadir='IMAGES/'
#path which contains all the categories of images
for i in Categories:

    print(f'loading... category : {i}')
    path=os.path.join(datadir,i) # needs the images to be stored in a seperate folder for each class
    for img in os.listdir(path):
        img_array=imread(os.path.join(path,img))
        img_resized=resize(img_array,(150,150,3))
        flat_data_arr.append(img_resized.flatten())
        target_arr.append(Categories.index(i))
    print(f'loaded category:{i} successfully')
flat_data=np.array(flat_data_arr)
target=np.array(target_arr)

# a list of class labels
# needs the images to be stored in a seperate folder for each class

#a for loop to over every image in every class folder

# read the image
#resize the image
#flattening the image

'''