# Experiments

In [1]:
from src.data_representations.tf_idf import TfIdf
from src.data_representations.vector import Vector
from src.data_representations.structure import Structure
from src.data_representations.data_representations import BOW
from src.classifiers.knn import Knn
from src.classifiers.knn_deprecated import Knn as Knn2
from src.preprocessing.preprocessing import Preprocessor
from src.evaluation.evaluation import Evaluator

# %load_ext autoreload
# %autoreload 2

In [2]:
# Locations of the two splits; read_limit bounds how much of each file is read
# (NOTE(review): unit of read_limit is defined by Preprocessor — confirm).
filepath_train = "./data/songs_train.txt"
dataset_train = Preprocessor(filepath=filepath_train, read_limit=10000)
filepath_test = "./data/songs_test.txt"
dataset_test = Preprocessor(filepath=filepath_test, read_limit=100)

# Artist <-> integer id lookups covering every artist seen in either split
artists = list(set(dataset_train.artists) | set(dataset_test.artists))
label_to_num = dict(zip(artists, range(len(artists))))
num_to_label = dict(enumerate(artists))

In [3]:
# BOW representation and integer labels for the training split
training_examples = list(map(BOW, dataset_train.tokenized))
training_labels = [label_to_num[artist] for artist in dataset_train.artists]
# Knn2 is the implementation imported from src.classifiers.knn_deprecated
classifier = Knn2(training_examples, training_labels)

# Same representation and label encoding for the test split
test_examples = list(map(BOW, dataset_test.tokenized))
test_labels = [label_to_num[artist] for artist in dataset_test.artists]

### Using different Tversky settings

#### $\alpha=0.1$, $\beta=0.9$

In [4]:
# Tversky measure with alpha=0.1, beta=0.9 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.1, beta=0.9
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.03
Micro Precision:
 0.15789473684210525
Micro Recall:
 0.03
Micro F-Score:
 0.050420168067226885


#### $\alpha=0.2$, $\beta=0.8$

In [5]:
# Tversky measure with alpha=0.2, beta=0.8 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.2, beta=0.8
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.05
Micro Precision:
 0.3125
Micro Recall:
 0.05
Micro F-Score:
 0.08620689655172414


#### $\alpha=0.3$, $\beta=0.7$

In [6]:
# Tversky measure with alpha=0.3, beta=0.7 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.3, beta=0.7
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.05
Micro Precision:
 0.3125
Micro Recall:
 0.05
Micro F-Score:
 0.08620689655172414


#### $\alpha=\frac{1}{3}$, $\beta=\frac{2}{3}$

In [7]:
# Tversky measure with alpha=1/3, beta=2/3 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=1/3, beta=2/3
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.05
Micro Precision:
 0.2777777777777778
Micro Recall:
 0.05
Micro F-Score:
 0.08474576271186442


#### $\alpha=0.4$, $\beta=0.6$

In [8]:
# Tversky measure with alpha=0.4, beta=0.6 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.4, beta=0.6
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.05
Micro Precision:
 0.22727272727272727
Micro Recall:
 0.05
Micro F-Score:
 0.08196721311475409


#### $\alpha=0.6$, $\beta=0.4$

In [9]:
# Tversky measure with alpha=0.6, beta=0.4 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.6, beta=0.4
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.05
Micro Precision:
 0.20833333333333334
Micro Recall:
 0.05
Micro F-Score:
 0.08064516129032258


#### $\alpha=\frac{2}{3}$, $\beta=\frac{1}{3}$

In [10]:
# Tversky measure with alpha=2/3, beta=1/3 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=2/3, beta=1/3
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.05
Micro Precision:
 0.20833333333333334
Micro Recall:
 0.05
Micro F-Score:
 0.08064516129032258


#### $\alpha=0.7$, $\beta=0.3$

In [11]:
# Tversky measure with alpha=0.7, beta=0.3 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.7, beta=0.3
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.06
Micro Precision:
 0.2222222222222222
Micro Recall:
 0.06
Micro F-Score:
 0.09448818897637795


#### $\alpha=0.8$, $\beta=0.2$

In [12]:
# Tversky measure with alpha=0.8, beta=0.2 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.8, beta=0.2
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.06
Micro Precision:
 0.18181818181818182
Micro Recall:
 0.06
Micro F-Score:
 0.09022556390977443


#### $\alpha=0.9$, $\beta=0.1$

In [13]:
# Tversky measure with alpha=0.9, beta=0.1 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0.9, beta=0.1
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.01
Micro Precision:
 0.03125
Micro Recall:
 0.01
Micro F-Score:
 0.015151515151515152


#### $\alpha=1$, $\beta=0$

In [14]:
# Tversky measure with alpha=1, beta=0 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=1, beta=0
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.01
Micro Precision:
 0.03225806451612903
Micro Recall:
 0.01
Micro F-Score:
 0.015267175572519083


#### $\alpha=0$, $\beta=1$

In [15]:
# Tversky measure with alpha=0, beta=1 and k=4
predictions = classifier.predict(
    test_examples, k=4, measure="tversky", alpha=0, beta=1
)
# Score the predictions against the true labels and print each metric
evaluator = Evaluator(test_labels, predictions)
for heading, metric in (
    ("Accuracy:\n", evaluator.accuracy),
    ("Micro Precision:\n", evaluator.micro_precision),
    ("Micro Recall:\n", evaluator.micro_recall),
    ("Micro F-Score:\n", evaluator.micro_fscore),
):
    print(heading, metric())

Accuracy:
 0.02
Micro Precision:
 0.11764705882352941
Micro Recall:
 0.02
Micro F-Score:
 0.03418803418803419


# Subset training and testing

In [1]:
from src.data_representations.tf_idf import TfIdf
from src.data_representations.vector import Vector
from src.data_representations.structure import Structure
from src.classifiers.knn import Knn
from src.classifiers.knn_deprecated import Knn as Knn2
from src.preprocessing.preprocessing import Preprocessor
from src.evaluation.evaluation import Evaluator
from src.data_representations.data_representations import BOW
import gc

In [2]:
def report(evaluator):
    """Print the four summary metrics exposed by ``evaluator``.

    Calls, in order, ``accuracy()``, ``micro_precision()``,
    ``micro_recall()`` and ``micro_fscore()`` on ``evaluator`` and prints
    each result under its heading. Returns ``None``.
    """
    # (heading, evaluator method name) pairs, in output order
    metrics = (
        ("Accuracy:\n", "accuracy"),
        ("Micro Precision:\n", "micro_precision"),
        ("Micro Recall:\n", "micro_recall"),
        ("Micro F-Score:\n", "micro_fscore"),
    )
    for heading, method_name in metrics:
        print(heading, getattr(evaluator, method_name)())

def run_prediction(classifier, test_examples, test_labels, measure, k=4, **measure_kwargs):
    """Predict labels for ``test_examples`` and print the evaluation report.

    Args:
        classifier: Object exposing ``predict(examples, k=..., measure=..., ...)``.
        test_examples: Examples passed to ``classifier.predict``.
        test_labels: True labels, compared against the predictions by ``Evaluator``.
        measure: Name of the similarity/distance measure forwarded to ``predict``.
        k: Value forwarded as ``k`` to ``predict``; defaults to 4, the value
            that was previously hard-coded.
        **measure_kwargs: Extra keyword arguments forwarded unchanged to
            ``predict`` (e.g. ``alpha``/``beta`` for the "tversky" measure),
            so parameterised measures no longer need to bypass this helper.

    Returns:
        None. The metrics are printed by ``report``.
    """
    predictions = classifier.predict(test_examples, k=k, measure=measure, **measure_kwargs)
    evaluator = Evaluator(test_labels, predictions)
    report(evaluator)

In [18]:
# Read dataset
filepath_train = "./data/songs_train.txt"
dataset_train = Preprocessor(filepath=filepath_train, read_limit=10000)
filepath_test = "./data/songs_test.txt"
dataset_test = Preprocessor(filepath=filepath_test, read_limit=100)

# Create numerical representations of labels for mapping.
# The ids are taken from positions in `artists` itself, so the mapping is
# guaranteed to agree with the list instead of depending on a second,
# separately built set iterating in the same order.
artists = list(set(dataset_train.artists) | set(dataset_test.artists))
label_to_num = {artist: i for i, artist in enumerate(artists)}
num_to_label = {value: key for key, value in label_to_num.items()}

# Number of processes handed to Knn (passed as `multi_process` below)
number_processes = 8

## Random chance
This experiment is set up by picking a random artist from the list of artists N times (N being the number of test examples).

Because it's random, we're taking the average of **10 tests**.

In [19]:
import random

# Private, seeded RNG: re-running this cell yields the same random draws and
# the global `random` state is left untouched.
RANDOM_BASELINE_SEED = 42
rng = random.Random(RANDOM_BASELINE_SEED)

test_labels = [label_to_num[label] for label in dataset_test.artists]

# One entry per repetition of the experiment
accuracy = []
micro_precision = []
micro_recall = []
micro_fscore = []

num_experiments = 10

for _ in range(num_experiments):
    # One uniformly drawn artist id per test example (drawn with replacement)
    random_labels = rng.choices(range(len(artists)), k=len(test_labels))

    evaluator = Evaluator(test_labels, random_labels)
    accuracy.append(evaluator.accuracy())
    micro_precision.append(evaluator.micro_precision())
    micro_recall.append(evaluator.micro_recall())
    micro_fscore.append(evaluator.micro_fscore())

# Report the mean of each metric over all repetitions
print("Accuracy:\n", sum(accuracy) / num_experiments)
print("Micro Precision:\n", sum(micro_precision) / num_experiments)
print("Micro Recall:\n", sum(micro_recall) / num_experiments)
print("Micro F-Score:\n", sum(micro_fscore) / num_experiments)

Accuracy:
 0.004
Micro Precision:
 0.02760854341736695
Micro Recall:
 0.004
Micro F-Score:
 0.006973639891062759


## BOW

In [20]:
# BOW representation and integer labels for the training split
training_examples = list(map(BOW, dataset_train.tokenized))
training_labels = [label_to_num[artist] for artist in dataset_train.artists]
# Knn is given `number_processes` through its `multi_process` argument
classifier = Knn(training_examples, training_labels, multi_process=number_processes)

# Same representation and label encoding for the test split
test_examples = list(map(BOW, dataset_test.tokenized))
test_labels = [label_to_num[artist] for artist in dataset_test.artists]

### Using Jaccard

In [21]:
# Evaluate the BOW classifier with the "jaccard" measure
run_prediction(classifier, test_examples, test_labels, measure="jaccard")

Accuracy:
 0.07
Micro Precision:
 0.3181818181818182
Micro Recall:
 0.07
Micro F-Score:
 0.11475409836065574


### Using Sørensen-Dice

In [23]:
# Evaluate the BOW classifier with the "dsc" (Sørensen-Dice) measure
run_prediction(classifier, test_examples, test_labels, measure="dsc")

Accuracy:
 0.07
Micro Precision:
 0.3181818181818182
Micro Recall:
 0.07
Micro F-Score:
 0.11475409836065574


### Using Overlap index

In [24]:
# Evaluate the BOW classifier with the "overlap" (overlap index) measure
run_prediction(classifier, test_examples, test_labels, "overlap")

Accuracy:
 0.01
Micro Precision:
 0.047619047619047616
Micro Recall:
 0.01
Micro F-Score:
 0.01652892561983471


### Using Tversky 

In [25]:
# The Tversky run uses Knn2 (imported from src.classifiers.knn_deprecated);
# note that this rebinds `classifier`.
classifier = Knn2(training_examples, training_labels)
predictions = classifier.predict(
    test_examples,
    k=4,
    measure="tversky",
    alpha=0.7,
    beta=0.3,
)
evaluator = Evaluator(test_labels, predictions)
report(evaluator)

Accuracy:
 0.06
Micro Precision:
 0.2222222222222222
Micro Recall:
 0.06
Micro F-Score:
 0.09448818897637795


## TF-IDF

In [22]:
# Enable IPython's autoreload extension so edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [23]:
# fit_transform on the training documents, transform on the test documents
tf_idf = TfIdf()
train = tf_idf.fit_transform(dataset_train.tokenized)
test = tf_idf.transform(dataset_test.tokenized)

# Wrap every TF-IDF item in a Vector and encode the artists as integer ids
training_examples = [Vector([doc_vec]) for doc_vec in train]
training_labels = [label_to_num[artist] for artist in dataset_train.artists]

classifier = Knn(training_examples, training_labels, number_processes)

test_examples = [Vector([doc_vec]) for doc_vec in test]
test_labels = [label_to_num[artist] for artist in dataset_test.artists]

# Unbind the intermediates; the cells below only use `classifier`,
# `test_examples` and `test_labels`.
del tf_idf
del train, test
del training_examples, training_labels


### Cosine similarity

In [24]:
# Evaluate the TF-IDF classifier with the "cosine" measure
run_prediction(classifier, test_examples, test_labels, measure="cosine")

Accuracy:
 0.07
Micro Precision:
 0.23333333333333334
Micro Recall:
 0.07
Micro F-Score:
 0.1076923076923077


### Euclidean distance

In [25]:
# Evaluate the TF-IDF classifier with the "euclidean" measure
run_prediction(classifier, test_examples, test_labels, measure="euclidean")

Accuracy:
 0.01
Micro Precision:
 0.09090909090909091
Micro Recall:
 0.01
Micro F-Score:
 0.018018018018018018


Print which artist was predicted and which one is the actual artist:

In [26]:
#for pred, lab in zip(predictions, test_labels):
#    print(num_to_label[pred], " - ", num_to_label[lab])

## TF-IDF + structural

In [31]:
# fit_transform on the training documents, transform on the test documents
tf_idf = TfIdf()
train = tf_idf.fit_transform(dataset_train.tokenized)
test = tf_idf.transform(dataset_test.tokenized)

# Structural information computed per split
train_struc = Structure(dataset_train.tokenized)
test_struc = Structure(dataset_test.tokenized)

# Each example combines the TF-IDF item with that document's `number_lines`
# and `doc_length` values from Structure.
training_examples = [
    Vector([doc_vec, [n_lines], [length]])
    for doc_vec, n_lines, length in zip(
        train, train_struc.number_lines, train_struc.doc_length
    )
]
training_labels = [label_to_num[artist] for artist in dataset_train.artists]

classifier = Knn(training_examples, training_labels, number_processes)

test_examples = [
    Vector([doc_vec, [n_lines], [length]])
    for doc_vec, n_lines, length in zip(
        test, test_struc.number_lines, test_struc.doc_length
    )
]
test_labels = [label_to_num[artist] for artist in dataset_test.artists]

# Unbind the intermediates that are no longer needed
del tf_idf
del train, test
del train_struc, test_struc


### Cosine similarity

In [32]:
# Evaluate the TF-IDF + structural classifier with the "cosine" measure
run_prediction(classifier, test_examples, test_labels, measure="cosine")

Accuracy:
 0.02
Micro Precision:
 0.08333333333333333
Micro Recall:
 0.02
Micro F-Score:
 0.03225806451612903


### Euclidean distance

In [33]:
# Evaluate the TF-IDF + structural classifier with the "euclidean" measure
run_prediction(classifier, test_examples, test_labels, measure="euclidean")

Accuracy:
 0.01
Micro Precision:
 0.05555555555555555
Micro Recall:
 0.01
Micro F-Score:
 0.016949152542372885


-----
# Bigger subset training and test

In [3]:
# Same files as before, but with read_limit=20000 for the training split
# (NOTE(review): unit of read_limit is defined by Preprocessor — confirm).
filepath_train = "./data/songs_train.txt"
dataset_train = Preprocessor(filepath=filepath_train, read_limit=20000)
filepath_test = "./data/songs_test.txt"
dataset_test = Preprocessor(filepath=filepath_test, read_limit=100)

# Artist <-> integer id lookups covering every artist seen in either split
artists = list(set(dataset_train.artists) | set(dataset_test.artists))
label_to_num = dict(zip(artists, range(len(artists))))
num_to_label = dict(enumerate(artists))

# Number of processes handed to Knn (passed as `multi_process` below)
number_processes = 8

## Random chance
This experiment is set up by picking a random artist from the list of artists N times (N being the number of test examples).

Because it's random, we're taking the average of **10 tests**.

In [4]:
import random

# Private, seeded RNG: re-running this cell yields the same random draws and
# the global `random` state is left untouched.
RANDOM_BASELINE_SEED = 42
rng = random.Random(RANDOM_BASELINE_SEED)

test_labels = [label_to_num[label] for label in dataset_test.artists]

# One entry per repetition of the experiment
accuracy = []
micro_precision = []
micro_recall = []
micro_fscore = []

num_experiments = 10

for _ in range(num_experiments):
    # One uniformly drawn artist id per test example (drawn with replacement)
    random_labels = rng.choices(range(len(artists)), k=len(test_labels))

    evaluator = Evaluator(test_labels, random_labels)
    accuracy.append(evaluator.accuracy())
    micro_precision.append(evaluator.micro_precision())
    micro_recall.append(evaluator.micro_recall())
    micro_fscore.append(evaluator.micro_fscore())

# Report the mean of each metric over all repetitions
print("Accuracy:\n", sum(accuracy) / num_experiments)
print("Micro Precision:\n", sum(micro_precision) / num_experiments)
print("Micro Recall:\n", sum(micro_recall) / num_experiments)
print("Micro F-Score:\n", sum(micro_fscore) / num_experiments)

Accuracy:
 0.001
Micro Precision:
 0.006666666666666666
Micro Recall:
 0.001
Micro F-Score:
 0.0017391304347826088


## BOW

In [7]:
# BOW representation and integer labels for the training split
training_examples = list(map(BOW, dataset_train.tokenized))
training_labels = [label_to_num[artist] for artist in dataset_train.artists]
# Knn is given `number_processes` through its `multi_process` argument
classifier = Knn(training_examples, training_labels, multi_process=number_processes)

# Same representation and label encoding for the test split
test_examples = list(map(BOW, dataset_test.tokenized))
test_labels = [label_to_num[artist] for artist in dataset_test.artists]

### Using Jaccard

In [8]:
# Evaluate the BOW classifier with the "jaccard" measure
run_prediction(classifier, test_examples, test_labels, measure="jaccard")

Accuracy:
 0.09
Micro Precision:
 0.42857142857142855
Micro Recall:
 0.09
Micro F-Score:
 0.1487603305785124


### Using Sørensen-Dice

In [9]:
# Evaluate the BOW classifier with the "dsc" (Sørensen-Dice) measure
run_prediction(classifier, test_examples, test_labels, measure="dsc")

Accuracy:
 0.09
Micro Precision:
 0.42857142857142855
Micro Recall:
 0.09
Micro F-Score:
 0.1487603305785124


### Using Overlap index

In [10]:
# Evaluate the BOW classifier with the "overlap" (overlap index) measure
run_prediction(classifier, test_examples, test_labels, measure="overlap")

Accuracy:
 0.01
Micro Precision:
 0.05555555555555555
Micro Recall:
 0.01
Micro F-Score:
 0.016949152542372885


### Using Tversky 

In [11]:
# The Tversky run uses Knn2 (imported from src.classifiers.knn_deprecated);
# note that this rebinds `classifier`.
classifier = Knn2(training_examples, training_labels)
predictions = classifier.predict(
    test_examples,
    k=4,
    measure="tversky",
    alpha=0.7,
    beta=0.3,
)
evaluator = Evaluator(test_labels, predictions)
report(evaluator)

Accuracy:
 0.05
Micro Precision:
 0.25
Micro Recall:
 0.05
Micro F-Score:
 0.08333333333333334


## TF-IDF

In [16]:
# fit_transform on the training documents, transform on the test documents
tf_idf = TfIdf()
train = tf_idf.fit_transform(dataset_train.tokenized)
test = tf_idf.transform(dataset_test.tokenized)
# Progress marker once both transforms have finished
print('done')

# Wrap every TF-IDF item in a Vector and encode the artists as integer ids
training_examples = [Vector([doc_vec]) for doc_vec in train]
training_labels = [label_to_num[artist] for artist in dataset_train.artists]

classifier = Knn(training_examples, training_labels, number_processes)

test_examples = [Vector([doc_vec]) for doc_vec in test]
test_labels = [label_to_num[artist] for artist in dataset_test.artists]

# Unbind the intermediates, then ask the garbage collector to run; the value
# returned by gc.collect() is displayed as the cell output.
del tf_idf
del train, test
del training_examples, training_labels
gc.collect()

done


12

### Cosine similarity

In [17]:
# Evaluate the TF-IDF classifier with the "cosine" measure
run_prediction(classifier, test_examples, test_labels, measure="cosine")

Accuracy:
 0.08
Micro Precision:
 0.2857142857142857
Micro Recall:
 0.08
Micro F-Score:
 0.125


### Euclidean distance

In [19]:
# Evaluate the TF-IDF classifier with the "euclidean" measure
run_prediction(classifier, test_examples, test_labels, measure="euclidean")

Accuracy:
 0.04
Micro Precision:
 0.3333333333333333
Micro Recall:
 0.04
Micro F-Score:
 0.07142857142857142


## TF-IDF + structural

In [4]:
# fit_transform on the training documents, transform on the test documents
tf_idf = TfIdf()
train = tf_idf.fit_transform(dataset_train.tokenized)
test = tf_idf.transform(dataset_test.tokenized)

# Structural information computed per split
train_struc = Structure(dataset_train.tokenized)
test_struc = Structure(dataset_test.tokenized)

# Each example combines the TF-IDF item with that document's `number_lines`
# and `doc_length` values from Structure.
training_examples = [
    Vector([doc_vec, [n_lines], [length]])
    for doc_vec, n_lines, length in zip(
        train, train_struc.number_lines, train_struc.doc_length
    )
]
training_labels = [label_to_num[artist] for artist in dataset_train.artists]

classifier = Knn(training_examples, training_labels, number_processes)

test_examples = [
    Vector([doc_vec, [n_lines], [length]])
    for doc_vec, n_lines, length in zip(
        test, test_struc.number_lines, test_struc.doc_length
    )
]
test_labels = [label_to_num[artist] for artist in dataset_test.artists]

# Unbind the intermediates that are no longer needed
del tf_idf
del train, test
del train_struc, test_struc


### Cosine similarity

In [22]:
# Evaluate the TF-IDF + structural classifier with the "cosine" measure
run_prediction(classifier, test_examples, test_labels, measure="cosine")

Accuracy:
 0.01
Micro Precision:
 0.05555555555555555
Micro Recall:
 0.01
Micro F-Score:
 0.016949152542372885


### Euclidean distance

In [5]:
# Evaluate the TF-IDF + structural classifier with the "euclidean" measure
run_prediction(classifier, test_examples, test_labels, measure="euclidean")

Accuracy:
 0.01
Micro Precision:
 0.07142857142857142
Micro Recall:
 0.01
Micro F-Score:
 0.01754385964912281
