In [None]:
import random
from tqdm import tqdm
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

from src.model_rt import RealtimeModel, svc, linear_svc, nu_svc, lda, qda
from src.utils_import import load_data
from src.utils_preprocess import split_data, compute_energy_matrix_and_labels
from src.utils_clustering import create_cluster, cluster_mapping
from src.utils_preprocess import *


# Seed both the stdlib and NumPy generators so the shuffles below
# (and any randomness inside the helpers) are repeatable across runs.
random.seed(1337)
np.random.seed(1337)

# Assumption: all signals consist of 50k samples
n_samples = 50000
interv = 1024  # Hyperparameter 1
n_frec_div = 32  # Hyperparameter 2
array_length = (n_samples // interv) - 1

# Load the three recording folders; they all share the same metadata file.
metadata_csv = 'dataset/Jamming/metadata.csv'
signals_clean, signals_narrowband, signals_wideband = (
    load_data(f'dataset/Jamming/{folder}', metadata_csv)
    for folder in ('Clean', 'Narrowband', 'Wideband')
)

# Partition every class separately with train=0.8, test=0.2
(
    (clean_train, clean_test),
    (narrowband_train, narrowband_test),
    (wideband_train, wideband_test),
) = (
    split_data(signals, 0.8)
    for signals in (signals_clean, signals_narrowband, signals_wideband)
)

# Concatenate the per-class partitions into the global train / test sets.
train = clean_train + narrowband_train + wideband_train
test = clean_test + narrowband_test + wideband_test

print(f"Nº señales entrenamiento: {len(train)}")
print(f"Nº señales test: {len(test)}")

# Shuffle in place (train first, then test) so classes are interleaved.
random.shuffle(train)
random.shuffle(test)

# Label name -> integer id, and the inverse lookup.
class_mapping = {
    "Clean": 0,
    "Narrowband Start": 1,
    "Narrowband Stop": 2,
    "Wideband Start": 3,
    "Wideband Stop": 4,
}
class_unmapping = {class_id: class_name for class_name, class_id in class_mapping.items()}


In [None]:
# Quick visual inspection of a few training recordings.
# NOTE(review): the indices 0 / 1 / 6 were annotated as Clean / Narrowband /
# Wideband in the original cell; that depends on the seeded shuffle — confirm.

# Clean example: plot an arbitrary window (index 20).
clean_windows = signal_interval(train[0]["Data"])
print(clean_windows.shape)
plt.plot(clean_windows[20])
plt.show()

# Jammed examples (Narrowband, then Wideband): overlay the window just before
# the jamming start with the window in which the jamming starts.
for record_index in (1, 6):
    record = train[record_index]
    start_window = record['JammingStartTime']//1024
    print(record)
    windows = signal_interval(record["Data"])
    print(windows.shape)
    plt.plot(windows[start_window-1])
    plt.plot(windows[start_window])
    plt.show()

# Energy arrays for the three windows ending at the jamming start of the
# last record inspected above (train[6]).
transition_windows = windows[start_window-2:start_window+1]
print("-"*100)
print(transition_windows)
transition_energy = energy_arrays(transition_windows, 16)
print(transition_energy)

In [None]:
# 1) -- Train --

# Build the energy feature matrix and the per-sample labels for the training
# signals (original note: x = window samples, y = frequency divisions,
# z = signal). balance_data=True is only requested for the training set.
train_energy_dif_matrix, sample_labels = compute_energy_matrix_and_labels(train, n_samples, interv, n_frec_div, class_mapping, balance_data=True)

# Disabled K-Means experiment, kept for reference. It used to live in a
# triple-quoted string, which Jupyter echoed as the cell's output because it
# was the last expression of the cell; real comments avoid that stray output.
#
# # Creating K-Means model based on energy arrays
# cluster = create_cluster(train_energy_dif_matrix, k=5)
# print(f"\n--- Centros de cluster ---\n{cluster.cluster_centers_}")
#
# # Mapping cluster to original classes
# cluster_map = cluster_mapping(cluster.labels_, sample_labels, class_mapping)
# print(f"\nMapping clusters to predominant classes: {cluster_map}")

In [None]:
# 2) -- Test --

# Build the energy feature matrix and ground-truth labels for the test
# signals (no balancing is requested here, unlike for the training set).
test_energy_dif_matrix, y_true = compute_energy_matrix_and_labels(test, n_samples, interv, n_frec_div, class_mapping)

# Disabled K-Means evaluation, kept for reference. It used to live in a
# triple-quoted string, which Jupyter echoed as the cell's output because it
# was the last expression of the cell; real comments avoid that stray output.
#
# y_pred = [cluster_map[label] for label in cluster.predict(test_energy_dif_matrix)]
# # Note: for now it over-predicts class 1 (fix the imbalance; the majority
# # class has many more occurrences)
# print(np.bincount(y_pred))
#
# # True signal classification
# #signal_true = np.zeros(len(test), dtype=np.int8)
# #for i, signal in enumerate(test):
# #    signal_true[i] = class_mapping[signal["Class"]]
#
# # Predicted signal classification
# #signal_pred = predict_labels(y_pred, N=len(test), array_length=array_length)

In [None]:
# 1) -- Train --
# Fit every candidate classifier on the same training features and labels.

svc_model = svc(train_energy_dif_matrix, sample_labels)
lin_svc = linear_svc(train_energy_dif_matrix, sample_labels)
nu_SVC = nu_svc(train_energy_dif_matrix, sample_labels)
lda_model = lda(train_energy_dif_matrix, sample_labels)
qda_model = qda(train_energy_dif_matrix, sample_labels)

# 2) -- Test --
# Predict with all models before printing anything, in the same order as the
# models were trained.

y_pred, y_pred2, y_pred3, y_pred4, y_pred5 = (
    estimator.predict(test_energy_dif_matrix)
    for estimator in (svc_model, lin_svc, nu_SVC, lda_model, qda_model)
)

# 3) -- Metrics --
# One identical report per model: accuracy, confusion matrix and the
# per-class classification report against the test ground truth.

for title, predicted in (
    ("SVC", y_pred),
    ("Linear SVC", y_pred2),
    ("Nu SVC", y_pred3),
    ("LDA", y_pred4),
    ("QDA", y_pred5),
):
    print(f"\n-- {title} --")

    acc = accuracy_score(y_true, predicted)
    print(f"\nAccuracy: {acc}")

    cm = confusion_matrix(y_true, predicted)
    print(f"\nConfusion Matrix:\n{cm}")

    print("\nClassification Report:")
    print(classification_report(y_true, predicted))

In [None]:
# K-nearest-neighbours baseline, tuned with a two-stage grid search that
# shares one stratified 5-fold splitter.
fold = StratifiedKFold(5)

# Stage 1: choose the hyperparameters that affect predictions.
model = KNeighborsClassifier()
parameters = {"n_neighbors":list(range(1,31)), "weights":("uniform","distance")}
clf = GridSearchCV(model, parameters, cv=fold, refit=True)
clf.fit(train_energy_dif_matrix, sample_labels)
# Copy so adding leaf_size below does not mutate the search object's attribute.
best_params = dict(clf.best_params_)

# Stage 2: leaf_size only affects execution time, so it's done separately and
# selected by the lowest mean fit time rather than by score.
model = KNeighborsClassifier(**best_params)
clf = GridSearchCV(model, {"leaf_size":[20,30,50,100,200,300,400]}, cv=fold, refit=True)
clf.fit(train_energy_dif_matrix, sample_labels)
best_index = np.argmin(clf.cv_results_['mean_fit_time'])
best_params['leaf_size'] = clf.cv_results_['params'][best_index]['leaf_size']

# Build the final model from best_params. Previously clf.best_estimator_ was
# used, which is refit with the best-*scoring* leaf_size, so the fastest
# leaf_size selected above was silently discarded.
model = KNeighborsClassifier(**best_params)
model.fit(train_energy_dif_matrix, sample_labels)
y_hat = model.predict(test_energy_dif_matrix)

print("KNN")

# Accuracy
acc = accuracy_score(y_true, y_hat)
print(f"\nAccuracy: {acc}")

# Confusion Matrix
cm = confusion_matrix(y_true, y_hat)
print(f"\nConfusion Matrix:\n{cm}")

# Classification Report
print(f"\nClassification Report:")
print(classification_report(y_true, y_hat))

In [None]:
# Per-recording evaluation: wrap the fitted SVC in RealtimeModel and compare
# the predicted recording class with the recording's own "Class" field.

# Window-level label name -> integer id.
rt_classes = {
    "Clean": 0,
    "Narrowband Start": 1,
    "Narrowband Stop": 2,
    "Wideband Start": 3,
    "Wideband Stop": 4,
}
# NOTE(review): presumably the state entered after each window-level label
# (a "Stop" label maps back to "Clean") — confirm against RealtimeModel.
rt_class_map = {
    0: "Clean",
    1: "Narrowband",
    2: "Clean",
    3: "Wideband",
    4: "Clean",
}
# NOTE(review): presumably the jamming family each window-level label belongs
# to — confirm against RealtimeModel.
rt_class_type = {
    0: "Clean",
    1: "Narrowband",
    2: "Narrowband",
    3: "Wideband",
    4: "Wideband",
}

realtime_model = RealtimeModel(
    svc_model,
    classes=rt_classes,
    class_map=rt_class_map,
    class_type=rt_class_type,
    offset=4,
    nfft=interv,
    n_partitions=n_frec_div,
    verbose=True,
)
#print(test[0]["Data"])
pred = realtime_model.classificate_recordings(test)

# Recording-level ground truth and predictions, both read from "Class".
y_signal_true = [recording["Class"] for recording in test]
y_hat = [recording["Class"] for recording in pred]

# Accuracy
acc = accuracy_score(y_signal_true, y_hat)
print(f"\nAccuracy: {acc}")

# Confusion Matrix
cm = confusion_matrix(y_signal_true, y_hat)
print(f"\nConfusion Matrix:\n{cm}")

# Classification Report
print("\nClassification Report:")
print(classification_report(y_signal_true, y_hat))

In [None]:
# Rich-display the full output of realtime_model.classificate_recordings(test).
# NOTE(review): this dumps every predicted recording; consider showing only a
# slice if the output is large.
display(pred)

In [None]:
# 3) -- Metrics --

# Accuracy
#acc = accuracy_score(signal_true, signal_pred)
#print(f"\nAccuracy: {acc}")        

# Confusion Matrix
#cm = confusion_matrix(signal_true, signal_pred)
#print(f"\nConfusion Matrix:\n{cm}")

# Classification Report
#print(f"\nClassification Report:")
#print(classification_report(signal_true, signal_pred))

In [None]:
#from sklearn.mixture import GaussianMixture

# Project the test features onto their first two principal components and
# colour every point by the cluster that create_cluster assigns to it.
X = test_energy_dif_matrix
pca = PCA(2)
projected = pca.fit_transform(X)
pca_data = pd.DataFrame(projected, columns=['PC1','PC2'])

kmeans = create_cluster(X)
pca_data['cluster'] = pd.Categorical(kmeans.labels_)
# Disabled alternative: Gaussian mixture instead of create_cluster.
#kmeans = GaussianMixture(n_components=5)
#pca_data['cluster'] = pd.Categorical(kmeans.fit_predict(X,sample_labels))

fig, ax = plt.subplots()
scatter = ax.scatter(
    pca_data['PC1'],
    pca_data['PC2'],
    c=pca_data['cluster'],
    cmap='Set3',
    alpha=0.1,
)
# One legend entry per colour value, labelled with the cluster id.
handles, labels = scatter.legend_elements()
legend1 = ax.legend(handles, labels, loc="upper left", title="")
ax.add_artist(legend1)
ax.set_title("Clustering classification")
plt.show()

In [None]:
# Project the test features onto their first two principal components and
# colour every point by its ground-truth class.
pca = PCA(2)
X = test_energy_dif_matrix
pca_data = pd.DataFrame(pca.fit_transform(X),columns=['PC1','PC2'])
pca_data['cluster'] = pd.Categorical(y_true)
#kmeans = GaussianMixture(n_components=5)
#pca_data['cluster'] = pd.Categorical(kmeans.fit_predict(X,sample_labels))

fig,ax = plt.subplots()
scatter = ax.scatter(pca_data['PC1'], pca_data['PC2'],c=pca_data['cluster'],cmap='Set3',alpha=0.1)

# num=None asks for one legend handle per unique colour value, in ascending
# order. The labels are built from the class ids actually present in y_true
# (np.unique is ascending too), so each handle gets its own class name even
# when a class is missing from the test set. Previously the labels were taken
# from class_mapping.keys() regardless of which classes were plotted.
handles, _ = scatter.legend_elements(num=None)
present_class_ids = np.unique(y_true)
legend_labels = [class_unmapping[int(class_id)] for class_id in present_class_ids]
legend1 = ax.legend(handles, legend_labels,
                    loc="upper left", title="")
ax.add_artist(legend1)
plt.title("True classification")
plt.show()

In [None]:
# Number of training samples per integer class id (index = class id); useful
# to check the effect of balance_data=True on the training labels.
np.bincount(sample_labels)