In [None]:
### SVM and SVR

In [1]:
# IMPORTS
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
# from sklearn.cross_validation import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC, SVR, LinearSVR
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [8]:
# INIT
# Training-set sizes swept by the experiment cells below.
# An earlier, finer-grained sweep is kept commented out for reference; it used to be
# assigned here and then immediately overwritten by the list that follows.
# number_samples = [5] + list(range(10, 101, 10)) + [120, 150, 200, 250, 300, 400, 500, 700] + list(range(1000, 4001, 1000))
number_samples = [256, 512, 1024, 2048, 4096, 8192]
# number_samples = [4096]
# Candidate regularisation strengths C tried for every model fit: 0.1, 0.6, ..., 9.6.
C_vec = list(np.arange(0.1, 10, 0.5))
# Maximum number of PUs / SUs per sample and the number of sensors; the LOAD DATA cell
# derives the expected CSV column count from them and split_data the feature width.
max_pus_number, max_sus_number, num_sensors = 15, 1, 225
# True -> the leading feature columns are sensor readings; False -> they are PU entries.
IS_SENSORS = False
# DUMMY_LOC_VALUE is the padding split_data writes into feature slots a sample does not
# fill; DUMMY_POWER_VALUE is not referenced by any cell visible in this notebook.
DUMMY_LOC_VALUE, DUMMY_POWER_VALUE = -1, -1

In [9]:
# LOAD DATA
# Reads two CSV files, joins them column-wise, and derives the arrays used by the
# experiment cells: data_reg (regression), data_class (classification) and y_class_power.
# NOTE(review): both paths are hard-coded relative paths outside this repository;
# the cell only works when that directory layout exists.

# Number of columns forced onto the first file: the sensor (or PU + 1) columns,
# three columns per SU, plus two more.
num_columns = (num_sensors if IS_SENSORS else max_pus_number * 3 + 1) + max_sus_number * 3 + 2
cols = [i for i in range(num_columns)]
dataframe = pd.read_csv('../../../java_workspace/research/spectrum_allocation/resources/data/' +
                        'dynamic_pus_using_pus_60000_15PUs_1SUs_square100grid_splat_2020_06_17_21_46.txt', 
                        delimiter=',', header=None, names=cols)
dataframe_max = pd.read_csv('../../../java_workspace/research/spectrum_allocation/resources/data/' +
                            'dynamic_pus_max_power_60000_15PUs_1SUs_square100grid_splat_2020_06_17_21_46.txt', delimiter=',', header=None)

dataframe.reset_index(drop=True, inplace=True)
dataframe_max.reset_index(drop=True, inplace=True)

# Append only the last column of the second file to the first frame; ignore_index
# renumbers the resulting columns 0..n-1.
dataframe_tot = pd.concat([dataframe, dataframe_max.iloc[:, dataframe_max.columns.values[-1]]], axis=1,
                        ignore_index=True)
# Drop every row whose appended (last) column equals -inf.
idx = dataframe_tot[dataframe_tot[dataframe_tot.columns[-1]] == -float('inf')].index
dataframe_tot.drop(idx, inplace=True)

# Regression matrix: all columns except the last three, followed by the last
# (appended) column, which therefore becomes the final column of data_reg.
data_reg = np.concatenate((dataframe_tot.values[:, 0:dataframe_tot.shape[1]-3], 
                           dataframe_tot.values[:, dataframe_tot.shape[1]-1:dataframe_tot.shape[1]]), axis=1)
# Clamp every entry of data_reg (feature columns and final column alike) to >= -90.
data_reg[data_reg < -90.0] = -90.0
# Classification matrix: everything except the appended column, which is kept
# separately as y_class_power.
data_class = dataframe_tot.values[:, 0:dataframe_tot.shape[1]-1]
y_class_power = dataframe_tot.values[:, -1]
# Release the source frames; only dataframe_tot and the derived arrays remain.
del dataframe, dataframe_max

In [10]:
# Echo the regression matrix built in the LOAD DATA cell (displays the whole array repr).
data_reg

array([[15.   , 51.   , 15.   , ..., 39.   , 60.   ,  8.127],
       [15.   , 91.   , 15.   , ..., 18.   ,  7.   , -3.86 ],
       [15.   ,  6.   , 46.   , ...,  6.   , 42.   ,  3.581],
       ...,
       [15.   , 21.   , 54.   , ..., 80.   , 49.   ,  2.292],
       [15.   , 98.   , 23.   , ...,  1.   , 52.   , 21.347],
       [15.   , 31.   , 79.   , ..., 87.   , 23.   , 10.367]])

In [5]:
# Splitting data
# def split_data(data, train_samples):
#     num_inputs = data.shape[1] - 1
#     val_samples = round(train_samples/3)
#     X_train, y_train = data[0:train_samples, 0: num_inputs], data[0:train_samples, -1]
#     X_val, y_val = data[train_samples:train_samples+val_samples, 0: num_inputs],data[train_samples:train_samples+val_samples, -1]
#     X_test, y_test = data[train_samples:, 0: num_inputs], data[train_samples:, -1]
#     return X_train,X_val, X_test, y_train, y_val, y_test

def split_data(data: np.ndarray, train_samples, max_pus=None, max_sus=None,
               n_sensors=None, is_sensors=None, dummy_value=None):
    """Split ``data`` row-wise into train / validation / test features and targets.

    Rows ``[0, train_samples)`` form the training set, the next
    ``round(train_samples / 3)`` rows the validation set, and all remaining
    rows the test set. The last column of ``data`` is returned as the target.

    Every returned feature row has a fixed width; slots a sample does not fill
    keep ``dummy_value``:

    * PU mode (``is_sensors`` false): a source row is read as
      ``[num_pus, <num_pus * 3 values>, num_sus, <SU values ...>, ..., target]``.
      The PU values go to the first ``max_pus * 3`` slots, and the SU values
      (all but the final value of the last SU) start at slot ``max_pus * 3``.
    * Sensor mode (``is_sensors`` true): a source row is read as
      ``[<n_sensors values>, num_sus, <SU values ...>, ..., target]``.
      The sensor values are copied verbatim, and the SU values (all but the
      final value of the last SU) start at slot ``n_sensors``.

    Parameters
    ----------
    data : np.ndarray
        2-D array with one sample per row, laid out as described above.
    train_samples : int
        Number of leading rows used for training.
    max_pus, max_sus, n_sensors, is_sensors, dummy_value : optional
        Layout parameters. Any left as ``None`` falls back to the notebook-level
        ``max_pus_number``, ``max_sus_number``, ``num_sensors``, ``IS_SENSORS``
        and ``DUMMY_LOC_VALUE`` respectively.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``.

    Raises
    ------
    ValueError
        If ``data`` has fewer rows than the train and validation sets need.
    """
    # Resolve layout parameters lazily so that only the globals the selected
    # mode actually needs have to exist.
    max_sus = max_sus_number if max_sus is None else max_sus
    is_sensors = IS_SENSORS if is_sensors is None else is_sensors
    dummy_value = DUMMY_LOC_VALUE if dummy_value is None else dummy_value
    if is_sensors:
        n_sensors = num_sensors if n_sensors is None else n_sensors
        base_width = n_sensors
    else:
        max_pus = max_pus_number if max_pus is None else max_pus
        base_width = max_pus * 3

    # The last SU contributes only two values (its final value is left out).
    num_inputs = (max_sus - 1) * 3 + 2 + base_width
    val_samples = round(train_samples / 3)
    test_samples = data.shape[0] - val_samples - train_samples
    if test_samples < 0:
        raise ValueError(
            "data has %d rows but %d are needed for the train and validation splits"
            % (data.shape[0], train_samples + val_samples))

    def _build_features(start, count):
        """Return the padded feature matrix for rows ``start .. start + count - 1``."""
        features = np.full((count, num_inputs), dummy_value, dtype=float)
        rows = data[start:start + count]
        if is_sensors:
            features[:, :n_sensors] = rows[:, :n_sensors]
            for out_row, row in zip(features, rows):
                num_sus = int(row[n_sensors])
                # SU values start right after the sensor block. Writing them from
                # n_sensors + 1 (as this code used to) left slot n_sensors unfilled
                # and overflowed the row whenever num_sus == max_sus.
                out_row[n_sensors:n_sensors + num_sus * 3 - 1] = row[
                    n_sensors + 1:n_sensors + num_sus * 3]
        else:
            for out_row, row in zip(features, rows):
                num_pus = int(row[0])
                num_sus = int(row[1 + num_pus * 3])
                out_row[:num_pus * 3] = row[1:1 + num_pus * 3]  # pus
                # sus except the final value of the last su
                out_row[max_pus * 3:(max_pus + num_sus) * 3 - 1] = row[
                    2 + num_pus * 3:1 + (num_pus + num_sus) * 3]
        return features

    val_start = train_samples
    test_start = train_samples + val_samples
    X_train = _build_features(0, train_samples)
    X_val = _build_features(val_start, val_samples)
    X_test = _build_features(test_start, test_samples)

    y_train = data[0:val_start, -1]
    y_val = data[val_start:test_start, -1]
    y_test = data[test_start:, -1]
    return X_train, X_val, X_test, y_train, y_val, y_test

In [11]:
#utils
def false_analysis(y_test, y_pred):
    """Return ``(false_positives, false_negatives)`` for two label arrays.

    The counts are obtained by summing label values, so the result is a count
    only when both arrays hold 0/1 labels. ``y_test == 1`` is used as a mask on
    ``y_pred``, which requires array-like inputs that support boolean-mask
    indexing (e.g. numpy arrays).
    """
    actual_positive = y_test == 1
    true_positives = sum(y_pred[actual_positive])
    false_positives = sum(y_pred) - true_positives
    false_negatives = sum(y_test) - true_positives
    return false_positives, false_negatives

In [12]:
X_train, X_val, X_test, y_train, y_val, y_test = split_data(data_reg, 256)
# For the first row of each split, print the raw source row followed by the
# feature vector and target extracted from it. 256 is the train size and
# 85 == round(256 / 3) the validation size, so the offsets below are the first
# rows of the train, validation and test splits.
split_checks = [(0, X_train, y_train), (256, X_val, y_val), (256+85, X_test, y_test)]
for check_no, (raw_row, features, targets) in enumerate(split_checks):
    if check_no:
        print("***")
    print(data_reg[raw_row])
    print(features[0])
    print(targets[0])

[ 15.     51.     15.     -3.758  71.     90.    -10.665  19.     22.
 -20.781  32.     77.    -20.519   0.     60.     -6.948  23.      7.
  -7.9    43.     33.    -21.315  28.     74.    -16.492  11.     98.
 -17.556  58.     65.    -13.686  89.     73.     -9.114   4.     30.
 -15.266  16.     62.    -14.142  52.     24.    -17.158  23.     37.
 -23.546   1.     39.     60.      8.127]
[ 51.     15.     -3.758  71.     90.    -10.665  19.     22.    -20.781
  32.     77.    -20.519   0.     60.     -6.948  23.      7.     -7.9
  43.     33.    -21.315  28.     74.    -16.492  11.     98.    -17.556
  58.     65.    -13.686  89.     73.     -9.114   4.     30.    -15.266
  16.     62.    -14.142  52.     24.    -17.158  23.     37.    -23.546
  39.     60.   ]
8.127
***
[ 15.     32.     44.     -7.992  16.      2.    -11.236  59.     49.
 -10.14   85.     72.     -8.34   32.     99.    -15.521  14.     69.
 -16.729  88.     60.     -1.609  50.      8.    -28.241  21.     75.
 -20.51

In [11]:
# Echo data_reg again. NOTE(review): the recorded output of this cell contains NaN
# entries and differs from the earlier data_reg display, so it presumably comes from
# a different run or input file — re-run the notebook top to bottom to confirm.
data_reg

array([[-16.501,  87.   ,  52.   , ...,  10.   ,  34.   , -21.746],
       [ -4.746,  74.   ,  71.   , ...,     nan,     nan,   5.038],
       [-22.775,  97.   ,  57.   , ...,   0.   ,     nan,   2.342],
       ...,
       [ -2.106,   1.   ,  62.   , ...,     nan,     nan,  16.766],
       [-26.324,  77.   ,   2.   , ...,  71.   ,  10.   ,   7.17 ],
       [-23.057,   5.   ,  82.   , ...,     nan,     nan,  14.104]])

In [None]:
## SVM(SVC)
# For every training-set size: choose the SVC regularisation C on the validation
# split, score the chosen model on the test split, then estimate a per-sample power
# by bisecting on the classifier's decision and compare it with y_class_power.
# Uses split_data, false_analysis, data_class, y_class_power, number_samples and
# C_vec defined in earlier cells.
average_class_diff_power = []
fp_mean_power = []
accuracy, f_score, false_positive, false_negative = [], [], [], []
best_c_lst = []
fp_penalty_coef, fn_penalty_coef = 1, 1
metric = "fp_min"  # {"accuracy", "fp_min"}
# Normalised class weights handed to SVC: class 0 gets the fp share, class 1 the fn share.
class_weight = {0:fp_penalty_coef/(fp_penalty_coef + fn_penalty_coef), 1:fn_penalty_coef/(fp_penalty_coef + fn_penalty_coef)}
best_c, bestsvcclassifier = None, None
for number_sample in number_samples:
    best_accuracy = -float('inf')
    best_fp = float('inf')
    X_train, X_val, X_test, y_train, y_val, y_test = split_data(data_class, number_sample)
    # Scaling statistics come from the training split only.
    scaler_x = StandardScaler()
    scaler_x.fit(X_train)
    X_train = scaler_x.transform(X_train)
    X_val = scaler_x.transform(X_val)
    for c in C_vec:
        svclassifier = SVC(kernel='rbf', C=c, class_weight = class_weight)
        svclassifier.fit(X_train, y_train)
        
        #validating
        y_pred_val = svclassifier.predict(X_val)
        if metric == "accuracy":
            accuracy_tmp = metrics.accuracy_score(y_val, y_pred_val)
            if accuracy_tmp > best_accuracy:
                best_accuracy = accuracy_tmp
                best_c = c
                bestsvcclassifier = svclassifier
        elif metric == "fp_min":
            conf_mat = metrics.confusion_matrix(y_val, y_pred_val)
            # A confusion matrix that is not 2x2 is treated as having no false positives.
            fp_tmp = conf_mat[0][1] if len(conf_mat) == 2 else 0
            if fp_tmp < best_fp:
                best_fp = fp_tmp
                best_c = c
                bestsvcclassifier = svclassifier
                    
            
    best_c_lst.append(best_c)
    #predicting
    X_test = scaler_x.transform(X_test)
    y_pred = bestsvcclassifier.predict(X_test)
    
    #evaluating
    accuracy.append(round(metrics.accuracy_score(y_test, y_pred), 3))
    f_score.append(round(metrics.f1_score(y_true=y_test, y_pred=y_pred), 3))
    fp, fn = false_analysis(y_test, y_pred)
    false_positive.append(int(fp))
    false_negative.append(int(fn))
    
    #Power max calculations
    # The last X_test.shape[0] entries of y_class_power line up with the test split.
    y_class_power_test = y_class_power[len(y_class_power)-X_test.shape[0]:]
    max_power = max(y_class_power_test) + 10  # 10 is added to increase higher bound
    min_power = min(y_class_power_test) - 10  # 10 is subtracted to decrease lower bound
    # Bisect, for all test rows at once, on the value written into the LAST feature
    # column: a truthy prediction raises the lower bound, a falsy one lowers the
    # upper bound, until the bracket is at most 0.5 wide. Rows are still bisected
    # independently (per-row bounds and an `active` mask); batching only replaces
    # one predict() call per row per step with one call per step.
    # NOTE(review): this treats the last column of X as the SU power, while the
    # split_data in this notebook is commented "sus except power of last su" —
    # confirm that the last feature really is the power for data_class.
    low = np.full(y_class_power_test.size, min_power, dtype=float)
    high = np.full(y_class_power_test.size, max_power, dtype=float)
    active = high - low > 0.5
    while active.any():
        mid = low[active] + (high[active] - low[active])/2
        # Candidate power expressed in the scaler's normalised units.
        X_test[active, -1] = (mid - scaler_x.mean_[-1])/scaler_x.scale_[-1]
        allowed = bestsvcclassifier.predict(X_test[active]).astype(bool)
        low[active] = np.where(allowed, mid, low[active])
        high[active] = np.where(allowed, high[active], mid)
        active = high - low > 0.5
    y_class_power_pred = low + (high - low)/2
    average_class_diff_power.append(round(np.mean(np.absolute(y_class_power_pred - y_class_power_test)), 3))
    # Mean over-estimation: positive part of (predicted - true), averaged over ALL test rows.
    fp_samples = np.zeros(len(y_class_power_pred), dtype=float)
    fp_samples[y_class_power_pred > y_class_power_test] = (y_class_power_pred - y_class_power_test)[y_class_power_pred > 
                                                                                                    y_class_power_test]
    fp_mean_power.append(round(np.mean(fp_samples), 3))
    print('Number_samples:', number_sample, ', accuracy:', accuracy[-1], ', f_score:', f_score[-1], 
          ', fp:', fp,', fn:', fn, ', error:', average_class_diff_power[-1], 'fp_error:', fp_mean_power[-1])
del svclassifier

In [None]:
# Per-training-size results of the SVC cell: mean absolute power difference, then
# mean over-estimation. NOTE(review): fp_mean_power is rebound by the SVR cells, so
# the second line shows SVC values only if this runs before them.
print(average_class_diff_power)
print(fp_mean_power)

In [None]:
### MAX_POWER analysis
# One figure per SVM test metric, each plotted against the training-set size and
# saved to its own PNG: (series, y-axis label, title, output path).
svm_metric_figures = [
    (accuracy, '%',
     'SVM: Classification Accuracy(Dynamic PUs, Using PUs, Test_size=40k)',
     'ML\\results\\changing_training_test40k_4kx4k_smallVal_compare_dynamicPUS_svmAcc.png'),
    (f_score, '%',
     'SVM: Classification F_score(Dynamic PUs, Using PUs, Test_size=40k)',
     'ML\\results\\changing_training_test40k_4kx4k_smallVal_compare_dynamicPUS_svmfscore.png'),
    (false_positive, '#',
     'SVM: Classification FP(Dynamic PUs, Using PUs, Test_size=40k)',
     'ML\\results\\changing_training_test40k_4kx4k_smallVal_compare_dynamicPUS_svmfp.png'),
    (false_negative, '#',
     'SVM: Classification FN(Dynamic PUs, Using PUs, Test_size=40k)',
     'ML\\results\\changing_training_test40k_4kx4k_smallVal_compare_dynamicPUS_svmfn.png'),
]
for metric_values, y_label, figure_title, png_path in svm_metric_figures:
    fig = plt.figure(figsize=(15,8))
    ax = fig.add_subplot(1,1,1)
    ax.plot(number_samples, metric_values)
    ax.set_xlabel('# training samples')
    ax.set_ylabel(y_label)
    ax.set_title(figure_title)
    ax.grid(True)
    fig.savefig(png_path)

In [13]:
# SVR
# For every training-set size: pick C by mean absolute error on the validation
# split, then report the test-split mean absolute error and mean over-estimation.
# NOTE: rebinds fp_mean_power, which the SVC cell also fills.
average_reg_diff_power, best_c_reg_lst, fp_mean_power = [], [], []
best_c_reg, bestsvrclassifier =  None, None
# TODO: Having different penalties for fp and fn
for number_sample in number_samples:
    X_train, X_val, X_test, y_train, y_val, y_test = split_data(data_reg, number_sample)
    # Scaling statistics come from the training split only.
    scaler_x = StandardScaler().fit(X_train)
    X_train = scaler_x.transform(X_train)
    X_val = scaler_x.transform(X_val)

    # Keep the first C that achieves the lowest validation error.
    min_err = float('inf')
    for c in C_vec:
        svr_candidate = SVR(kernel='rbf', C=c, degree=X_train.shape[1]+1).fit(X_train, y_train)

        #validating
        y_pred_val = svr_candidate.predict(X_val)
        err_tmp = np.mean(np.absolute(y_pred_val - y_val))
        if err_tmp < min_err:
            min_err, best_c_reg, bestsvrclassifier = err_tmp, c, svr_candidate
    best_c_reg_lst.append(best_c_reg)

    #predicting
    X_test = scaler_x.transform(X_test)
    y_pred = bestsvrclassifier.predict(X_test)

    #evaluating
    average_reg_diff_power.append(round(np.mean(np.absolute(y_test - y_pred)), 3))
    # Positive part of (predicted - true); rows that are not over-estimated count as 0.
    fp_samples = np.where(y_pred > y_test, y_pred - y_test, 0.0)
    fp_mean_power.append(round(np.mean(fp_samples), 3))
    print('Number_samples: ', number_sample, ' error: ', average_reg_diff_power[-1], 
          ', fp_error:', fp_mean_power[-1])

Number_samples:  256  error:  6.83 , fp_error: 3.796
Number_samples:  512  error:  6.782 , fp_error: 3.738
Number_samples:  1024  error:  6.731 , fp_error: 3.5
Number_samples:  2048  error:  6.696 , fp_error: 3.455
Number_samples:  4096  error:  6.601 , fp_error: 3.358
Number_samples:  8192  error:  6.479 , fp_error: 3.293


In [None]:
# Linear SVR
# Same sweep as the SVR cell but with LinearSVR: C is chosen by mean absolute error
# on the validation split and the chosen model is scored on the test split.
# NOTE: rebinds average_reg_diff_power, best_c_reg_lst and fp_mean_power, replacing
# whatever the SVR cell stored in them.
average_reg_diff_power, best_c_reg_lst, fp_mean_power = [], [], []
best_c_reg, bestsvrclassifier =  None, None
for number_sample in number_samples:
    X_train, X_val, X_test, y_train, y_val, y_test = split_data(data_reg, number_sample)
    # Scaling statistics come from the training split only.
    scaler_x = StandardScaler().fit(X_train)
    X_train, X_val = scaler_x.transform(X_train), scaler_x.transform(X_val)

    min_err = float('inf')
    for c in C_vec:
        linear_candidate = LinearSVR(C=c, loss='epsilon_insensitive').fit(X_train, y_train)

        #validating
        y_pred_val = linear_candidate.predict(X_val)
        err_tmp = np.mean(np.absolute(y_pred_val - y_val))
        if not err_tmp < min_err:
            continue  # no strict improvement: keep the earlier best
        min_err, best_c_reg, bestsvrclassifier = err_tmp, c, linear_candidate
    best_c_reg_lst.append(best_c_reg)

    #predicting
    X_test = scaler_x.transform(X_test)
    y_pred = bestsvrclassifier.predict(X_test)

    #evaluating
    average_reg_diff_power.append(round(np.mean(np.absolute(y_test - y_pred)), 3))
    # Positive part of (predicted - true); rows that are not over-estimated count as 0.
    fp_samples = np.where(y_pred > y_test, y_pred - y_test, 0.0)
    fp_mean_power.append(round(np.mean(fp_samples), 3))
    print('Number_samples: ', number_sample, ' error: ', average_reg_diff_power[-1], ', fp_error:', fp_mean_power[-1])

In [8]:
# Per-training-size regression results: mean absolute error, then mean over-estimation.
# Both lists are rebound by the SVR and the Linear SVR cells, so the values shown
# belong to whichever of those cells ran last.
print(average_reg_diff_power)
print(fp_mean_power)

[6.905, 6.867, 6.803, 6.769, 6.728, 6.607]
[3.3, 3.223, 3.399, 3.284, 3.185, 3.195]


In [None]:
# Compare the mean absolute power error of the SVC-based estimate (solid line) with
# the regression estimate (red dashed line) over the training-set sizes.
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(1,1,1)
ax.plot(number_samples, average_class_diff_power)
ax.plot(number_samples, average_reg_diff_power, 'r--')
ax.set_xlabel('# training samples')
ax.set_ylabel('Diff(dB)')
ax.set_title('Average absolute difference power(Dynamic PUs, Using PUs, Test_size=40k)')
ax.grid(True)

# Fixed tick positions and y-range.
ax.set_yticks(np.arange(0,20, 2))
ax.set_ylim([2,20])
ax.set_xticks(np.arange(5,4100, 500))
# plt.grid(which='minor')
# plt.text(40, 50, '# Validation = 34k')
# plt.text(400, 45, '# Test = 34k')
ax.legend(['SVM', 'SVR'])
fig.savefig('ML\\results\\changing_training_test40k_4kx4k_smallVal_compare_dynamicPUS_averag_powerSVMSVR.png')

In [None]:
# Accumulator for one average_reg_diff_power list per regression run.
average_reg_diff_power_tot = []


In [None]:
# Show the training-set sizes currently bound to number_samples.
print(number_samples)

In [None]:
# Store the latest regression error curve. Re-running this cell appends the same
# list object again, so run it exactly once per regression run.
average_reg_diff_power_tot.append(average_reg_diff_power)

In [None]:
# Hard-coded results: rebinds number_samples to a 23-point sweep and
# average_reg_diff_power_tot to four error curves, one value per sweep point.
# The plotting cell below labels the four curves 'linear', 'rbf', 'poly' and
# 'sigmoid', in this order.
# NOTE(review): this overwrites the number_samples used by the experiment cells and
# anything previously appended to average_reg_diff_power_tot.
number_samples = [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 120, 150, 200, 250, 300, 400, 500, 700, 1000, 2000, 3000, 4000]
average_reg_diff_power_tot = [[4.91, 4.915, 5.02, 7.274, 5.21, 5.476, 5.547, 5.448, 5.358, 5.486, 5.424, 5.585, 5.423, 5.369,
                               5.275, 5.22, 5.065, 5.058, 4.915, 4.873, 4.8, 4.787, 4.782], 
                              [4.739, 4.728, 4.858, 4.9, 4.761, 4.835, 4.898, 4.959, 4.806, 4.913, 4.945, 4.88, 4.8, 4.812,
                               4.756, 4.74, 4.764, 4.832, 4.729, 4.745, 4.728, 4.728, 4.731], 
                              [5.59, 5.33, 4.764, 4.731, 4.728, 4.923, 5.086, 4.798, 4.816, 5.035, 4.827, 4.842, 4.757, 4.796,
                               4.734, 4.748, 4.74, 4.734, 4.728, 4.729, 4.726, 4.726, 4.728], 
                              [4.791, 4.781, 4.732, 5.677, 4.757, 4.837, 4.841, 5.242, 4.81, 4.955, 4.972, 5.106, 4.801, 4.754,
                               4.764, 4.748, 4.757, 4.763, 4.737, 4.735, 4.737, 4.74, 4.744]]

In [None]:
# Plot the four stored regression error curves on one axis, one per legend entry.
fig = plt.figure(figsize=(15,8))
ax = fig.add_subplot(1,1,1)
# Format string per curve; the empty tuple keeps matplotlib's default line style.
curve_formats = [(), ('r--',), ('g.-',), ('y->',)]
for curve_idx, curve_format in enumerate(curve_formats):
    ax.plot(number_samples, average_reg_diff_power_tot[curve_idx], *curve_format)
ax.set_xlabel('# training samples')
ax.set_ylabel('Diff(dB)')
ax.set_title('Average absolute difference power(Dynamic PUs, Using PUs, Test_size=40k)')
ax.grid(True)

# Fixed tick positions and y-range.
ax.set_yticks(np.arange(0,8, 2))
ax.set_ylim([2,8])
ax.set_xticks(np.arange(5,4100, 500))
# plt.grid(which='minor')
# plt.text(40, 50, '# Validation = 34k')
# plt.text(400, 45, '# Test = 34k')
ax.legend(['linear', 'rbf', 'poly', 'sigmoid'])
# plt.savefig('ML\\results\\changing_training_test40k_4kx4k_smallVal_compare_dynamicPUS_averag_powerSVMSVR.png')