# Generate datasets

See procedures.txt for further information.

## Imports

In [None]:
import sys
sys.path.append("../python/")

import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig
import pandas as pd
import utils
from pathlib import Path
from joblib import Parallel, delayed
from time import time
from IPython.display import display, clear_output

## Global parameters


In [None]:
# Output location and parallelism
dirDatasets = Path("../datasets/")  # directory the generated CSV files are written to
nJobs = -1  # forwarded to joblib.Parallel(n_jobs=...)

# Simulation settings shared by every procedure
P = 1  # users' power
n_pilot = 300  # pilot symbols per user
n_eve = 1  # number of eavesdroppers
n_trials = 100  # samples generated for each parameter combination
range_Pe = np.arange(0, 2.51, 0.5)  # eavesdropper powers: 0, 0.5, ..., 2.5
columns = ["n_users", "snr", "E", "eta", "Pe", "target"]  # CSV header

# Seed NumPy's global random generator in this process.
# NOTE(review): the samples are generated inside joblib workers; whether those
# workers inherit this seed depends on the joblib backend - confirm before
# relying on reproducible datasets.
np.random.seed(0)

### Balancing the cases without pilot contamination

There should be the same number of cases with and without pilot contamination (PC).

In [None]:
# Prepend extra zero-power entries so that the grid holds as many values
# without pilot contamination (Pe == 0) as with it (Pe > 0).
_n_with_pc = np.count_nonzero(range_Pe > 0)
_n_without_pc = np.count_nonzero(range_Pe == 0)
_zero_padding = np.zeros(_n_with_pc - _n_without_pc)
range_Pe = np.concatenate((_zero_padding, range_Pe))
print(range_Pe)

## Functions

### Generate channels

In [None]:
def gen_channels(n_antennas, n_users, n_eve):
    """Draw random complex Gaussian channels for the users and the eavesdropper(s).

    Every entry equals ``sqrt(0.5) * (a + 1j*b)`` where ``a`` and ``b`` are
    drawn with ``np.random.normal(0, 1)``, so the global NumPy random state is
    consumed.

    Parameters
    ----------
    n_antennas : int
        Number of rows of both returned matrices.
    n_users : int
        Number of columns of the authentic users' channel matrix.
    n_eve : int
        Number of columns of the eavesdropper channel matrix.

    Returns
    -------
    H : ndarray of complex, shape (n_antennas, n_users)
        Authentic users' channels.
    g : ndarray of complex, shape (n_antennas, n_eve)
        Eavesdropper channel(s).
    """
    scale = np.sqrt(0.5)

    def draw(n_columns):
        # The real part is drawn before the imaginary part.
        shape = (n_antennas, n_columns)
        real_part = np.random.normal(0, 1, size=shape)
        imag_part = np.random.normal(0, 1, size=shape)
        return scale * (real_part + 1j * imag_part)

    # Users first, eavesdropper second: keeps the random draw order fixed.
    H = draw(n_users)
    g = draw(n_eve)
    return H, g

### Simulate uplink

In [None]:
def simulate_uplink(n_pilot, n_antennas, n_users, n_eve, Pe, snr):
    """Simulate the uplink pilot transmission under a pilot contamination attack.

    Random channels are drawn with ``gen_channels``, every user sends
    ``n_pilot`` QPSK pilot symbols, and each eavesdropper re-transmits the
    first user's pilot sequence scaled by ``sqrt(Pe)``. The superposition is
    passed through ``utils.awgn``.

    Parameters
    ----------
    n_pilot : int
        Number of pilot symbols per user.
    n_antennas : int
        Number of antennas (rows of the channel matrices).
    n_users : int
        Number of authentic users.
    n_eve : int
        Number of eavesdroppers. The original code only worked for 1; any
        non-negative value is now accepted.
    Pe : float
        Eavesdropper power; 0 means no contamination.
    snr : float
        Forwarded to ``utils.awgn`` as ``SNR``.

    Returns
    -------
    Y : ndarray
        Received pilot signal after fading and noise.
    xp : ndarray, shape (n_users, n_pilot)
        Pilot symbols; row k belongs to the k-th user.
    h : ndarray, shape (n_antennas,)
        True channel of the first user.
    """
    n_eve = int(n_eve)

    # Generate channels
    Haut, g = gen_channels(int(n_antennas), int(n_users), n_eve)

    # Generate QPSK pilot symbols at the users (a QPSK symbol carries 2 bits)
    b = np.random.choice([0, 1], 2*n_pilot*n_users)
    s = utils.qpskmodulator(b)
    xp = s.reshape(n_users, n_pilot)  # Row k holds the symbols of the k-th user

    # Pilot signal at the eavesdropper: the first user's sequence, scaled by sqrt(Pe)
    xpe = np.sqrt(Pe)*xp[0, :]

    # One row per eavesdropper so the pilot matrix matches the n_eve columns
    # of g. With n_eve == 1 this is exactly the single appended row [xpe].
    # NOTE(review): assumes every eavesdropper sends the same contaminating
    # pilot with power Pe - confirm for n_eve > 1.
    xpe_rows = np.tile(xpe, (n_eve, 1))

    # Concatenate signals and channels to simulate transmission
    xptx = np.concatenate((xp, xpe_rows))
    H = np.concatenate((Haut, g), axis=1)

    # Transmission
    Y = np.dot(H, xptx)  # Fading
    Y = utils.awgn(Y, SNR=snr)  # Additive white Gaussian noise

    return Y, xp, H[:, 0]

### Channel estimation

In [None]:
def channel_estimation(Y, xp):
    """Least-squares channel estimate of the first user from the received pilots.

    Computes ``Hest = Y xp^H (xp xp^H)^+`` and returns its first column.

    Parameters
    ----------
    Y : ndarray, shape (n_antennas, n_pilot)
        Received pilot signal.
    xp : ndarray, shape (n_users, n_pilot)
        Known pilot symbols, one row per user.

    Returns
    -------
    ndarray, shape (n_antennas,)
        Estimated channel of the first user (column 0 of ``Hest``).
    """
    # Compute the conjugate transpose and the Gram matrix once instead of
    # rebuilding them for every use.
    xp_herm = np.conjugate(xp).T
    gram = np.matmul(xp, xp_herm)

    # Solving gram @ X = I in the least-squares sense gives the
    # (pseudo-)inverse of the Gram matrix.
    identity = np.eye(gram.shape[0])
    gram_inv = np.linalg.lstsq(gram, identity, rcond=None)[0]

    Hest = np.matmul(np.matmul(Y, xp_herm), gram_inv)
    return Hest[:, 0]

### Channel estimate energy and threshold

In [None]:
def channel_energy(h, snr, n_antennas, n_pilot):
    """Return the normalised energy of a channel estimate and the test threshold.

    Parameters
    ----------
    h : ndarray
        Channel estimate.
    snr : float
        Signal-to-noise ratio; converted to a linear noise level with
        ``10**(-snr/10)``, i.e. treated as a value in dB.
    n_antennas : int
        Number of antennas, used to normalise the energy and the noise term.
    n_pilot : int
        Number of pilot symbols.

    Returns
    -------
    E : float
        ``h^H h / n_antennas`` (real part).
    eta : float
        ``(1 + s) * (2 + s) * ln((2 + s) / (1 + s))`` with
        ``s = n_antennas * N0 / n_pilot``.
    """
    # Energy of the estimate, averaged over the antennas
    h_herm = np.conjugate(h).T
    E = (np.matmul(h_herm, h)/n_antennas).real

    # Threshold, which depends only on the noise level and the dimensions
    noise_level = 1/(10**(snr/10))
    s = n_antennas*noise_level/n_pilot
    log_ratio = np.log((2+s)/(1+s))
    eta = ((1 + s)*(2+s)*log_ratio).real

    return E, eta

### Generate sample

In [None]:
def generate_sample(n_pilot, n_antennas, n_users, Pe, snr, n_eve, csv_path):
    """Simulate one uplink trial and append its feature row to a CSV file.

    The appended row is ``n_users, snr, E, eta, Pe, target``, where ``E`` and
    ``eta`` come from ``channel_energy`` applied to the estimated channel of
    the first user, and ``target`` is True when ``Pe`` is non-zero.

    Parameters
    ----------
    n_pilot, n_antennas, n_users, n_eve, Pe, snr
        Forwarded to ``simulate_uplink``.
    csv_path : path-like
        CSV file to append to; no header is written by this function.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): this file calls the function from parallel joblib workers
    that all append to the same CSV, and nothing here synchronises those
    writes - confirm this is safe on the target platform.
    """
    # Uplink (the true channel returned by the simulation is not needed here)
    received, pilots, _h_true = simulate_uplink(
        n_pilot, n_antennas, n_users, n_eve, Pe, snr
    )

    # Channel estimation
    h_estimate = channel_estimation(received, pilots)

    # Energy and threshold for the hypothesis test
    E, eta = channel_energy(h_estimate, snr, n_antennas, n_pilot)

    # Label: the sample is contaminated when the eavesdropper power is non-zero
    target = bool(Pe)

    # Append the new row to the CSV
    row = [n_users, snr, E, eta, Pe, target]
    pd.DataFrame([row]).to_csv(csv_path, mode="a", header=False, index=False)

## Procedure 1 training dataset

### Parameters

In [None]:
# Fixed array size for the training set
n_antennas = 256
# SNR grid: -10, -5, ..., 30
range_snr = np.arange(-10, 31, 5)
# Single-user case followed by 16, 32, ..., 256 users
range_users = np.append([1], np.arange(16, 257, 16))

### Generating samples

In [None]:
# Create the output CSV holding only the header row; generate_sample appends
# the samples to it. The folder is created first so a missing directory does
# not abort the run.
df_path = dirDatasets.joinpath("procedure_1_train.csv")
df_path.parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(columns=columns).to_csv(df_path, index=False)

current_iteration = 1
total_iterations = len(range_users) * len(range_snr) * len(range_Pe)

for n_users in range_users:
    for Pe in range_Pe:
        for snr in range_snr:

            # n_trials samples for this (n_users, Pe, snr) combination,
            # generated in parallel; each call appends one row to df_path
            Parallel(n_jobs=nJobs, verbose=0)(
                delayed(generate_sample)(
                    n_pilot,
                    n_antennas,
                    n_users,
                    Pe,
                    snr,
                    n_eve,
                    df_path) for _ in range(n_trials))

            # Replace the previous progress report with the current one
            progress = 100 * current_iteration / total_iterations
            printStr = "\n".join([
                "Number of antennas: " + str(n_antennas),
                "Number of users:    " + str(n_users),
                "Eve power, Pe:      " + str(Pe),
                "SNR:                " + str(snr),
                "Progress:           {:.2f}%".format(progress),
            ])
            clear_output(wait=True)
            print(printStr)
            current_iteration += 1

## Procedure 1 test datasets

### Test 1.1 - Influence of the SNR for a fixed number of antennas and users

### Parameters

In [None]:
# Fixed array size and a single user count; only the SNR is swept
n_antennas = 256
# SNR grid in unit steps: -10, -9, ..., 30
range_snr = np.arange(-10, 31)
range_users = np.array([64])

### Generating samples

In [None]:
# Create the output CSV holding only the header row; generate_sample appends
# the samples to it. The folder is created first so a missing directory does
# not abort the run.
df_path = dirDatasets.joinpath("procedure_1_test_1.csv")
df_path.parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(columns=columns).to_csv(df_path, index=False)

current_iteration = 1
total_iterations = len(range_users) * len(range_snr) * len(range_Pe)

for n_users in range_users:
    for Pe in range_Pe:
        for snr in range_snr:

            # n_trials samples for this (n_users, Pe, snr) combination,
            # generated in parallel; each call appends one row to df_path
            Parallel(n_jobs=nJobs, verbose=0)(
                delayed(generate_sample)(
                    n_pilot,
                    n_antennas,
                    n_users,
                    Pe,
                    snr,
                    n_eve,
                    df_path) for _ in range(n_trials))

            # Replace the previous progress report with the current one
            progress = 100 * current_iteration / total_iterations
            printStr = "\n".join([
                "Number of antennas: " + str(n_antennas),
                "Number of users:    " + str(n_users),
                "Eve power, Pe:      " + str(Pe),
                "SNR:                " + str(snr),
                "Progress:           {:.2f}%".format(progress),
            ])
            clear_output(wait=True)
            print(printStr)
            current_iteration += 1

### Test 1.2 - Influence of the number of users for a fixed number of antennas and SNR

### Parameters

In [None]:
# Fixed array size and a single SNR value; only the number of users is swept
n_antennas = 256
range_snr = np.array([10])
# Single-user case followed by 4, 8, ..., 256 users
range_users = np.append([1], np.arange(4, 257, 4))

### Generating samples

In [None]:
# Create the output CSV holding only the header row; generate_sample appends
# the samples to it. The folder is created first so a missing directory does
# not abort the run.
# NOTE(review): the "_aaa" suffix looks like a leftover temporary name (the
# sibling test writes "procedure_1_test_1.csv") - confirm which file name the
# downstream code expects before renaming.
df_path = dirDatasets.joinpath("procedure_1_test_2_aaa.csv")
df_path.parent.mkdir(parents=True, exist_ok=True)
pd.DataFrame(columns=columns).to_csv(df_path, index=False)

current_iteration = 1
total_iterations = len(range_users) * len(range_snr) * len(range_Pe)

for n_users in range_users:
    for Pe in range_Pe:
        for snr in range_snr:

            # n_trials samples for this (n_users, Pe, snr) combination,
            # generated in parallel; each call appends one row to df_path
            Parallel(n_jobs=nJobs, verbose=0)(
                delayed(generate_sample)(
                    n_pilot,
                    n_antennas,
                    n_users,
                    Pe,
                    snr,
                    n_eve,
                    df_path) for _ in range(n_trials))

            # Replace the previous progress report with the current one
            progress = 100 * current_iteration / total_iterations
            printStr = "\n".join([
                "Number of antennas: " + str(n_antennas),
                "Number of users:    " + str(n_users),
                "Eve power, Pe:      " + str(Pe),
                "SNR:                " + str(snr),
                "Progress:           {:.2f}%".format(progress),
            ])
            clear_output(wait=True)
            print(printStr)
            current_iteration += 1

## Procedure 2 datasets

### Parameters

In [None]:
# Array sizes: 16, 32, ..., 240 antennas (one dataset per value)
range_antennas = np.arange(16, 241, 16)
# SNR grid: -10, -5, ..., 30
range_snr = np.arange(-10, 31, 5)
# Single-user case followed by 16, 32, ..., 256 users
range_users = np.append([1], np.arange(16, 257, 16))

### Generating datasets

In [None]:
# Progress is counted over all datasets generated by this cell
current_iteration = 1
total_iterations = len(range_antennas) * len(range_users) * len(range_snr) * len(range_Pe)

# Create the output folder up front so a missing directory does not abort the run
dirDatasets.mkdir(parents=True, exist_ok=True)

# One dataset for each number of antennas
for n_antennas in range_antennas:
    # Start the CSV with only the header row; generate_sample appends the samples
    df_path = dirDatasets.joinpath("procedure_2_" + str(n_antennas) + "_antennas.csv")
    pd.DataFrame(columns=columns).to_csv(df_path, index=False)

    for n_users in range_users:
        for Pe in range_Pe:
            for snr in range_snr:

                # n_trials samples for this (n_users, Pe, snr) combination,
                # generated in parallel; each call appends one row to df_path
                Parallel(n_jobs=nJobs, verbose=0)(
                    delayed(generate_sample)(
                        n_pilot,
                        n_antennas,
                        n_users,
                        Pe,
                        snr,
                        n_eve,
                        df_path) for _ in range(n_trials))

                # Replace the previous progress report with the current one
                progress = 100 * current_iteration / total_iterations
                printStr = "\n".join([
                    "Number of antennas: " + str(n_antennas),
                    "Number of users:    " + str(n_users),
                    "Eve power, Pe:      " + str(Pe),
                    "SNR:                " + str(snr),
                    "Progress:           {:.2f}%".format(progress),
                ])
                clear_output(wait=True)
                print(printStr)
                current_iteration += 1