In [1]:
# -*- coding: utf-8 -*-
"""
Created on Wed Jun 19 18:29:14 2024

@author: l00416959
"""

import numpy as np
import itertools

# Configuration file; the code below parses only its lines 1-5 (0-based).
cfg_path = '../Dataset0/Dataset0CfgData1.txt'
# Channel input-data file; the code below reads it in slices of lines.
inputdata_path = '../Dataset0/Dataset0InputData1.txt'

# func to read in slices
def read_slice_of_file(file_path, start, end):
    """
    Return the lines of a text file whose 0-based index lies in [start, end).

    Parameters:
        file_path (str): Path of the text file to read.
        start (int): Index of the first line to return.
        end (int): Index one past the last line to return.

    Returns:
        list[str]: The selected lines, line terminators included.
    """
    with open(file_path, 'r') as handle:
        # islice consumes and discards the first `start` lines, then yields up to `end`.
        return [line for line in itertools.islice(handle, start, end)]

# read RoundYCfgDataX.txt
# Lines 1-5 (0-based) of the config file are parsed as numbers; line 0 is skipped.
slice_lines = read_slice_of_file(cfg_path, 1, 6)
info = np.loadtxt(slice_lines)
tol_samp_num = int(info[0])
port_num = int(info[2])
ant_num = int(info[3])
sc_num = int(info[4])

# read RoundYInputDataX.txt in slices
slice_samp_num = 1000   # number of samples (file lines) parsed per slice
# Ceiling division: a final, shorter slice must not be dropped when
# tol_samp_num is not a multiple of slice_samp_num.
slice_num = -(-tol_samp_num // slice_samp_num)

# Pre-allocate the result once instead of growing it with np.concatenate,
# which would copy every previously loaded sample on each iteration.
H = np.empty((tol_samp_num, port_num, ant_num, sc_num), dtype=np.complex128)
loaded_samp_num = 0

# Open the file once and keep reading forward; re-opening it for every slice
# would re-scan all earlier lines each time.
with open(inputdata_path, 'r') as input_file:
    for slice_idx in range(slice_num):
        print(slice_idx)
        lines_wanted = min(slice_samp_num, tol_samp_num - loaded_samp_num)
        slice_lines = list(itertools.islice(input_file, lines_wanted))
        if not slice_lines:
            break  # the file ended before tol_samp_num lines were read
        # ndmin=2 keeps a one-line slice two-dimensional for the reshape below.
        Htmp = np.loadtxt(slice_lines, ndmin=2)
        # Axis 1 separates the real part (index 0) from the imaginary part (index 1).
        Htmp = np.reshape(Htmp, (Htmp.shape[0], 2, sc_num, ant_num, port_num))
        Htmp = Htmp[:, 0, :, :, :] + 1j*Htmp[:, 1, :, :, :]
        # (samples, sc_num, ant_num, port_num) -> (samples, port_num, ant_num, sc_num)
        Htmp = np.transpose(Htmp, (0,3,2,1))

        H[loaded_samp_num:loaded_samp_num + Htmp.shape[0]] = Htmp
        loaded_samp_num += Htmp.shape[0]

# Drop the unfilled tail if the file held fewer samples than the config announced.
if loaded_samp_num < tol_samp_num:
    H = H[:loaded_samp_num]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19


In [7]:
# Print the length of each of the first four axes of H, one per line.
for axis in range(4):
    print(np.shape(H)[axis])

20000
2
64
408


In [18]:
# Number of entries along the last axis for the first sample / first two leading indices.
len(H[0, 0, 0])

408

In [19]:
# Very first complex entry of H.
H[0, 0, 0, 0]

np.complex128(0.734228+0.770911j)

In [5]:
# Show the first sample of H (numpy abbreviates the large array with "...").
print(H[0, ...])

[[[ 0.734228+0.770911j  1.256584-0.088519j  0.553896-1.088371j ...
    0.066881+1.217681j  1.111822+0.595838j  0.98297 -0.408424j]
  [ 0.781669+0.652409j  1.038199+0.020232j  0.749548-0.974443j ...
    0.025998+0.994898j  0.922855+0.757614j  1.112302-0.401542j]
  [ 0.827215+0.791147j  0.943926-0.076186j  0.750974-0.716739j ...
    0.137272+0.976557j  0.705829+0.680568j  1.161513-0.168052j]
  ...
  [-0.335698+0.27306j   0.109357+0.469059j  0.337797+0.137317j ...
   -0.473159+0.072984j -0.141195+0.365209j  0.067999+0.326774j]
  [-0.278269+0.152633j -0.09328 +0.435103j  0.350276+0.257186j ...
   -0.408211-0.087382j -0.280637+0.363858j  0.106142+0.326327j]
  [-0.246592+0.179119j -0.151702+0.283239j  0.218753+0.372924j ...
   -0.273529-0.131017j -0.384274+0.181545j  0.015319+0.41183j ]]

 [[ 0.893547+0.618462j  0.901772-0.309352j  0.459753-0.789159j ...
    0.334546+0.927889j  0.78556 +0.408464j  0.965627-0.322806j]
  [ 0.742873+0.778431j  1.089285-0.212112j  0.453431-0.846259j ...
    0.19

In [2]:
import numpy as np

def extract_features(H, n_components=50):
    """
    Extract spatial and frequency features from the complex channel matrix H using Complex PCA.

    For every sample, the flattened spatial covariance over axis 1 is
    concatenated with the per-subcarrier mean magnitude. The combined
    (complex) features are then centred and projected onto the leading
    eigenvectors of their covariance matrix.

    Parameters:
        H (numpy.ndarray): Complex channel matrix with shape
            (samples, antennas, user_antennas, subcarriers), e.g. (samples, 64, 2, 408).
        n_components (int): Number of dimensions to reduce to using PCA.

    Returns:
        numpy.ndarray: Reduced feature representation (samples, n_components).
            The columns are mutually uncorrelated and ordered by decreasing variance.

    Raises:
        ValueError: If H is not 4-dimensional.
    """
    H = np.asarray(H)
    # Unpacking doubles as a check that H is 4-dimensional.
    num_samples, num_antennas, _, _ = H.shape

    # Steps 1 + 2: one pass over the samples. Working sample by sample keeps
    # the temporaries (conjugate, magnitude) the size of a single sample.
    spatial_features = []
    frequency_features = []
    for sample in H:
        # Merge the trailing axes: (antennas, user_antennas * subcarriers)
        reshaped_sample = sample.reshape(num_antennas, -1)
        # Spatial covariance (antennas x antennas), flattened to 1D
        spatial_cov = reshaped_sample @ reshaped_sample.conj().T
        spatial_features.append(spatial_cov.ravel())
        # Mean magnitude over the first two sample axes, one value per subcarrier
        frequency_features.append(np.mean(np.abs(sample), axis=(0, 1)))

    # Step 3: Combine spatial and frequency features -> (samples, antennas**2 + subcarriers)
    combined_features = np.hstack((np.array(spatial_features), np.array(frequency_features)))

    # Step 4: Complex PCA.
    # np.cov centres the data and returns C[i, j] = E[x_i * conj(x_j)], which is Hermitian.
    cov_matrix = np.cov(combined_features, rowvar=False)  # Shape: (features, features)
    # eigh is the solver for Hermitian matrices: real eigenvalues and
    # orthonormal eigenvectors, unlike the general-purpose eig.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    # eigh returns ascending eigenvalues; keep the n_components largest, descending.
    order = np.argsort(eigenvalues)[::-1]
    principal_components = eigenvectors[:, order[:n_components]]  # Shape: (features, n_components)

    # Project the centred data. For C as defined above the score of a sample x
    # on eigenvector v is v^H x, hence the conjugate on the components.
    centered_features = combined_features - combined_features.mean(axis=0)
    reduced_features = centered_features @ principal_components.conj()  # Shape: (samples, n_components)

    return reduced_features


In [3]:
# H is laid out as (samples, 2, 64, 408) (see the shape printed earlier), while
# extract_features expects the 64-antenna axis first: (samples, 64, 2, 408).
# Swap axes 1 and 2 so the spatial covariance is taken over the 64 antennas.
test_reduced = extract_features(np.transpose(H, (0, 2, 1, 3)))

In [4]:
import numpy as np

def convert_to_real_amplitude_phase(H):
    """
    Turn complex data into a real-valued matrix of amplitudes followed by phases.

    Parameters:
        H (numpy.ndarray): Complex-valued matrix with shape (samples, features).

    Returns:
        numpy.ndarray: Real-valued matrix with shape (samples, features * 2);
            the first half of the columns holds |H|, the second half angle(H).
    """
    # Amplitude block first, phase block (radians) second.
    parts = (np.abs(H), np.angle(H))
    return np.hstack(parts)

# Usage example: turn the output of extract_features into real-valued
# amplitude/phase columns.
real_features = convert_to_real_amplitude_phase(test_reduced)


In [9]:
def compute_distance_matrix_in_batches(features, batch_size=1000):
    """
    Compute the pairwise Euclidean distance matrix block by block to limit memory use.

    Only blocks on or above the diagonal are computed; each one is mirrored
    into its transposed position because the distance matrix is symmetric.

    Parameters:
        features (numpy.ndarray): Feature matrix with shape (samples, n_features).
        batch_size (int): Number of rows per block; bounds the size of the
            (batch, batch, n_features) temporary created for each block.

    Returns:
        numpy.ndarray: Distance matrix with shape (samples, samples), dtype float64.

    Raises:
        ValueError: If batch_size is smaller than 1.
    """
    if batch_size < 1:
        # A negative step would make the ranges below empty and silently
        # return an all-zero matrix.
        raise ValueError("batch_size must be a positive integer")

    n_samples = features.shape[0]
    distance_matrix = np.zeros((n_samples, n_samples), dtype=np.float64)

    # Distances block by block, upper triangle only
    for i in range(0, n_samples, batch_size):
        end_i = min(i + batch_size, n_samples)
        row_block = features[i:end_i, np.newaxis]
        for j in range(i, n_samples, batch_size):
            end_j = min(j + batch_size, n_samples)
            # Distances between the rows of block i and the rows of block j
            block_distances = np.linalg.norm(row_block - features[j:end_j], axis=2)
            distance_matrix[i:end_i, j:end_j] = block_distances
            if j != i:
                # d(a, b) == d(b, a): reuse the block for the lower triangle
                distance_matrix[j:end_j, i:end_i] = block_distances.T

    return distance_matrix

def reduce_dimension_mds(features, n_components=2, batch_size=1000):
    """
    Reduce features to n_components dimensions with MDS on Euclidean distances.

    Parameters:
        features (numpy.ndarray): Feature matrix with shape (samples, n_features).
        n_components (int): Dimensionality of the embedding.
        batch_size (int): Block size forwarded to compute_distance_matrix_in_batches.

    Returns:
        numpy.ndarray: Low-dimensional coordinates with shape (samples, n_components).
    """
    # Imported here because no other cell of the notebook imports MDS; without
    # this the function raises NameError on a fresh kernel.
    from sklearn.manifold import MDS

    # Pairwise distance matrix, shape (samples, samples)
    distance_matrix = compute_distance_matrix_in_batches(features, batch_size=batch_size)

    # MDS on the precomputed dissimilarities; fixed seed for repeatable output
    mds = MDS(n_components=n_components, dissimilarity="precomputed", random_state=0)
    low_dim_coords = mds.fit_transform(distance_matrix)

    return low_dim_coords



In [10]:
# Embed real_features in 2-D with MDS (default n_components=2).
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt; the
# function first builds a (samples, samples) float64 distance matrix.
low_dim_coords = reduce_dimension_mds(real_features)

KeyboardInterrupt: 

In [6]:
from sklearn.utils.extmath import randomized_svd
import numpy as np

def complex_to_real_matrix(H):
    """
    Build the real block matrix [[Re, -Im], [Im, Re]] from a complex matrix.

    Parameters:
        H (numpy.ndarray): Complex-valued input matrix.
    Returns:
        numpy.ndarray: Real-valued matrix with twice as many rows and columns as H.
    """
    re = np.real(H)
    im = np.imag(H)
    upper_row = [re, -im]
    lower_row = [im, re]
    return np.block([upper_row, lower_row])

def extract_features(H, n_components=50):
    """
    Extract spatial and frequency features from the channel matrix H and
    reduce them with randomized SVD.

    NOTE: this definition shares its name with the PCA-based extract_features
    defined in an earlier cell; whichever cell ran last is the one in effect.

    Parameters:
        H (numpy.ndarray): Channel matrix with shape
            (samples, antennas, user_antennas, subcarriers), e.g. (samples, 64, 2, 408)
        n_components (int): Number of singular components to keep.
    Returns:
        numpy.ndarray: Reduced feature representation (samples, n_components)
    Raises:
        ValueError: If H is not 4-dimensional.
    """
    H = np.asarray(H)
    # Unpacking doubles as a check that H is 4-dimensional.
    num_samples, num_antennas, _, _ = H.shape

    # Steps 1 + 2: one pass over the samples
    spatial_features = []
    frequency_features = []
    for sample in H:
        # Merge the trailing axes: (antennas, user_antennas * subcarriers)
        reshaped_sample = sample.reshape(num_antennas, -1)
        # Spatial covariance (antennas x antennas), flattened to 1D
        spatial_cov = reshaped_sample @ reshaped_sample.conj().T
        spatial_features.append(spatial_cov.ravel())
        # Mean magnitude over the first two sample axes, one value per subcarrier
        frequency_features.append(np.mean(np.abs(sample), axis=(0, 1)))

    # Step 3: Combine spatial and frequency features -> (samples, features), complex
    combined_features = np.hstack((np.array(spatial_features), np.array(frequency_features)))

    # Step 4: Real-valued representation for the SVD.
    # Real and imaginary parts are placed side by side so that every sample
    # stays exactly one row. The block form [[Re, -Im], [Im, Re]] would double
    # the number of rows and return 2 * samples feature vectors.
    combined_features_real = np.hstack((np.real(combined_features), np.imag(combined_features)))

    # Step 5: Apply Randomized SVD for dimensionality reduction
    U, Sigma, VT = randomized_svd(combined_features_real, n_components=n_components, random_state=42)

    # Step 6: Scores of each sample on the kept components (same as U @ diag(Sigma))
    reduced_features = U * Sigma

    return reduced_features

In [7]:
# H is laid out as (samples, 2, 64, 408), while extract_features expects the
# 64-antenna axis first: (samples, 64, 2, 408). Swap axes 1 and 2 so the
# spatial covariance is taken over the 64 antennas.
test2_reduced = extract_features(np.transpose(H, (0, 2, 1, 3)))

In [10]:
import numpy as np
from sklearn.manifold import TSNE

def complex_tsne(H, n_components=2):
    """
    Embed complex 4-D data in a low-dimensional real space with t-SNE.

    Each sample is flattened, its real and imaginary parts are placed side by
    side, and the resulting real matrix is handed to TSNE.

    Parameters:
        H (numpy.ndarray): Complex-valued input data of shape (samples, 64, 2, 408).
        n_components (int): Dimensionality of the embedding (typically 2 for plotting).
    Returns:
        numpy.ndarray: Reduced feature representation (samples, n_components).
    """
    # Unpacking four values keeps the requirement that H is 4-dimensional.
    sample_count, _, _, _ = H.shape

    # One row per sample: (samples, 64*2*408)
    flat = H.reshape(sample_count, -1)

    # Real block followed by imaginary block: (samples, 2*64*2*408)
    stacked = np.concatenate((np.real(flat), np.imag(flat)), axis=1)

    # Fixed seed so repeated runs give the same embedding
    embedder = TSNE(n_components=n_components, random_state=42)
    return embedder.fit_transform(stacked)

In [11]:
# Embed every sample of H with t-SNE using the default n_components=2.
test3 = complex_tsne(H)

: 