In [2]:
import pandas as pd

# Read both matrices from whitespace-separated text files that have no header row.
# sep=r"\s+" is the documented equivalent of delim_whitespace=True; the latter is
# deprecated (pandas >= 2.2) and emits a FutureWarning on every call.
df1 = pd.read_csv("Matrix_Data.txt", sep=r"\s+", header=None)
df2 = pd.read_csv("Observed_Signal.txt", sep=r"\s+", header=None)


# Display the shape (rows, columns) of each matrix.
print("Matrix1 dimensions:", df1.shape)  # (rows, columns)
print("Matrix2 dimensions:", df2.shape)

# Find matching rows between the two DataFrames.
# Each row is converted to a plain Python list exactly once up front; rebuilding
# the df2 row with .iloc inside the inner loop would repeat that work len(df1) times.
# Two rows match only when their lists compare equal, so rows of different
# widths can never match.
rows1 = [df1.iloc[i].tolist() for i in range(len(df1))]
rows2 = [df2.iloc[j].tolist() for j in range(len(df2))]

# Pairs are collected in (df1 index, df2 index) order, df1 index varying slowest.
match_rows = [
    (i, j)
    for i, row1 in enumerate(rows1)
    for j, row2 in enumerate(rows2)
    if row1 == row2
]
print("Matching rows (df1 row index, df2 row index):", match_rows)

# Find matching columns between the two DataFrames.
# Each column is converted to a plain Python list exactly once up front; rebuilding
# the df2 column with .iloc inside the inner loop would repeat that work for
# every df1 column.
cols1 = [df1.iloc[:, i].tolist() for i in range(df1.shape[1])]
cols2 = [df2.iloc[:, j].tolist() for j in range(df2.shape[1])]

# Pairs are positional (df1 column position, df2 column position), df1 varying slowest.
match_columns = [
    (i, j)
    for i, col1 in enumerate(cols1)
    for j, col2 in enumerate(cols2)
    if col1 == col2
]
print("Matching columns (df1 column index, df2 column index):", match_columns)


Matrix1 dimensions: (50, 100)
Matrix2 dimensions: (50, 1)
Matching rows (df1 row index, df2 row index): []
Matching columns (df1 column index, df2 column index): []


  df1 = pd.read_csv("Matrix_Data.txt", delim_whitespace=True, header=None)
  df2 = pd.read_csv("Observed_Signal.txt", delim_whitespace=True, header=None)


In [7]:
import pandas as pd
import numpy as np

# Load the matrix data. Assumes whitespace-separated values and no header.
# sep=r"\s+" is the documented equivalent of the deprecated delim_whitespace=True
# (which emits a FutureWarning on pandas >= 2.2).
matrix_df = pd.read_csv("Matrix_Data.txt", sep=r"\s+", header=None)

# Load the observed signal. The file may hold the samples as a single column
# or as a single row; both layouts are flattened to the same 1D vector below.
observed_df = pd.read_csv("Observed_Signal.txt", sep=r"\s+", header=None)
# Convert observed signal to a 1D numpy array.
# ravel() always yields a 1D array, whereas squeeze() returns a scalar for a
# 1x1 file and leaves a 2D frame untouched when the file has several rows AND
# several columns. With ravel(), such a malformed file is rejected by the
# length check below instead of failing later during the subtraction.
observed_signal = observed_df.to_numpy().ravel()

# Check that the observed signal has the same number of rows as the matrix.
if len(observed_signal) != matrix_df.shape[0]:
    raise ValueError("The observed signal length must match the number of rows in the matrix.")

# Map every matrix column label to its Euclidean (L2) distance from the
# observed signal.
distances = {
    label: np.linalg.norm(matrix_df[label].to_numpy() - observed_signal)
    for label in matrix_df.columns
}

# Rank the column labels by ascending distance and keep the four nearest.
sorted_columns = sorted(distances, key=lambda label: distances[label])
closest_four = sorted_columns[:4]

print("The 4 columns closest to the observed signal are:")
for col in closest_four:
    print(f"Column {col} with distance {distances[col]:.4f}")


The 4 columns closest to the observed signal are:
Column 50 with distance 30.3355
Column 5 with distance 33.1938
Column 23 with distance 33.6027
Column 3 with distance 33.6291


  matrix_df = pd.read_csv("Matrix_Data.txt", delim_whitespace=True, header=None)
  observed_df = pd.read_csv("Observed_Signal.txt", delim_whitespace=True, header=None)


In [8]:
import pandas as pd
import numpy as np
from concurrent.futures import ThreadPoolExecutor

# Load the matrix data (whitespace-separated values, no header row).
# sep=r"\s+" is the documented equivalent of the deprecated delim_whitespace=True
# (which emits a FutureWarning on pandas >= 2.2).
matrix_df = pd.read_csv("Matrix_Data.txt", sep=r"\s+", header=None)

# Load the observed signal and flatten it to a 1D numpy array.
# ravel() always yields a 1D array, whereas squeeze() returns a scalar for a
# 1x1 file and leaves a 2D frame untouched when the file has several rows AND
# several columns. With ravel(), such a malformed file is rejected by the
# dimension check below.
observed_df = pd.read_csv("Observed_Signal.txt", sep=r"\s+", header=None)
observed_signal = observed_df.to_numpy().ravel()

# Validate dimensions
if len(observed_signal) != matrix_df.shape[0]:
    raise ValueError("The observed signal length must match the number of rows in the matrix.")

# Function to compute Euclidean distance for a column
def compute_distance(col):
    """Return ``(col, distance)`` for one column of ``matrix_df``.

    ``distance`` is the Euclidean (L2) norm of the difference between the
    column's values and the module-level ``observed_signal``. The label is
    returned alongside the distance so the results of a mapped call can be
    passed straight to ``dict``.
    """
    difference = matrix_df[col].to_numpy() - observed_signal
    return col, np.linalg.norm(difference)

# Compute one (label, distance) pair per column on a thread pool and collect
# the pairs into a label -> distance dictionary. executor.map yields results in
# the order of its inputs, so the dictionary keeps the matrix's column order.
with ThreadPoolExecutor() as pool:
    distances = dict(pool.map(compute_distance, matrix_df.columns))

# Rank the column labels by ascending distance and keep the four nearest.
sorted_columns = sorted(distances, key=lambda label: distances[label])
closest_four = sorted_columns[:4]

# Output the results
print("The 4 columns closest to the observed signal are:")
for col in closest_four:
    print(f"Column {col} with distance {distances[col]:.4f}")


The 4 columns closest to the observed signal are:
Column 50 with distance 30.3355
Column 5 with distance 33.1938
Column 23 with distance 33.6027
Column 3 with distance 33.6291


  matrix_df = pd.read_csv("Matrix_Data.txt", delim_whitespace=True, header=None)
  observed_df = pd.read_csv("Observed_Signal.txt", delim_whitespace=True, header=None)
