In [25]:
import pandas as pd
import numpy as np

In [None]:
# Read the ACSF1 train/test splits: tab-separated files without a header row,
# so pandas assigns integer column labels starting at 0.
tsv_options = {'sep': '\t', 'header': None}
df_train = pd.read_csv('ACSF1_TRAIN.tsv', **tsv_options)
df_test = pd.read_csv('ACSF1_TEST.tsv', **tsv_options)

In [27]:
# Split the training frame: column 0 is taken as the label vector (numpy array),
# every remaining column is kept as the series data (DataFrame).
labels_train, data_train = df_train.iloc[:, 0].values, df_train.iloc[:, 1:]
print("train Labels shape:", labels_train.shape)
print("train Data shape: ", data_train.shape)

train Labels shape: (100,)
train Data shape:  (100, 1460)


In [28]:
# Split the test frame: column 0 is taken as the label vector (numpy array),
# every remaining column is kept as the series data (DataFrame).
labels_test, data_test = df_test.iloc[:, 0].values, df_test.iloc[:, 1:]
print("test Labels shape:", labels_test.shape)
print("test Data shape: ", data_test.shape)

test Labels shape: (100,)
test Data shape:  (100, 1460)


In [29]:
# Report, per split, whether any cell of the series data is null.
train_has_missing = data_train.isnull().any().any()
print("\nAny missing values in the train dataset?", train_has_missing)
test_has_missing = data_test.isnull().any().any()
print("\nAny missing values in the test dataset?", test_has_missing)


Any missing values in the train dataset? False

Any missing values in the test dataset? False


In [30]:
# Show the first rows of the train data followed by the first rows of the test data.
print(data_train.head(), data_test.head(), sep="\n")

       1         2         3         4         5         6         7     \
0 -0.584754 -0.584754  1.730991 -0.584754 -0.584754 -0.584754  1.729917   
1 -0.591434 -0.511104  1.726820 -0.580422 -0.591434 -0.511104  1.727921   
2 -0.577945 -0.577945  1.730793 -0.577945 -0.578946 -0.564882  1.731094   
3 -0.588925 -0.538088  1.735718 -0.588716 -0.589962 -0.523551  1.735619   
4 -0.596633 -0.532188  1.718067 -0.592117 -0.596633 -0.532188  1.715241   

       8         9         10    ...      1451      1452      1453      1454  \
0 -0.584754 -0.584754 -0.584754  ...  1.732726 -0.584734 -0.583729 -0.578603   
1 -0.580422 -0.591434 -0.511104  ...  1.727396 -0.580731 -0.580731 -0.580731   
2 -0.577829 -0.580956 -0.548788  ...  1.734727 -0.577751 -0.580956 -0.549798   
3 -0.588646 -0.588925 -0.524598  ...  1.743664 -0.588876 -0.586852 -0.576483   
4 -0.592117 -0.595605 -0.532188  ...  1.743258 -0.592403 -0.591524 -0.575158   

       1455      1456      1457      1458      1459      1460  
0  1

In [31]:
def dtw_distance(ts_a, ts_b):
    """Compute the DTW distance between two one-dimensional time series.

    The local cost is the absolute difference ``|ts_a[i] - ts_b[j]|`` and the
    allowed steps are insertion, deletion and match, i.e. the recurrence

        D[i, j] = cost(i, j) + min(D[i-1, j], D[i, j-1], D[i-1, j-1])

    with ``D[0, 0] = 0`` and an infinite border otherwise.

    Parameters
    ----------
    ts_a, ts_b : array-like of numbers
        One-dimensional sequences (list, tuple, numpy array, pandas Series).
        They may have different lengths. Elements are read by position, so a
        pandas Series whose index does not start at 0 is handled correctly.

    Returns
    -------
    float
        The accumulated cost ``D[len(ts_a), len(ts_b)]``. It is ``0`` when
        both inputs are empty and ``inf`` when exactly one of them is empty.

    Raises
    ------
    ValueError
        If either input is not one-dimensional after conversion to an array.

    Notes
    -----
    Cells on the same anti-diagonal (``i + j == k``) only depend on the two
    previous anti-diagonals, so each anti-diagonal is filled with one
    vectorised numpy step instead of a Python loop over every cell. The
    arithmetic per cell is the same as in the plain double loop. Inputs
    containing NaN propagate NaN through ``np.minimum``.
    """
    # np.asarray strips any pandas index so that all indexing below is
    # positional (label-based lookup on a Series would hit the wrong element
    # or raise KeyError when the labels do not start at 0).
    a = np.asarray(ts_a, dtype=float)
    b = np.asarray(ts_b, dtype=float)
    if a.ndim != 1 or b.ndim != 1:
        raise ValueError("dtw_distance expects one-dimensional sequences")

    M, N = a.size, b.size
    if M == 0 or N == 0:
        # Only D[0, 0] is finite when a sequence is empty.
        return 0.0 if M == N else np.inf

    dtw_matrix = np.full((M + 1, N + 1), np.inf)
    dtw_matrix[0, 0] = 0

    # k = i + j runs over the anti-diagonals of the interior of the matrix.
    for k in range(2, M + N + 1):
        # Row indices of the interior cells on this anti-diagonal; the bounds
        # keep both i in [1, M] and j = k - i in [1, N].
        i = np.arange(max(1, k - N), min(M, k - 1) + 1)
        j = k - i
        cost = np.abs(a[i - 1] - b[j - 1])
        last_min = np.minimum(
            np.minimum(
                dtw_matrix[i - 1, j],    # insertion
                dtw_matrix[i, j - 1],    # deletion
            ),
            dtw_matrix[i - 1, j - 1],    # match
        )
        dtw_matrix[i, j] = cost + last_min
    return dtw_matrix[M, N]


In [None]:
# 1-nearest-neighbour classification under DTW: every test series receives the
# label of the training series with the strictly smallest DTW distance (the
# first such series wins ties). Progress is printed for the first sample and
# for every tenth sample.
n_test_samples = len(data_test)
train_series = data_train.values
y_pred = []
for sample_no, query_ts in enumerate(data_test.values, start=1):
    if sample_no == 1 or sample_no % 10 == 0:
        print(f"Processing test sample {sample_no}/{n_test_samples}")
    best_label, best_dist = None, float('inf')
    for candidate_ts, candidate_label in zip(train_series, labels_train):
        candidate_dist = dtw_distance(query_ts, candidate_ts)
        if candidate_dist < best_dist:
            best_dist, best_label = candidate_dist, candidate_label
    y_pred.append(best_label)

# Misclassification rate: share of test samples whose predicted label differs
# from the true label.
y_pred = np.array(y_pred)
is_wrong = y_pred != labels_test
error_count = np.sum(is_wrong)
error_rate = error_count / len(labels_test)
print(f"Error rate: {error_rate:.2f}")

Processing test sample 1/100
Processing test sample 10/100
Processing test sample 20/100
Processing test sample 30/100
Processing test sample 40/100
Processing test sample 50/100
Processing test sample 60/100
