In [1]:
from neucube import Reservoir
from neucube.encoder import RateEncoder, Deltav2
from neucube.validation import Pipeline
from neucube.sampler import SpikeCount, DeSNN
from neucube.datamanager import DataManager
import torch
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import random

In [2]:
params = {
            'source_data_path': 'example_data/labquake_source',
            'samples_path':'example_data/labquake_samples',
            'sampling_rate': 5000,
            'batch_duration': 18000,
        }
datamanager = DataManager(params)

In [3]:
datamanager.process_data()

Batch 1 from both systems saved to example_data/labquake_samples/sample_1.csv
Batch 2 from both systems saved to example_data/labquake_samples/sample_2.csv
Batch 3 from both systems saved to example_data/labquake_samples/sample_3.csv
Batch 4 from both systems saved to example_data/labquake_samples/sample_4.csv
Batch 5 from both systems saved to example_data/labquake_samples/sample_5.csv
Batch 6 from both systems saved to example_data/labquake_samples/sample_6.csv
Batch 7 from both systems saved to example_data/labquake_samples/sample_7.csv
Batch 8 from both systems saved to example_data/labquake_samples/sample_8.csv
Batch 9 from both systems saved to example_data/labquake_samples/sample_9.csv
Batch 10 from both systems saved to example_data/labquake_samples/sample_10.csv
Batch 11 from both systems saved to example_data/labquake_samples/sample_11.csv
Batch 12 from both systems saved to example_data/labquake_samples/sample_12.csv
Batch 13 from both systems saved to example_data/labquake_

In [4]:
## List of CSV files with growing 'n'
#num_files = 270  # specify the number of files
#column_name = 'Channel_13'  # specify the column to plot
#
## Initialize plot
#plt.figure(figsize=(15,5))
#
## Initialize the starting index for plotting
#start_index = 0
#
## Loop over the files
#for n in range(1, num_files + 1):
#    # Construct file name
#    file_name = f'example_data/labquake_samples/sample_{n}.csv'
#    
#    # Read CSV file
#    df = pd.read_csv(file_name)
#    
#    # Get the length of the current column
#    column_length = len(df[column_name])
#    
#    # Create a new index range that continues from where the last one left off
#    index_range = range(start_index, start_index + column_length)
#    
#    # Plot the column from each CSV file using the new index range
#    plt.plot(index_range, df[column_name])
#    
#    # Update the start_index for the next file
#    start_index += column_length
#
## Add labels and title (no legend)
#plt.xlabel('Index')
#plt.ylabel('Value')
#plt.title(f'Plot of {column_name} across multiple files')
#
## Show the plot
#plt.show()

In [4]:
def check_csv_row_sizes(directory):
    # List all CSV files in the specified directory
    csv_files = [f for f in os.listdir(directory) if f.endswith('.csv')]
    
    # Dictionary to store file names and their respective row counts
    row_sizes = {}
    
    # Iterate over each file, read it with pandas, and get the number of rows
    for file_name in csv_files:
        file_path = os.path.join(directory, file_name)
        try:
            df = pd.read_csv(file_path)
            row_sizes[file_name] = len(df)
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
    
    # Print row counts for each file
    for file_name, count in row_sizes.items():
        print(f"{file_name}: {count} rows")

# Example usage:
#directory_path = params['samples_path']
#check_csv_row_sizes(directory_path)

In [5]:
def compute_negative_moving_average_derivatives(dataset, window_size):
    """
    Computes the negative of the moving average derivatives for the entire dataset.

    Args:
        dataset (torch.Tensor): Input dataset, shape (batch_size, time_steps, num_features)
        window_size (int): Window size for the moving average filter.

    Returns:
        torch.Tensor: Negative moving average derivatives, shape (batch_size, time_steps, num_features)
    """
    batch_size, time_steps, num_features = dataset.shape
    all_negative_derivatives = []

    # Instantiate a temporary RateEncoder to use its moving_average method
    temp_encoder = RateEncoder(min_values=None, max_values=None, window_size=window_size)

    for i in range(batch_size):
        sample_negative_derivatives = []
        for j in range(num_features):
            sample = dataset[i][:, j]  # Shape: (time_steps,)

            # Compute derivative with same size
            derivative = torch.zeros_like(sample)
            derivative[1:] = sample[1:] - sample[:-1]
            derivative[0] = 0  # Handle as appropriate

            # Apply moving average
            smoothed_derivative = temp_encoder.moving_average(derivative)

            # Take the negative of the smoothed derivative
            negative_derivative = -smoothed_derivative

            sample_negative_derivatives.append(negative_derivative)

        sample_negative_derivatives = torch.stack(sample_negative_derivatives, dim=1)  # Shape: (time_steps, num_features)
        all_negative_derivatives.append(sample_negative_derivatives)

    all_negative_derivatives = torch.stack(all_negative_derivatives, dim=0)  # Shape: (batch_size, time_steps, num_features)
    return all_negative_derivatives

def compute_thresholds(negative_derivatives, percentile):
    """
    Computes thresholds for each feature based on the given percentile of the negative derivatives.

    Args:
        negative_derivatives (torch.Tensor): Negative moving average derivatives, shape (num_samples, num_features)
        percentile (float): Percentile value (between 0 and 100) to compute thresholds.

    Returns:
        torch.Tensor: Thresholds for each feature, shape (num_features,)
    """
    # Convert to numpy array for percentile computation
    negative_derivatives_np = negative_derivatives.numpy()

    thresholds = []
    for i in range(negative_derivatives_np.shape[1]):
        feature_data = negative_derivatives_np[:, i]
        threshold = np.percentile(feature_data, percentile)
        thresholds.append(threshold)
    thresholds = torch.tensor(thresholds)
    return thresholds


In [6]:
filenameslist = ['sample_'+str(idx)+'.csv' for idx in range(1,179)]

dfs = []
for filename in filenameslist:
  dfs.append(pd.read_csv('./example_data/labquake_samples/'+filename, header=0))

fulldf = pd.concat(dfs)

# Load the CSV file
labels = pd.read_csv('./example_data/labquake_samples/all_class_labels.csv')

# Extract each column into a separate 1D array
y1 = labels['Zone1'].values
y2 = labels['Zone2'].values
y3 = labels['Zone3'].values
y4 = labels['Zone4'].values

In [7]:
import torch
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm

# Assume the necessary functions and classes are defined:
# compute_negative_moving_average_derivatives, compute_thresholds,
# Deltav2, Reservoir, DeSNN, Pipeline

# Set random seed for reproducibility
seed = 123
torch.manual_seed(seed)
np.random.seed(seed)

# Load and preprocess your dataset
X = torch.tensor(fulldf.values.reshape(178, 900, 25))  # Adjust dimensions as per your dataset

# Compute negative moving average derivatives
negative_moving_avg_derivatives = compute_negative_moving_average_derivatives(X, window_size=10)

# Reshape to a 2D tensor for percentile computation
reshaped_negative_derivatives = negative_moving_avg_derivatives.view(-1, X.shape[2])  # Shape: (batch_size * time_steps, num_features)

# Compute thresholds for each feature at the desired percentile (e.g., 90th percentile)
percentile_value = 90.0  # Change as needed
thresholds = compute_thresholds(reshaped_negative_derivatives, percentile=percentile_value)

# Initialize your encoder
encoder = Deltav2(thresholds)
encoded_dataset = encoder.encode_dataset(X)

# Load labels
labels = pd.read_csv('./example_data/labquake_samples/all_class_labels.csv')
ys = [labels[col].values for col in labels]  # Extract each column into a list of arrays

# Set up K-Folds
kf = KFold(n_splits=5, shuffle=True, random_state=seed)
results = {}

for y_idx, y in enumerate(ys, start=1):
    # Compute overall class balance for the current Zone
    classes, counts = np.unique(y, return_counts=True)
    total_samples = len(y)
    print(f"Class balance for Zone {y_idx}:")
    for cls, count in zip(classes, counts):
        percentage = (count / total_samples) * 100
        print(f"  Class {cls}: {count} samples, {percentage:.2f}%")
    print()

    y_total, pred_total = [], []

    # Enumerate folds to keep track of fold number
    for fold_num, (train_index, test_index) in enumerate(tqdm(kf.split(X), desc=f'Zone {y_idx}'), start=1):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # Compute class balance in the training set for the current fold
        classes_fold, counts_fold = np.unique(y_train, return_counts=True)
        total_samples_fold = len(y_train)
        print(f"Class balance in training set for Zone {y_idx}, Fold {fold_num}:")
        for cls, count in zip(classes_fold, counts_fold):
            percentage = (count / total_samples_fold) * 100
            print(f"  Class {cls}: {count} samples, {percentage:.2f}%")
        print()

        # Proceed with model training and testing
        res = Reservoir(inputs=25)
        # sam = SpikeCount()
        sam = DeSNN()
        clf = LogisticRegression(solver='liblinear')
        pipe = Pipeline(res, sam, clf)

        pipe.fit(X_train, y_train)
        pred = pipe.predict(X_test)

        y_total.extend(y_test)
        pred_total.extend(pred)

    # Compute overall accuracy and confusion matrix for the current Zone
    acc = accuracy_score(y_total, pred_total)
    cm = confusion_matrix(y_total, pred_total)
    results[f'Zone {y_idx}'] = {'accuracy': acc, 'confusion_matrix': cm}
    print(f"Results for Zone {y_idx}:")
    print(f"Accuracy: {acc}")
    print(f"Confusion Matrix:\n{cm}\n")


Class balance for Zone 1:
  Class 0: 58 samples, 32.58%
  Class 1: 120 samples, 67.42%



Zone 1: 0it [00:00, ?it/s]

Class balance in training set for Zone 1, Fold 1:
  Class 0: 46 samples, 32.39%
  Class 1: 96 samples, 67.61%



Zone 1: 1it [00:14, 14.62s/it]

Class balance in training set for Zone 1, Fold 2:
  Class 0: 45 samples, 31.69%
  Class 1: 97 samples, 68.31%



Zone 1: 2it [00:28, 14.42s/it]

Class balance in training set for Zone 1, Fold 3:
  Class 0: 46 samples, 32.39%
  Class 1: 96 samples, 67.61%



Zone 1: 3it [00:43, 14.43s/it]

Class balance in training set for Zone 1, Fold 4:
  Class 0: 45 samples, 31.47%
  Class 1: 98 samples, 68.53%



Zone 1: 4it [00:57, 14.39s/it]

Class balance in training set for Zone 1, Fold 5:
  Class 0: 50 samples, 34.97%
  Class 1: 93 samples, 65.03%



Zone 1: 5it [01:12, 14.42s/it]


Results for Zone 1:
Accuracy: 0.7359550561797753
Confusion Matrix:
[[ 27  31]
 [ 16 104]]

Class balance for Zone 2:
  Class 0: 92 samples, 51.69%
  Class 1: 86 samples, 48.31%



Zone 2: 0it [00:00, ?it/s]

Class balance in training set for Zone 2, Fold 1:
  Class 0: 77 samples, 54.23%
  Class 1: 65 samples, 45.77%



Zone 2: 1it [00:14, 14.35s/it]

Class balance in training set for Zone 2, Fold 2:
  Class 0: 72 samples, 50.70%
  Class 1: 70 samples, 49.30%



Zone 2: 2it [00:28, 14.35s/it]

Class balance in training set for Zone 2, Fold 3:
  Class 0: 73 samples, 51.41%
  Class 1: 69 samples, 48.59%



Zone 2: 3it [00:43, 14.38s/it]

Class balance in training set for Zone 2, Fold 4:
  Class 0: 70 samples, 48.95%
  Class 1: 73 samples, 51.05%



Zone 2: 4it [00:57, 14.35s/it]

Class balance in training set for Zone 2, Fold 5:
  Class 0: 76 samples, 53.15%
  Class 1: 67 samples, 46.85%



Zone 2: 5it [01:11, 14.36s/it]


Results for Zone 2:
Accuracy: 0.6573033707865169
Confusion Matrix:
[[57 35]
 [26 60]]

Class balance for Zone 3:
  Class 0: 148 samples, 83.15%
  Class 1: 30 samples, 16.85%



Zone 3: 0it [00:00, ?it/s]

Class balance in training set for Zone 3, Fold 1:
  Class 0: 116 samples, 81.69%
  Class 1: 26 samples, 18.31%



Zone 3: 1it [00:14, 14.39s/it]

Class balance in training set for Zone 3, Fold 2:
  Class 0: 119 samples, 83.80%
  Class 1: 23 samples, 16.20%



Zone 3: 2it [00:28, 14.33s/it]

Class balance in training set for Zone 3, Fold 3:
  Class 0: 121 samples, 85.21%
  Class 1: 21 samples, 14.79%



Zone 3: 3it [00:43, 14.37s/it]

Class balance in training set for Zone 3, Fold 4:
  Class 0: 120 samples, 83.92%
  Class 1: 23 samples, 16.08%



Zone 3: 4it [00:57, 14.33s/it]

Class balance in training set for Zone 3, Fold 5:
  Class 0: 116 samples, 81.12%
  Class 1: 27 samples, 18.88%



Zone 3: 5it [01:11, 14.36s/it]


Results for Zone 3:
Accuracy: 0.8314606741573034
Confusion Matrix:
[[145   3]
 [ 27   3]]

Class balance for Zone 4:
  Class 0: 89 samples, 50.00%
  Class 1: 89 samples, 50.00%



Zone 4: 0it [00:00, ?it/s]

Class balance in training set for Zone 4, Fold 1:
  Class 0: 75 samples, 52.82%
  Class 1: 67 samples, 47.18%



Zone 4: 1it [00:14, 14.29s/it]

Class balance in training set for Zone 4, Fold 2:
  Class 0: 69 samples, 48.59%
  Class 1: 73 samples, 51.41%



Zone 4: 2it [00:28, 14.31s/it]

Class balance in training set for Zone 4, Fold 3:
  Class 0: 71 samples, 50.00%
  Class 1: 71 samples, 50.00%



Zone 4: 3it [00:43, 14.36s/it]

Class balance in training set for Zone 4, Fold 4:
  Class 0: 68 samples, 47.55%
  Class 1: 75 samples, 52.45%



Zone 4: 4it [00:57, 14.35s/it]

Class balance in training set for Zone 4, Fold 5:
  Class 0: 73 samples, 51.05%
  Class 1: 70 samples, 48.95%



Zone 4: 5it [01:11, 14.32s/it]

Results for Zone 4:
Accuracy: 0.6966292134831461
Confusion Matrix:
[[60 29]
 [25 64]]




