In [8]:
from neucube import Reservoir
from neucube.encoder import RateEncoder
from neucube.validation import Pipeline
from neucube.sampler import SpikeCount, DeSNN
from neucube.datamanager import DataManager
import torch
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Settings passed to DataManager: where the source data lives, where the
# per-sample files are written/read, and the two numeric options it accepts.
params = dict(
    source_data_path='example_data/labquake_source',
    samples_path='example_data/labquake_samples',
    sampling_rate=5000,
    batch_duration=12000,
)
datamanager = DataManager(params)

In [4]:
#datamanager.process_data()

In [4]:
## List of CSV files with growing 'n'
#num_files = 270  # specify the number of files
#column_name = 'Channel_13'  # specify the column to plot
#
## Initialize plot
#plt.figure(figsize=(15,5))
#
## Initialize the starting index for plotting
#start_index = 0
#
## Loop over the files
#for n in range(1, num_files + 1):
#    # Construct file name
#    file_name = f'example_data/labquake_samples/sample_{n}.csv'
#    
#    # Read CSV file
#    df = pd.read_csv(file_name)
#    
#    # Get the length of the current column
#    column_length = len(df[column_name])
#    
#    # Create a new index range that continues from where the last one left off
#    index_range = range(start_index, start_index + column_length)
#    
#    # Plot the column from each CSV file using the new index range
#    plt.plot(index_range, df[column_name])
#    
#    # Update the start_index for the next file
#    start_index += column_length
#
## Add labels and title (no legend)
#plt.xlabel('Index')
#plt.ylabel('Value')
#plt.title(f'Plot of {column_name} across multiple files')
#
## Show the plot
#plt.show()

In [5]:
def check_csv_row_sizes(directory):
    """Print the number of data rows in every ``.csv`` file of ``directory``.

    Files are processed in sorted name order so the report is identical on
    every run (``os.listdir`` returns entries in arbitrary order).

    Args:
        directory: Path of the folder to scan. Sub-folders are not searched.

    Returns:
        None. One ``"<file>: <n> rows"`` line is printed per readable file.
        A file that pandas cannot read produces an ``"Error reading ..."``
        line instead and is left out of the row report.
    """
    csv_files = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))

    # Maps file name -> row count, filled in the same (sorted) order.
    row_sizes = {}
    for file_name in csv_files:
        file_path = os.path.join(directory, file_name)
        try:
            # len() of the frame counts data rows; the header line is excluded.
            row_sizes[file_name] = len(pd.read_csv(file_path))
        except Exception as e:
            # Best effort: report the unreadable file and keep scanning.
            print(f"Error reading {file_name}: {e}")

    # Errors (if any) are printed above while scanning; the row report follows.
    for file_name, count in row_sizes.items():
        print(f"{file_name}: {count} rows")

# Example usage:
#directory_path = params['samples_path']
#check_csv_row_sizes(directory_path)

In [5]:
# Sample files are named sample_1.csv ... sample_272.csv.
filenameslist = [f'sample_{idx}.csv' for idx in range(1, 273)]

# Read every sample file (first line is the header row) and stack the frames
# row-wise into a single table.
dfs = [
    pd.read_csv('./example_data/labquake_samples/' + filename, header=0)
    for filename in filenameslist
]
fulldf = pd.concat(dfs)

# Class labels: one column per zone, pulled out as separate 1D arrays.
labels = pd.read_csv('./example_data/labquake_samples/all_class_labels.csv')
y1, y2, y3, y4 = (
    labels[zone].values for zone in ('Zone1', 'Zone2', 'Zone3', 'Zone4')
)

In [9]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm

# Per-zone 5-fold cross-validation of a Reservoir -> DeSNN -> LogisticRegression
# pipeline on rate-encoded samples. Predictions from all folds are pooled per
# zone and summarised as one accuracy and one confusion matrix.

# Dataset layout: N_SAMPLES recordings, each N_TIMESTEPS rows by N_CHANNELS columns.
N_SAMPLES, N_TIMESTEPS, N_CHANNELS = 272, 600, 25
N_SPLITS = 5

# Loading data
# The reshape is only meaningful when fulldf is the row-wise stack of equally
# sized recordings with one column per channel; fail loudly otherwise instead
# of silently mixing samples.
expected_shape = (N_SAMPLES * N_TIMESTEPS, N_CHANNELS)
assert fulldf.shape == expected_shape, (
    f"fulldf has shape {fulldf.shape}, expected {expected_shape}"
)
X = torch.tensor(fulldf.values.reshape(N_SAMPLES, N_TIMESTEPS, N_CHANNELS))

# Per-channel minimum and maximum of the first differences along the time axis,
# reduced over time and then over samples.
# NOTE(review): these extrema are computed over all samples, including the ones
# that later end up in a test fold — confirm this is acceptable for the study.
derivatives = torch.diff(X, dim=1)
min_values = derivatives.min(dim=1)[0].min(dim=0)[0]
max_values = derivatives.max(dim=1)[0].max(dim=0)[0]

encoder = RateEncoder(min_values, max_values, max_rate=1.0)
X = encoder.encode(X)  # Ensure this returns a tensor or array suitable for model input

labels = pd.read_csv('./example_data/labquake_samples/all_class_labels.csv')
# Labels are indexed with the same fold indices as X, so the row counts must agree.
assert len(labels) == N_SAMPLES, (
    f"{len(labels)} label rows for {N_SAMPLES} samples"
)
ys = [labels[col].values for col in labels]  # One label array per column of the file

# Set up K-Folds (fixed random_state keeps the fold assignment repeatable)
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=123)
results = {}

for y_idx, y in enumerate(ys, start=1):
    y_total, pred_total = [], []

    # kf.split() is a generator, so tqdm needs an explicit total to show progress.
    folds = tqdm(kf.split(X), total=kf.get_n_splits(), desc=f'Zone {y_idx}')
    for train_index, test_index in folds:
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        # A fresh pipeline per fold so nothing fitted carries over between folds.
        # NOTE(review): if Reservoir initialises randomly, results will differ
        # between runs — TODO confirm and seed if so.
        res = Reservoir(inputs=N_CHANNELS)
        #sam = SpikeCount()
        sam = DeSNN()
        clf = LogisticRegression(solver='liblinear')
        pipe = Pipeline(res, sam, clf)

        pipe.fit(X_train, y_train)
        pred = pipe.predict(X_test)

        y_total.extend(y_test)
        pred_total.extend(pred)

    # Metrics over the pooled out-of-fold predictions for this zone.
    acc = accuracy_score(y_total, pred_total)
    cm = confusion_matrix(y_total, pred_total)
    results[f'Zone {y_idx}'] = {'accuracy': acc, 'confusion_matrix': cm}
    print(f"Results for Zone {y_idx}:")
    print(f"Accuracy: {acc}")
    print(f"Confusion Matrix:\n{cm}")

Zone 1: 5it [01:58, 23.69s/it]


Results for Zone 1:
Accuracy: 0.4889705882352941
Confusion Matrix:
[[74  3 17 17]
 [ 4  3  5  8]
 [20  7 11 18]
 [19  4 17 45]]


Zone 2: 5it [01:59, 23.85s/it]


Results for Zone 2:
Accuracy: 0.4632352941176471
Confusion Matrix:
[[77 18 21  2]
 [18 24 16  1]
 [29 14 22  4]
 [ 7  8  8  3]]


Zone 3: 5it [01:59, 23.80s/it]


Results for Zone 3:
Accuracy: 0.5845588235294118
Confusion Matrix:
[[111  31   7   2]
 [ 40  43   3   0]
 [  5  12   5   3]
 [  7   1   2   0]]


Zone 4: 5it [01:58, 23.67s/it]

Results for Zone 4:
Accuracy: 0.43014705882352944
Confusion Matrix:
[[76 21 13 10]
 [25 13 10  3]
 [24 14 20  8]
 [12  2 13  8]]



