In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
import os

warnings.filterwarnings("ignore")
plt.rcParams['figure.figsize'] = 30, 15

root_folder_train = r'C:\Users\acer\Desktop\rwork1\train'
root_folder_test = r'C:\Users\acer\Desktop\rwork1\test'
category = ['Normal', 'Abnormal']
destination = r'C:\Users\acer\Desktop\latif ready data'

def load_data(root_folder):
    final_list = []  # Initialize an empty list to store data
    labels = []      # Initialize an empty list to store labels
    
    for cat in category:
        for filename in os.listdir(os.path.join(root_folder, cat)):
            path = os.path.join(root_folder, cat, filename)
            
            # Print debugging information
            print(f"Processing file: {path}")
            
            try:
                # Read the CSV using Pandas
                df = pd.read_csv(path)
                
                # Print debugging information
                print(f"Number of rows in {filename}: {len(df)}")
                
                # Check if the DataFrame has the expected columns
                if 'Time' in df.columns and 'ECG1' in df.columns and 'ECG2' in df.columns:
                    # Drop the first row, which contains units of measurement
                    df = df.iloc[1:]
                    df
                    # Convert 'ECG1' and 'ECG2' columns to numeric
                    df['ECG1'] = pd.to_numeric(df['ECG1'])
                    df['ECG2'] = pd.to_numeric(df['ECG2'])
                    
                    # Split each file into 30 parts and append them to final_list
                    df_split = np.array_split(df, 30)
                    for splitted_array in df_split:
                        final_list.append(np.array(splitted_array))
                        
                        # Assign labels (0 for 'Normal', 1 for 'Abnormal')
                        labels.append(0 if cat == 'Normal' else 1)
                else:
                    print(f"Columns in {filename} do not match the expected format.")
            except Exception as e:
                print(f"Error processing {filename}: {str(e)}")
    
    # Convert the lists to arrays
    X = np.array(final_list)
    y = np.expand_dims(np.array(labels), axis=1)
    
    return X, y

# Debugging information
print("Loading training data...")
X_train, y_train = load_data(root_folder_train)
print("Loading testing data...")
X_test, y_test = load_data(root_folder_test)

# Debugging information
print("Shapes of loaded data:")
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

# Check if the destination folder exists and create it if not
if not os.path.exists(destination):
    os.makedirs(destination)

# Save the formatted data for easy access later
np.save(os.path.join(destination, "X_train.npy"), X_train)
np.save(os.path.join(destination, "y_train.npy"), y_train)
np.save(os.path.join(destination, "X_test.npy"), X_test)
np.save(os.path.join(destination, "y_test.npy"), y_test)


Loading training data...
Processing file: C:\Users\acer\Desktop\rwork1\train\Normal\samples (10).csv
Number of rows in samples (10).csv: 7681
Columns in samples (10).csv do not match the expected format.
Processing file: C:\Users\acer\Desktop\rwork1\train\Normal\samples (11).csv
Number of rows in samples (11).csv: 7681
Columns in samples (11).csv do not match the expected format.
Processing file: C:\Users\acer\Desktop\rwork1\train\Normal\samples (12).csv
Number of rows in samples (12).csv: 7681
Columns in samples (12).csv do not match the expected format.
Processing file: C:\Users\acer\Desktop\rwork1\train\Normal\samples (13).csv
Number of rows in samples (13).csv: 7681
Columns in samples (13).csv do not match the expected format.
Processing file: C:\Users\acer\Desktop\rwork1\train\Normal\samples (14).csv
Number of rows in samples (14).csv: 7681
Columns in samples (14).csv do not match the expected format.
Processing file: C:\Users\acer\Desktop\rwork1\train\Normal\samples (15).csv
Numb