In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd /content/drive/MyDrive/Heart_sound_classification-master/Heart_sound_classification-master

/content/drive/MyDrive/Heart_sound_classification-master/Heart_sound_classification-master


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Reusing the read_variable_length_csv function
def read_variable_length_csv(file_path):
    data_list = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line_data = line.strip().split(',')
            data_list.append(line_data)
    # Infer the maximum number of features
    max_features = max(len(line_data) for line_data in data_list) - 2  # Subtract SampleName and Label
    column_names = ['SampleName', 'Label'] + [f'Feature{i}' for i in range(1, max_features + 1)]
    df = pd.DataFrame(data_list, columns=column_names)
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    return df

# Read data
csv_file_path = '/content/drive/MyDrive/Heart_sound_classification-master/Heart_sound_classification-master/csv/training-b.csv'
data = read_variable_length_csv(csv_file_path)

# Replace 'None' with zeros and convert features to floats
features = data.iloc[:, 2:].fillna(0).astype(float)

# Flatten the features since SVM does not need the data to be padded as in sequential models
features_flat = features.values

# Encode the labels
labels = LabelEncoder().fit_transform(data.iloc[:, 1].values)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(features_flat, labels, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create SVM model
svm_model = SVC(kernel='rbf', C=1.0, random_state=42)

# Train the SVM model
svm_model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = svm_model.predict(X_test_scaled)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of SVM model: {accuracy:.4f}')


Accuracy of SVM model: 0.6951


In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

def read_variable_length_csv(file_path):
    data_list = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line_data = line.strip().split(',')
            data_list.append(line_data)
    max_features = max(len(line_data) for line_data in data_list) - 2  # Subtract SampleName and Label
    column_names = ['SampleName', 'Label'] + [f'Feature{i}' for i in range(1, max_features + 1)]
    df = pd.DataFrame(data_list, columns=column_names)
    # Randomly shuffle the dataframe
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    return df

# Path to your CSV file
csv_file_path = '/content/drive/MyDrive/Heart_sound_classification-master/Heart_sound_classification-master/csv/training-b.csv'
data = read_variable_length_csv(csv_file_path)

# Replace 'None' with zeros and convert features to floats
features = data.iloc[:, 2:].fillna(0).astype(float).values

# Encode the labels
labels = LabelEncoder().fit_transform(data.iloc[:, 1].values)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=43)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the XGBoost model
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=43)
xgb_model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = xgb_model.predict(X_test_scaled)

# Calculate and print accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of XGBoost model: {accuracy:.4f}')


Accuracy of XGBoost model: 0.7317


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score

def read_variable_length_csv(file_path):
    data_list = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line_data = line.strip().split(',')
            data_list.append(line_data)
    max_features = max(len(line_data) for line_data in data_list) - 2  # Subtract SampleName and Label
    column_names = ['SampleName', 'Label'] + [f'Feature{i}' for i in range(1, max_features + 1)]
    df = pd.DataFrame(data_list, columns=column_names)
    # Randomly shuffle the dataframe
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    return df

# Path to your CSV file
csv_file_path = '/content/drive/MyDrive/Heart_sound_classification-master/Heart_sound_classification-master/csv/training-b.csv'
data = read_variable_length_csv(csv_file_path)

# Replace 'None' with zeros and convert features to floats
features = data.iloc[:, 2:].fillna(0).astype(float).values

# Encode the labels
labels = LabelEncoder().fit_transform(data.iloc[:, 1].values)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=4)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the model architecture
model = Sequential([
    Dense(1024, input_dim=X_train_scaled.shape[1], activation='relu'),
    Dropout(0.5),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model
y_pred = model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of MLP model: {accuracy:.4f}')


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50




Accuracy of MLP model: 0.6707
