In [None]:
def load_ecg_dataset():
    """Load the four heartbeat CSV files as header-less DataFrames.

    The Kaggle dataset "shayanfazeli/heartbeat" is tried first. If anything
    in that attempt raises, the CSVs are read from the current working
    directory instead; when ``mitbih_test.csv`` is missing there, the
    ``data.7z.001`` archive is unpacked first with whichever 7-Zip command
    succeeds.

    Returns:
        Tuple ``(train_data, test_data, abnormal_data, normal_data)`` read
        from ``mitbih_train.csv``, ``mitbih_test.csv``,
        ``ptbdb_abnormal.csv`` and ``ptbdb_normal.csv`` respectively.
    """
    # Read order matches the order of the returned tuple.
    csv_names = (
        'mitbih_train.csv',
        'mitbih_test.csv',
        'ptbdb_abnormal.csv',
        'ptbdb_normal.csv',
    )

    try:
        dataset_path = Path(kagglehub.dataset_download("shayanfazeli/heartbeat"))
        print(f"Dataset path: {dataset_path}")

        frames = [pd.read_csv(dataset_path / name, header=None) for name in csv_names]

        print("Dane pobrane z Kaggle pomy≈õlnie!")

    except Exception as err:
        print(f"Pobieranie nie powiod≈Ço siƒô: {err}")
        print("Przechodzƒô do lokalnych plik√≥w lub rozpakowywania 7z...")

        if not file_exists("mitbih_test.csv"):
            # Try each 7-Zip executable name in turn; stop at the first
            # one that exits with status 0.
            for cmd in ("7za e data.7z.001", "7z e data.7z.001", "7zz e data.7z.001"):
                if run_shell(cmd) == 0:
                    break
            else:
                print("Zainstaluj 7za lub 7z, by rozpakowaƒá archiwum.")

        frames = [pd.read_csv(name, header=None) for name in csv_names]

        print("Dane za≈Çadowane z lokalnych plik√≥w!")

    train_data, test_data, abnormal_data, normal_data = frames
    return train_data, test_data, abnormal_data, normal_data

print("Funkcja ≈Çadowania danych zdefiniowana")


In [None]:
def prepare_data(df: pd.DataFrame, label_value: int):
    """Select the rows of ``df`` whose label equals ``label_value``.

    Column ``187`` is treated as the label column and the first 187 columns
    (by position) as the features.

    Args:
        df: Frame with at least 188 columns, the label stored under the
            column key ``187``.
        label_value: Label to keep.

    Returns:
        ``(features, labels)`` where ``features`` is a float32 array of
        shape ``(n_selected, 187)`` and ``labels`` is a plain list holding
        the label of every selected row.
    """
    matches_label = df[187] == label_value
    selected = df.loc[matches_label]

    labels = selected[187].to_numpy().tolist()
    features = selected.iloc[:, :187].to_numpy().astype(np.float32)
    return features, labels

def normalize(train_arr, test_arr):
    """Min-max scale both arrays using statistics of ``train_arr`` only.

    The minimum and maximum are taken over the first two axes of
    ``train_arr`` (a single global value for a 2-D input) and the same
    offset and scale are applied to ``test_arr``. Values of ``test_arr``
    outside the training range therefore land outside ``[0, 1]``.

    Args:
        train_arr: Array with at least two dimensions; defines the range.
        test_arr: Array broadcast-compatible with the training statistics.

    Returns:
        ``(train_norm, test_norm)``, the rescaled copies of the inputs.
    """
    min_val = np.min(train_arr, axis=(0, 1))
    max_val = np.max(train_arr, axis=(0, 1))

    # A constant training array has zero range; dividing by it would fill
    # the output with NaN/inf. Fall back to a unit scale in that case so
    # the result is simply the offset-removed data.
    span = max_val - min_val
    span = np.where(span == 0, np.ones_like(span), span)

    train_norm = (train_arr - min_val) / span
    test_norm = (test_arr - min_val) / span
    return train_norm, test_norm

print("Funkcje przetwarzania danych zdefiniowane")


In [None]:
def plot_ecg_samples(normal_signals, abnormal_signals):
    """Plot up to eight normal and eight abnormal signals on a 4x4 grid.

    The first eight entries of each collection are drawn: normal signals
    fill grid positions 1-8 and abnormal signals positions 9-16. The figure
    is written to ``ecg_samples.png`` and then shown.

    Args:
        normal_signals: Sliceable sequence of 1-D signals (e.g. a 2-D array).
        abnormal_signals: Sliceable sequence of 1-D signals.
    """
    per_class = 8
    plt.figure(figsize=(20, 10))

    for idx, signal in enumerate(normal_signals[:per_class]):
        plt.subplot(4, 4, idx + 1)
        plt.plot(signal)
        plt.title(f"Normal ECG Sample {idx+1}")
        plt.xlabel("Time")

    # Index the abnormal collection from 0 as well, so the sample number in
    # the title matches the row that is drawn and a collection with fewer
    # than 16 rows does not raise IndexError. Only the grid position is
    # offset past the normal panels.
    for idx, signal in enumerate(abnormal_signals[:per_class]):
        plt.subplot(4, 4, per_class + idx + 1)
        plt.plot(signal)
        plt.title(f"Abnormal ECG Sample {idx+1}")
        plt.xlabel("Time")

    plt.tight_layout()
    plt.savefig('ecg_samples.png', dpi=300, bbox_inches='tight')
    plt.show()

def plot_training_errors(epochs, errors):
    """Draw the validation-error curve, save it and display it.

    Args:
        epochs: X values (epoch numbers).
        errors: Y values (validation RMSE per epoch), same length as
            ``epochs``.

    The figure is written to ``training_error.png``.
    """
    fig, ax = plt.subplots()
    ax.plot(epochs, errors)
    ax.set_title("Error During Training")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Validation RMSE")
    ax.grid(True)
    fig.savefig('training_error.png', dpi=300, bbox_inches='tight')
    plt.show()

print("Funkcje wizualizacji zdefiniowane")


In [None]:
class ECGAutoEncoder(tf.keras.Model):
    """Fully connected autoencoder with a square-root / square wrapper.

    The encoder maps ``input_dim`` values through Dense layers of 100 and 40
    units to a ``latent_dim``-sized linear code; the decoder mirrors it and
    ends in a sigmoid layer of ``input_dim`` units. Inputs are square-rooted
    before encoding and decoder outputs are squared, so reconstructions are
    always within ``[0, 1]``.

    Args:
        input_dim: Number of values per input row. Defaults to 188, the
            width that was previously hard-coded.
        latent_dim: Size of the compressed code. Defaults to 20.
    """

    def __init__(self, input_dim=188, latent_dim=20):
        super().__init__()

        # An explicit Input object declares the shape instead of the
        # layer-level `input_shape=` keyword, which Keras 3 warns about.
        self.encoder = tf.keras.Sequential([
            tf.keras.Input(shape=(input_dim,)),
            tf.keras.layers.Dense(100, activation='relu'),
            tf.keras.layers.Dense(40, activation='relu'),
            tf.keras.layers.Dense(latent_dim, activation='linear'),
        ])

        self.decoder = tf.keras.Sequential([
            tf.keras.Input(shape=(latent_dim,)),
            tf.keras.layers.Dense(40, activation='relu'),
            tf.keras.layers.Dense(100, activation='relu'),
            tf.keras.layers.Dense(input_dim, activation='sigmoid'),
        ])

    def call(self, x):
        """Return the reconstruction of ``x`` (encode followed by decode)."""
        return self.decode(self.encode(x))

    def encode(self, x):
        """Compress ``x`` to its latent code.

        Negative entries are clamped to zero first: ``sqrt`` of a negative
        number is NaN, and data scaled with statistics from another split
        can dip slightly below zero.
        """
        return self.encoder(tf.sqrt(tf.maximum(x, 0.0)))

    def decode(self, encoded):
        """Expand a latent code back to input space (squared sigmoid output)."""
        return tf.square(self.decoder(encoded))

print("Model autokodera zdefiniowany")
# NOTE(review): the default model width is 188, while this message reports
# 187 input dimensions -- confirm which figure is intended.
print("Stopie≈Ñ kompresji: 187 ‚Üí 20 wymiar√≥w (9.35x mniej danych)")


In [None]:
def train_epoch(model, optimizer, dataset, val_data, loss_fn):
    """Run one pass over ``dataset`` and report the validation error.

    The data is shuffled with a buffer of 1000 rows and split into batches
    of 250. Each batch is reconstructed by ``model``, scored with
    ``loss_fn(batch, reconstruction)`` and used for one optimizer step.

    Args:
        model: Callable Keras model exposing ``trainable_variables``.
        optimizer: Optimizer providing ``apply_gradients``.
        dataset: Training rows, sliced along the first axis.
        val_data: Rows reconstructed once after the pass to measure error.
        loss_fn: Callable ``(target, prediction) -> loss``.

    Returns:
        Python float: the mean over validation rows of each row's RMSE.
    """
    batches = tf.data.Dataset.from_tensor_slices(dataset)
    batches = batches.shuffle(1000)
    batches = batches.batch(250)

    for x_batch in batches:
        with tf.GradientTape() as tape:
            batch_loss = loss_fn(x_batch, model(x_batch))
        variables = model.trainable_variables
        gradients = tape.gradient(batch_loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))

    # Row-wise RMSE first, then the average across rows.
    residual = model(val_data) - val_data
    row_rmse = tf.sqrt(tf.reduce_mean(tf.square(residual), axis=1))
    return float(tf.reduce_mean(row_rmse))

print("Funkcja treningu zdefiniowana")


In [None]:
# Load the four CSV frames (MIT-BIH train/test and PTB abnormal/normal).
print("≈Åadowanie danych EKG...")
train_df, test_df, abn_df, norm_df = load_ecg_dataset()

# Keep only the rows whose label column (187) equals 0 in each MIT-BIH split.
print("Przygotowanie danych...")
x_train, y_train = prepare_data(train_df, label_value=0)
x_test, y_test = prepare_data(test_df, label_value=0)

print(f"Dane treningowe: {x_train.shape}")
print(f"Dane testowe: {x_test.shape}")

# Test-split rows labelled 1 serve as the "abnormal" examples in the preview
# figure; their labels are not needed, hence the discarded second value.
abnormal_samples, _ = prepare_data(test_df, label_value=1)
plot_ecg_samples(x_test, abnormal_samples)


In [None]:
print("Normalizacja danych...")
# NOTE(review): the complete frames are passed here, so column 187 (the
# label column according to prepare_data) is scaled together with the signal
# columns and becomes part of the model input. The label-filtered
# x_train / x_test arrays built earlier are not used. Confirm this is
# intended.
train_norm, test_norm = normalize(train_df.values.astype(np.float32), test_df.values.astype(np.float32))

print(f"Znormalizowane dane treningowe: {train_norm.shape}")
print(f"Zakres danych po normalizacji: [{train_norm.min():.3f}, {train_norm.max():.3f}]")

# Model, optimizer and loss object consumed by the training loop.
print("Inicjalizacja modelu...")
model = ECGAutoEncoder()
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
loss_function = tf.keras.losses.MeanSquaredError()

print("Model i optimizer gotowe do treningu")


In [None]:
print("Rozpoczynanie treningu...")

epochs = []            # epoch numbers, in the order they were run
errors = []            # validation RMSE recorded after each epoch
error = 1.0            # most recent validation RMSE; 1.0 is the starting baseline
best_error = error     # lowest validation RMSE seen so far
best_epoch = None      # epoch at which best_error was reached
no_improve_count = 0   # epochs elapsed since the last new best
best_weights = None
epoch = 1

# Stopping criteria: target reached, epoch budget exhausted, or no new best
# for `patience` consecutive epochs.
target_error = 8.6e-3
max_epochs = 200
patience = 15

print(f"Cel: RMSE < {target_error}")
print(f"Maksymalne epoki: {max_epochs}")
print(f"Cierpliwo≈õƒá: {patience} epok bez poprawy")
print("-" * 50)

# NOTE(review): test_norm doubles as the validation set here, so the test
# RMSE reported after training is not an independent estimate.
while error > target_error and epoch <= max_epochs and no_improve_count < patience:
    val_error = train_epoch(model, optimizer, train_norm, test_norm, loss_function)

    # Measure improvement against the best epoch so far, not merely the
    # previous one. Otherwise a partial recovery after a bad epoch would
    # overwrite the stored best weights with worse ones and reset the
    # patience counter.
    diff = best_error - val_error
    if diff < 1e-6:
        no_improve_count += 1
    else:
        no_improve_count = 0
        best_error = val_error
        best_epoch = epoch
        best_weights = model.get_weights()

    error = val_error
    epochs.append(epoch)
    errors.append(error)

    status = "üî¥" if no_improve_count > 0 else "üü¢"
    print(f"{status} Epoch {epoch:3d} | RMSE: {error:.6f} | Diff: {diff:.6f} | No improve: {no_improve_count}")

    epoch += 1

if best_weights is not None:
    model.set_weights(best_weights)
    # The model now holds the best epoch's weights, so report that epoch's
    # error as the final figure.
    error = best_error
    print(f"Przywr√≥cono najlepsze wagi z epoki {best_epoch}")

print(f"\nTrening zako≈Ñczony po {epoch-1} epokach")
print(f"Ko≈Ñcowy RMSE: {error:.6f}")


In [None]:
# A single recorded epoch cannot form a curve, so plotting is skipped then.
if len(epochs) <= 1:
    print("Uczenie zbyt szybkie, brak wykresu")
else:
    plot_training_errors(epochs, errors)


In [None]:
print("Ewaluacja ko≈Ñcowa modelu...")


def _mean_rmse(original, reconstructed):
    """Return the mean over rows of each row's root-mean-square error."""
    row_rmse = tf.sqrt(tf.reduce_mean(tf.square(original - reconstructed), axis=1))
    return float(tf.reduce_mean(row_rmse))


# Encode and decode separately so the compressed codes stay available.
compressed_train = model.encode(train_norm)
compressed_test = model.encode(test_norm)
decompressed_train = model.decode(compressed_train)
decompressed_test = model.decode(compressed_test)

train_rmse = _mean_rmse(train_norm, decompressed_train)
test_rmse = _mean_rmse(test_norm, decompressed_test)

print(f"Ko≈Ñcowy b≈ÇƒÖd treningowy (RMSE): {train_rmse:.6f}")
print(f"Ko≈Ñcowy b≈ÇƒÖd testowy (RMSE): {test_rmse:.6f}")
print(f"Stopie≈Ñ kompresji: {187/20:.1f}x (187 ‚Üí 20 wymiar√≥w)")

# Overlay the first test row and its reconstruction for a visual check.
print("\nGenerowanie por√≥wnania orygina≈Çu z rekonstrukcjƒÖ...")
plt.figure(figsize=(15, 5))
plt.plot(test_norm[0], label='Oryginalny sygna≈Ç', linewidth=2)
plt.plot(decompressed_test[0].numpy(), label='Odtworzony sygna≈Ç', linewidth=2, alpha=0.8)
plt.title("Por√≥wnanie orygina≈Çu i rekonstrukcji EKG", fontsize=14)
plt.xlabel("Pr√≥bka")
plt.ylabel("Amplituda (znormalizowana)")
plt.legend()
plt.grid(True)
plt.savefig('reconstruction_comparison.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
print("Analiza rozk≈Çadu b≈Çƒôd√≥w rekonstrukcji...")

# Signed per-value reconstruction residuals, flattened to 1-D for the
# summary statistics and histograms.
train_err_flat = (decompressed_train - train_norm).numpy().flatten()
test_err_flat = (decompressed_test - test_norm).numpy().flatten()

print(f"B≈Çƒôdy treningowe - ≈õrednia: {train_err_flat.mean():.6f}, std: {train_err_flat.std():.6f}")
print(f"B≈Çƒôdy testowe - ≈õrednia: {test_err_flat.mean():.6f}, std: {test_err_flat.std():.6f}")

plt.figure(figsize=(20, 6))

# One histogram panel per split, side by side, with a log-scaled count axis.
histogram_panels = (
    (train_err_flat, 'blue', "Rozk≈Çad b≈Çƒôd√≥w treningowych (log scale)"),
    (test_err_flat, 'red', "Rozk≈Çad b≈Çƒôd√≥w testowych (log scale)"),
)
for panel_no, (residuals, colour, heading) in enumerate(histogram_panels, start=1):
    plt.subplot(1, 2, panel_no)
    plt.hist(residuals, bins=50, log=True, alpha=0.7, color=colour)
    plt.title(heading)
    plt.xlabel("B≈ÇƒÖd rekonstrukcji")
    plt.ylabel("Czƒôsto≈õƒá (log)")
    plt.grid(True)

plt.tight_layout()
plt.savefig('error_distribution.png', dpi=300, bbox_inches='tight')
plt.show()


In [None]:
print("SprzƒÖtanie...")

# Best-effort removal of the CSV files from the working directory; any
# failure is reported and otherwise ignored.
try:
    # NOTE(review): `dataset_path` is assigned inside load_ecg_dataset(), so
    # it is a local of that function. Unless some other cell defines it at
    # module scope, this condition is always true here: the local CSVs are
    # deleted even when the Kaggle download succeeded, and the else branch
    # is never reached. Confirm, and consider having the loader report which
    # source it used.
    if 'dataset_path' not in locals():
        files_to_clean = ["mitbih_test.csv", "mitbih_train.csv", "ptbdb_abnormal.csv", "ptbdb_normal.csv"]
        for f in files_to_clean:
            if file_exists(f):
                delete_file(f)
                print(f"Usuniƒôto {f}")
        print("Lokalne pliki CSV posprzƒÖtane")
    else:
        print("Dane pobrane z Kaggle - brak konieczno≈õci sprzƒÖtania")
except Exception as e:
    print(f"B≈ÇƒÖd podczas sprzƒÖtania: {e}")

# Final summary of the run.
# NOTE(review): the architecture and compression figures below are
# hard-coded as 187 wide, whereas ECGAutoEncoder is built with 188
# inputs/outputs -- confirm which figure is intended.
print("\n" + "="*60)
print("PODSUMOWANIE EKSPERYMENTU KOMPRESJI EKG")
print("="*60)
print(f"Architektura: 187 ‚Üí 100 ‚Üí 40 ‚Üí 20 ‚Üí 40 ‚Üí 100 ‚Üí 187")
print(f"Stopie≈Ñ kompresji: {187/20:.1f}x")
print(f"RMSE treningowy: {train_rmse:.6f}")
print(f"RMSE testowy: {test_rmse:.6f}")
print(f"Liczba epok: {len(epochs)}")
print(f"Pliki wygenerowane:")
print("   - ecg_samples.png (przyk≈Çadowe sygna≈Çy)")
print("   - training_error.png (krzywa uczenia)")
print("   - reconstruction_comparison.png (por√≥wnanie)")
print("   - error_distribution.png (rozk≈Çad b≈Çƒôd√≥w)")
print("="*60)
print("Eksperyment zako≈Ñczony pomy≈õlnie!")
