In [39]:
import torch
import csv
import pandas as pd
from collections import Counter

In [40]:
# Suffixes of the test datasets to analyse; each value selects both the file
# test_dataset/test_dataset_<value>.csv and the column awgn_sign_codeword_<value>.
# NOTE(review): presumably these are Eb/N0 levels in dB -- confirm.
Eb_N0_test = list(range(8))

# CSV files holding the training samples (full set and the "ERRORS" variant).
train_dataset_path = "train_dataset.csv"
train_dataset_ERRORS_path = "train_dataset_ERRORS.csv"

In [41]:
def _parse_vector(raw):
    """Turn a CSV cell such as "[0.5, -1.2]" into a list of floats.

    Values that are not strings are returned unchanged.
    """
    if isinstance(raw, str):
        return [float(token) for token in raw.strip('[]').split(',')]
    return raw


def count_errors(file_path, awgn_row_name):
    """Print how many positions per row differ between the received and original vectors.

    For every row of the CSV file, the vector stored in column
    ``awgn_row_name`` is thresholded at zero (value >= 0 -> 0, value < 0 -> 1)
    and compared position by position with the vector stored in the
    ``codeword`` column. The function prints the file name, the largest
    per-row error count, and how many rows had each error count.

    Args:
        file_path: Path of the CSV file to read with ``pandas.read_csv``.
        awgn_row_name: Name of the column that holds the received vector,
            stored as a bracketed, comma-separated string.

    Returns:
        None. Results are only printed.

    Raises:
        ValueError: If ``awgn_row_name`` or ``codeword`` is not a column of
            the file, or if the two vectors of a row differ in length.
    """
    data = pd.read_csv(file_path)

    if awgn_row_name not in data.columns or 'codeword' not in data.columns:
        raise ValueError(f"Column {awgn_row_name} or 'codeword' was not found in the dataset.")

    # Maps "number of errors in a row" -> "number of rows with that many errors".
    error_distribution = Counter()

    # Iterate over the two needed columns directly instead of materialising a
    # Series per row with iterrows().
    for row_index, awgn_vector, original_vector in zip(
        data.index, data[awgn_row_name], data['codeword']
    ):
        awgn_vector = _parse_vector(awgn_vector)
        original_vector = _parse_vector(original_vector)

        # zip() would silently drop the surplus positions of the longer
        # vector, hiding a malformed row behind an under-counted result.
        if len(awgn_vector) != len(original_vector):
            raise ValueError(
                f"Row {row_index}: column {awgn_row_name} has {len(awgn_vector)} values "
                f"but 'codeword' has {len(original_vector)}."
            )

        # Sign-based decision: non-negative values map to 0, negative to 1.
        transformed_vector = [0 if elem >= 0 else 1 for elem in awgn_vector]

        error_count = sum(1 for t, o in zip(transformed_vector, original_vector) if t != o)
        error_distribution[error_count] += 1

    # Error counts are never negative, so 0 is the right value for an empty file.
    max_errors = max(error_distribution, default=0)

    print(f'FILE: {file_path}')
    print(f"Maximum number of errors per row: {max_errors}")

    # Header text means "Distribution of the number of errors in the file".
    print("\nRozkład liczby błędów w pliku:")
    for error_count, occurrences in sorted(error_distribution.items()):
        print(f"Number of errors per row = {error_count}: appeared {occurrences} times")

In [42]:
# Error statistics for the training set.
count_errors(
    file_path=train_dataset_path,
    awgn_row_name="awgn_sign_codeword_0",
)

FILE: train_dataset.csv
Maximum number of errors per row: 8

Rozkład liczby błędów w pliku:
Number of errors per row = 0: appeared 29395 times
Number of errors per row = 1: appeared 37544 times
Number of errors per row = 2: appeared 22329 times
Number of errors per row = 3: appeared 8173 times
Number of errors per row = 4: appeared 2190 times
Number of errors per row = 5: appeared 402 times
Number of errors per row = 6: appeared 53 times
Number of errors per row = 7: appeared 9 times
Number of errors per row = 8: appeared 1 times


In [43]:
# Error statistics for every test set; each one has its own file and column suffix.
for elem in Eb_N0_test:
    count_errors(
        file_path=f"test_dataset/test_dataset_{elem}.csv",
        awgn_row_name=f"awgn_sign_codeword_{elem}",
    )
    # Blank lines to separate consecutive reports.
    print("\n" * 2)

FILE: test_dataset/test_dataset_0.csv
Maximum number of errors per row: 7

Rozkład liczby błędów w pliku:
Number of errors per row = 0: appeared 36712 times
Number of errors per row = 1: appeared 46843 times
Number of errors per row = 2: appeared 27972 times
Number of errors per row = 3: appeared 10316 times
Number of errors per row = 4: appeared 2631 times
Number of errors per row = 5: appeared 500 times
Number of errors per row = 6: appeared 74 times
Number of errors per row = 7: appeared 8 times



FILE: test_dataset/test_dataset_1.csv
Maximum number of errors per row: 6

Rozkład liczby błędów w pliku:
Number of errors per row = 0: appeared 52411 times
Number of errors per row = 1: appeared 46883 times
Number of errors per row = 2: appeared 19663 times
Number of errors per row = 3: appeared 5106 times
Number of errors per row = 4: appeared 874 times
Number of errors per row = 5: appeared 105 times
Number of errors per row = 6: appeared 14 times



FILE: test_dataset/test_dataset_2.c

In [44]:
# Error statistics for the "ERRORS" variant of the training set.
count_errors(
    file_path=train_dataset_ERRORS_path,
    awgn_row_name="awgn_sign_codeword_0",
)

FILE: train_dataset_ERRORS.csv
Maximum number of errors per row: 8

Rozkład liczby błędów w pliku:
Number of errors per row = 1: appeared 37544 times
Number of errors per row = 2: appeared 22329 times
Number of errors per row = 3: appeared 8173 times
Number of errors per row = 4: appeared 2190 times
Number of errors per row = 5: appeared 402 times
Number of errors per row = 6: appeared 53 times
Number of errors per row = 7: appeared 9 times
Number of errors per row = 8: appeared 1 times


In [45]:
# Error statistics for the "ERRORS" variant of every test set.
for elem in Eb_N0_test:
    count_errors(
        file_path=f"test_dataset/test_dataset_ERRORS_{elem}.csv",
        awgn_row_name=f"awgn_sign_codeword_{elem}",
    )
    # Blank lines to separate consecutive reports.
    print("\n" * 2)

FILE: test_dataset/test_dataset_ERRORS_0.csv
Maximum number of errors per row: 7

Rozkład liczby błędów w pliku:
Number of errors per row = 1: appeared 46843 times
Number of errors per row = 2: appeared 27972 times
Number of errors per row = 3: appeared 10316 times
Number of errors per row = 4: appeared 2631 times
Number of errors per row = 5: appeared 500 times
Number of errors per row = 6: appeared 74 times
Number of errors per row = 7: appeared 8 times



FILE: test_dataset/test_dataset_ERRORS_1.csv
Maximum number of errors per row: 6

Rozkład liczby błędów w pliku:
Number of errors per row = 1: appeared 46883 times
Number of errors per row = 2: appeared 19663 times
Number of errors per row = 3: appeared 5106 times
Number of errors per row = 4: appeared 874 times
Number of errors per row = 5: appeared 105 times
Number of errors per row = 6: appeared 14 times



FILE: test_dataset/test_dataset_ERRORS_2.csv
Maximum number of errors per row: 5

Rozkład liczby błędów w pliku:
Number of 