In [12]:
import numpy as np

# Generating noisy data

In [41]:
import random

def traffic_signal_answer(truth_value, heads_probability, silent=False):
    """Answer the traffic-signal question using a two-coin randomized response.

    A first, biased coin decides whether the real answer is reported; when it
    is not, a second, fair coin supplies the answer instead.

    Args:
        truth_value: The respondent's real answer; handed back unchanged
            whenever the first coin lands on heads.
        heads_probability: Chance that the first coin lands on heads, i.e.
            that the real answer is reported.
        silent: When False (the default), print a DEBUG line saying whether
            the returned answer is real data or noise.

    Returns:
        ``truth_value`` itself if the first coin is heads; otherwise ``True``
        or ``False``, picked with equal probability by the second coin.
    """
    tell_truth = random.random() < heads_probability

    if tell_truth:
        if not silent:
            print("DEBUG: Actual data")
        return truth_value

    if not silent:
        print("DEBUG: Noise")
    # Fair second coin: the first option stands for heads (answer True),
    # the second for tails (answer False).
    return random.choice([True, False])

# Example usage:
# The respondent really did skip the traffic signal.
truth_value = True
# Probability of the first coin landing on heads (the real answer is reported).
heads_probability = 0.3
result = traffic_signal_answer(
    truth_value=truth_value,
    heads_probability=heads_probability,
)
# Prints True or False; a DEBUG line appears first because silent is left at
# its default of False.
print("Result:", result)


DEBUG: Actual data
Result: True


# Full cycle

## Generating a lot of noisy data

### Traffic light data

In [3]:
# Ground truth: 10k Yes answers (True) and 50k No answers (False).
yes_events = 10000
no_events = 50000

# One boolean per respondent: all True answers first, then all False ones.
real_data = np.repeat([True, False], [yes_events, no_events])

print("Ratio of No to Yes events:", no_events/yes_events)

Ratio of No to Yes events: 5.0


In [4]:
# Shuffle in place so the Yes and No answers are interleaved at random.
np.random.shuffle(real_data)

print("REAL DATA", "================", sep="\n")
print("Shape of dataset:", real_data.shape)
print("First 6 elements:", real_data[:6])

REAL DATA
Shape of dataset: (60000,)
First 6 elements: [False False False False  True  True]


### Adding noise to data

In [5]:
# With a fair first coin, each answer has a 50% chance of being replaced by
# the outcome of the second coin.
heads_probability = 0.5

# Pass every real answer through the randomized-response mechanism, in order.
noisy_data = [
    traffic_signal_answer(answer, heads_probability, silent=True)
    for answer in real_data
]

In [6]:
# noisy_data is a plain list, so its size is reported with len().
print("NOISY DATA", "================", sep="\n")
print("Shape of dataset:", len(noisy_data))
print("First 6 elements:", noisy_data[:6])

NOISY DATA
Shape of dataset: 60000
First 6 elements: [False, True, False, False, True, False]


## Removing noise from the data

In [7]:
# Tally the randomized answers: each one is either truthy (Yes) or not (No),
# so the No count is whatever is left after counting the Yes answers.
count_noisy_yes = sum(1 for answer in noisy_data if answer)
count_noisy_no = len(noisy_data) - count_noisy_yes

print("Initial True count:", count_noisy_yes)
print("Initial False count:", count_noisy_no)

Initial True count: 19938
Initial False count: 40062


In [8]:
# Expected number of answers that came from the second (fair) coin instead of
# the truth: each answer is noise with probability 1 - heads_probability.
n_random_answers = len(noisy_data) * (1-heads_probability)
print(n_random_answers)

30000.0


In [9]:
# The second coin is fair, so half of the expected noise answers are True and
# half are False. Subtracting that share from each observed count leaves an
# estimate of the truthful answers of each kind; int() drops any fraction.
denoised_yes, denoised_no = (
    int(noisy_count - n_random_answers / 2)
    for noisy_count in (count_noisy_yes, count_noisy_no)
)

print("Adjusted True count:", denoised_yes)
print("Adjusted False count:", denoised_no)

Adjusted True count: 4938
Adjusted False count: 25062


# Calculating error

In [10]:
# Estimate the No:Yes ratio from the noise-adjusted counts.
# Note: raises ZeroDivisionError if the adjusted Yes count is 0.
estimated_no_to_yes_ratio = denoised_no / denoised_yes

print("Ratio of predicted No to Yes events:", estimated_no_to_yes_ratio)

Ratio of predicted No to Yes events: 5.075334143377885


In [11]:
def calculate_error(original, prediction):
    """Return the relative error of ``prediction`` as a percentage of ``original``.

    Args:
        original: Reference value the prediction is compared against. Must be
            non-zero; a Python int/float zero raises ``ZeroDivisionError``.
        prediction: Estimated value.

    Returns:
        ``|original - prediction| / |original| * 100``, which is never negative.
    """
    error = abs(original - prediction)
    # Divide by the magnitude of the reference so a negative reference value
    # does not flip the sign of the reported percentage.
    error_percentage = (error / abs(original)) * 100
    return error_percentage

# Compare the true No:Yes ratio with the one recovered from the noisy answers.
original_result = no_events/yes_events
prediction_result = estimated_no_to_yes_ratio

error_percentage = calculate_error(
    original=original_result,
    prediction=prediction_result,
)
# Show the percentage with two decimal places.
formatted_error_percentage = "{:.2f}%".format(error_percentage)
print("Error:", formatted_error_percentage)

Error: 1.51%
