In [208]:
import numpy as np

# Generating noisy data

In [209]:
import random

def traffic_signal_answer(truth_value, heads_probability, silent=False):
    """Return one answer under a two-coin randomized-response scheme.

    A first coin lands heads with probability ``heads_probability``. On
    heads the true answer is reported. On tails a second, fair coin decides
    the answer, so the result is independent of ``truth_value``.

    Args:
        truth_value: The respondent's real answer; any truthy/falsy value
            (``bool``, ``numpy.bool_``, ``0``/``1``).
        heads_probability: Probability that the first coin lands heads,
            i.e. that the true answer is reported.
        silent: When False (the default), print a DEBUG line saying whether
            the real answer or noise was returned.

    Returns:
        bool: A built-in ``bool``, whichever branch produced it.
    """
    reports_truth = random.random() < heads_probability

    if reports_truth:
        if not silent:
            print("DEBUG: Actual data")
        # Coerce so this branch returns a plain bool like the noise branch
        # does, even when the caller passes numpy.bool_ or 0/1.
        return bool(truth_value)

    if not silent:
        print("DEBUG: Noise")
    # Second, fair coin: heads -> True, tails -> False.
    return random.choice(['heads', 'tails']) == 'heads'

# Example usage: a single respondent who really did skip the traffic signal.
truth_value = True
heads_probability = 0.3  # chance that the first coin lands heads (truthful answer)
result = traffic_signal_answer(
    truth_value=truth_value,
    heads_probability=heads_probability,
)
# The reported answer is a boolean: the truth on heads, a fair coin flip on tails.
print("Result:", result)


DEBUG: Noise
Result: True


# Full cycle

## Generating a large noisy dataset

### Traffic light data

In [210]:
# Ground truth for the survey: how many respondents really answered Yes / No.
true_events = 10000   # real "Yes" answers
false_events = 50000  # real "No" answers

# Boolean vector laid out as every Yes answer followed by every No answer.
true_data = np.repeat([True, False], [true_events, false_events])

print("Ratio of No to Yes events:", false_events/true_events)

Ratio of No to Yes events: 5.0


In [211]:
# Seed NumPy's global RNG so the shuffle yields the same order on every
# Restart & Run All of the notebook.
np.random.seed(42)
np.random.shuffle(true_data)  # in place: true_data itself is reordered
print("TRUE DATA")
print("================")
print("Shape of dataset:", true_data.shape)
print("First 6 elements:",true_data[0:6])

TRUE DATA
Shape of dataset: (60000,)
First 6 elements: [ True False False False False  True]


### Adding noise to data

In [212]:
heads_probability = 0.3

# Seed the stdlib RNG that traffic_signal_answer draws from, so this cell
# produces the same noisy answers every time it is run.
random.seed(42)

# One randomized response per true answer, in the same order as true_data.
noisy_data = [
    traffic_signal_answer(data_point, heads_probability, silent=True)
    for data_point in true_data
]

In [213]:
# Summarise the randomized responses: a banner, the size, and a short preview.
print("NOISY DATA", "================", sep="\n")
print("Shape of dataset:", len(noisy_data))
print("First 6 elements:", noisy_data[:6])

NOISY DATA
Shape of dataset: 60000
First 6 elements: [False, False, True, True, True, True]


## Removing noise from the data

In [214]:
# Tally the noisy answers: truthy entries are Yes, everything else is No.
count_true = sum(1 for answer in noisy_data if answer)
count_false = len(noisy_data) - count_true

print("Initial True count:", count_true)
print("Initial False count:", count_false)

Initial True count: 24037
Initial False count: 35963


In [215]:
# Expected number of answers decided by the second (noise) coin: each answer
# is noise with probability 1 - heads_probability.
noise_probability = 1 - heads_probability
n_random_answers = len(noisy_data) * noise_probability
print(n_random_answers)

42000.0


In [216]:
# The noise coin is fair, so half of the noise answers are expected to be
# True and half False; remove that share from each observed count.
expected_noise_per_answer = n_random_answers / 2

# Start from counts taken directly from noisy_data rather than from the
# current count_true / count_false, so re-running this cell does not
# subtract the noise a second time.
observed_true = sum(1 for answer in noisy_data if answer)
observed_false = len(noisy_data) - observed_true

count_true = observed_true - expected_noise_per_answer
count_false = observed_false - expected_noise_per_answer

print("Adjusted True count:", count_true)
print("Adjusted False count:", count_false)

Adjusted True count: 3037.0
Adjusted False count: 14963.0


# Calculating error

In [217]:
# Estimated No:Yes ratio recovered from the noise-corrected counts.
false_to_true_ratio = count_false / count_true

print("Ratio of predicted No to Yes events: {}".format(false_to_true_ratio))

Ratio of predicted No to Yes events: 4.926901547579848


In [218]:
def calculate_error(original, prediction):
    """Return the relative error of ``prediction`` as a percentage of ``original``.

    Args:
        original: Reference value; must be non-zero.
        prediction: Estimated value to compare against ``original``.

    Returns:
        The absolute difference divided by the magnitude of ``original``,
        multiplied by 100. The result is never negative.

    Raises:
        ZeroDivisionError: If ``original`` is zero (for built-in numeric
            types).
    """
    absolute_error = abs(original - prediction)
    # Divide by the magnitude so a negative reference value does not flip
    # the sign of the reported error.
    return (absolute_error / abs(original)) * 100

# Compare the true No:Yes ratio with the one recovered from the noisy answers.
original_result = false_events/true_events
prediction_result = false_to_true_ratio

error_percentage = calculate_error(original_result, prediction_result)
formatted_error_percentage = "{:.2f}%".format(error_percentage)
# The label states that the figure is a percentage.
print("Error percentage:", formatted_error_percentage)

Error percentage: 1.46%
