In [2]:
import pandas as pd
import numpy as np
from scipy.stats import norm

# Load the dataset
file_path = "railway.csv"
df = pd.read_csv(file_path)

# Extract the "Price" column and remove missing values
prices = df["Price"].dropna().values

def run_test(sequence):
    """
    Perform the Run Test to check if values are randomly distributed.
    """
    median = np.median(sequence)
    signs = np.where(sequence >= median, 1, 0)  # Convert values to 1 and 0 based on median
    
    # Count the number of runs
    runs = np.sum(np.diff(signs) != 0) + 1
    n1 = np.sum(signs)  # Count of values above or equal to median
    n2 = len(sequence) - n1  # Count of values below median

    # Calculate expected runs and standard deviation
    expected_runs = ((2 * n1 * n2) / (n1 + n2)) + 1
    std_dev_runs = np.sqrt((2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) / ((n1 + n2) ** 2 * (n1 + n2 - 1)))

    # Compute Z-score and p-value
    z_score = (runs - expected_runs) / std_dev_runs
    p_value = 2 * (1 - norm.cdf(abs(z_score)))
    
    return runs, expected_runs, std_dev_runs, z_score, p_value

# Apply the test
run_test_results = run_test(prices)

# Display results
print("Run Test Results:")
print(f"Number of Runs: {run_test_results[0]}")
print(f"Expected Runs: {run_test_results[1]:.2f}")
print(f"Standard Deviation of Runs: {run_test_results[2]:.2f}")
print(f"Z-score: {run_test_results[3]:.2f}")
print(f"p-value: {run_test_results[4]:.5f}")

# Interpretation
if run_test_results[4] < 0.05:
    print("The data is not randomly distributed (Reject Null Hypothesis).")
else:
    print("The data appears to be randomly distributed (Fail to Reject Null Hypothesis).")

Run Test Results:
Number of Runs: 13966
Expected Runs: 15824.62
Standard Deviation of Runs: 0.94
Z-score: -1977.11
p-value: 0.00000
The data is not randomly distributed (Reject Null Hypothesis).


  std_dev_runs = np.sqrt((2 * n1 * n2 * (2 * n1 * n2 - n1 - n2)) / ((n1 + n2) ** 2 * (n1 + n2 - 1)))
