In [2]:
import pandas as pd

# Read the ksn-binned Pennines table; the next cell uses it through `data`.
file_path = "pennines_binned_ksn_9.csv"
data = pd.read_csv(filepath_or_buffer=file_path)


In [7]:
#KS test ksn
# Pairwise two-sample Kolmogorov-Smirnov tests comparing the sinuosity
# distribution of every pair of ksn bins in `data` (loaded by the previous
# cell). Results are printed and written to "ksn_ks_test.csv".
from itertools import combinations

from scipy.stats import ks_2samp

# Ensure bins are numeric; labels that cannot be parsed become NaN.
data['ksn_bin_midpoint'] = pd.to_numeric(data['ksn_bin_midpoint'], errors='coerce')

# Split sinuosity by bin once instead of re-filtering the frame for every pair.
# groupby skips rows whose bin is NaN (which the coercion above can create) and
# yields the bins in ascending order; dropna() removes missing sinuosity values
# so the test only ever sees real observations.
sinuosity_by_bin = {
    bin_midpoint: values.dropna()
    for bin_midpoint, values in data.groupby('ksn_bin_midpoint')['sinuosity']
}
# A bin left with no observations cannot be tested (ks_2samp rejects empty input).
sinuosity_by_bin = {
    bin_midpoint: values
    for bin_midpoint, values in sinuosity_by_bin.items()
    if not values.empty
}
unique_bins = sorted(sinuosity_by_bin)

# Iterate through all pairs of bins (each unordered pair once, lower bin first)
ks_results = []
for bin_1, bin_2 in combinations(unique_bins, 2):
    # Perform KS test
    ks_stat, p_value = ks_2samp(sinuosity_by_bin[bin_1], sinuosity_by_bin[bin_2])

    # Store results
    # NOTE: p-values are stored exactly as returned, one test per pair of bins;
    # no multiple-comparison adjustment is applied here.
    ks_results.append({
        'Ksn Bin Midpoint 1': bin_1,
        'Ksn Bin Midpoint 2': bin_2,
        'KS Statistic': ks_stat,
        'P-value': p_value
    })

# Convert results to a DataFrame for easy viewing. The explicit column list
# keeps the CSV header intact even when there are no pairs to compare.
ks_df = pd.DataFrame(
    ks_results,
    columns=['Ksn Bin Midpoint 1', 'Ksn Bin Midpoint 2', 'KS Statistic', 'P-value'],
)

# Display the results
print(ks_df)
#save
output_file = "ksn_ks_test.csv"
ks_df.to_csv(output_file, index=False)


    Ksn Bin Midpoint 1  Ksn Bin Midpoint 2  KS Statistic       P-value
0              3.11475              8.4930      0.131579  1.440340e-01
1              3.11475             12.7015      0.134934  1.133785e-01
2              3.11475             17.1420      0.210526  2.307139e-03
3              3.11475             21.8765      0.300453  1.448038e-06
4              3.11475             26.1535      0.322368  2.190841e-07
5              3.11475             31.0325      0.360404  2.160305e-09
6              3.11475             38.5335      0.473684  8.585439e-16
7              3.11475             85.9650      0.407895  1.072822e-11
8              8.49300             12.7015      0.126917  1.605112e-01
9              8.49300             17.1420      0.164474  3.258970e-02
10             8.49300             21.8765      0.286816  5.699968e-06
11             8.49300             26.1535      0.276316  1.629134e-05
12             8.49300             31.0325      0.359664  3.241741e-09
13    

In [8]:
import pandas as pd

# Read the gradient-binned Pennines table; the next cell uses it through `data`.
file_path = "pennines_binned_grad_9.csv"
data = pd.read_csv(filepath_or_buffer=file_path)


In [9]:
#KS test gradient
# Pairwise two-sample Kolmogorov-Smirnov tests comparing the sinuosity
# distribution of every pair of gradient bins in `data` (loaded by the previous
# cell). Results are printed and written to "grad_ks_test.csv".
from itertools import combinations

from scipy.stats import ks_2samp

# Ensure bins are numeric; labels that cannot be parsed become NaN.
data['gradient_bin_midpoint'] = pd.to_numeric(data['gradient_bin_midpoint'], errors='coerce')

# Split sinuosity by bin once instead of re-filtering the frame for every pair.
# groupby skips rows whose bin is NaN (which the coercion above can create) and
# yields the bins in ascending order; dropna() removes missing sinuosity values
# so the test only ever sees real observations.
sinuosity_by_bin = {
    bin_midpoint: values.dropna()
    for bin_midpoint, values in data.groupby('gradient_bin_midpoint')['sinuosity']
}
# A bin left with no observations cannot be tested (ks_2samp rejects empty input).
sinuosity_by_bin = {
    bin_midpoint: values
    for bin_midpoint, values in sinuosity_by_bin.items()
    if not values.empty
}
unique_bins = sorted(sinuosity_by_bin)

# Iterate through all pairs of bins (each unordered pair once, lower bin first)
ks_results = []
for bin_1, bin_2 in combinations(unique_bins, 2):
    # Perform KS test
    ks_stat, p_value = ks_2samp(sinuosity_by_bin[bin_1], sinuosity_by_bin[bin_2])

    # Store results
    # NOTE: p-values are stored exactly as returned, one test per pair of bins;
    # no multiple-comparison adjustment is applied here.
    ks_results.append({
        'Gradient Bin Midpoint 1': bin_1,
        'Gradient Bin Midpoint 2': bin_2,
        'KS Statistic': ks_stat,
        'P-value': p_value
    })

# Convert results to a DataFrame for easy viewing. The explicit column list
# keeps the CSV header intact even when there are no pairs to compare.
ks_df = pd.DataFrame(
    ks_results,
    columns=['Gradient Bin Midpoint 1', 'Gradient Bin Midpoint 2', 'KS Statistic', 'P-value'],
)

# Display the results
print(ks_df)
#save
output_file = "grad_ks_test.csv"
ks_df.to_csv(output_file, index=False)

    Gradient Bin Midpoint 1  Gradient Bin Midpoint 2  KS Statistic  \
0                  0.002130                 0.007455      0.072368   
1                  0.002130                 0.011775      0.161119   
2                  0.002130                 0.016900      0.217105   
3                  0.002130                 0.023500      0.254400   
4                  0.002130                 0.031750      0.250000   
5                  0.002130                 0.042200      0.393430   
6                  0.002130                 0.055800      0.401316   
7                  0.002130                 0.134300      0.473684   
8                  0.007455                 0.011775      0.167698   
9                  0.007455                 0.016900      0.184211   
10                 0.007455                 0.023500      0.260849   
11                 0.007455                 0.031750      0.243421   
12                 0.007455                 0.042200      0.406544   
13                 0

In [2]:
import pandas as pd

# Read the gradient-binned Pennines table; the next cell uses it through `data`.
file_path = "pennines_binned_grad_9.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
#moods median test gradient bins
# Pairwise Mood's median tests comparing the sinuosity of every pair of
# gradient bins in `data` (loaded by the previous cell). Results are printed
# and written to "grad_moods_median_test.csv".
from itertools import combinations

from scipy.stats import median_test
import pandas as pd

# Ensure bins are numeric; labels that cannot be parsed become NaN.
data['gradient_bin_midpoint'] = pd.to_numeric(data['gradient_bin_midpoint'], errors='coerce')

# Split sinuosity by bin once instead of re-filtering the frame for every pair.
# groupby skips rows whose bin is NaN (which the coercion above can create) and
# yields the bins in ascending order; dropna() removes missing sinuosity values
# so the test only ever sees real observations.
sinuosity_by_bin = {
    bin_midpoint: values.dropna()
    for bin_midpoint, values in data.groupby('gradient_bin_midpoint')['sinuosity']
}
# A bin left with no observations cannot be tested.
sinuosity_by_bin = {
    bin_midpoint: values
    for bin_midpoint, values in sinuosity_by_bin.items()
    if not values.empty
}
unique_bins = sorted(sinuosity_by_bin)

# Iterate through all pairs of bins (each unordered pair once, lower bin first)
mood_results = []
for bin_1, bin_2 in combinations(unique_bins, 2):
    # Perform Mood's median test; the grand median and contingency table that
    # median_test also returns are not needed here.
    stat, p_value, _, _ = median_test(sinuosity_by_bin[bin_1], sinuosity_by_bin[bin_2])

    # Store results
    # NOTE: p-values are stored exactly as returned, one test per pair of bins;
    # no multiple-comparison adjustment is applied here.
    mood_results.append({
        'Gradient Bin Midpoint 1': bin_1,
        'Gradient Bin Midpoint 2': bin_2,
        'Mood’s Median Statistic': stat,
        'P-value': p_value
    })

# Convert results to a DataFrame for easy viewing. The explicit column list
# keeps the CSV header intact even when there are no pairs to compare.
mood_df = pd.DataFrame(
    mood_results,
    columns=['Gradient Bin Midpoint 1', 'Gradient Bin Midpoint 2', 'Mood’s Median Statistic', 'P-value'],
)

# Display the results
print(mood_df)

# Save to CSV
output_file = "grad_moods_median_test.csv"
mood_df.to_csv(output_file, index=False)


    Gradient Bin Midpoint 1  Gradient Bin Midpoint 2  Mood’s Median Statistic  \
0                  0.002130                 0.007455                 0.118421   
1                  0.002130                 0.011775                 1.746414   
2                  0.002130                 0.016900                 4.750000   
3                  0.002130                 0.023500                16.638873   
4                  0.002130                 0.031750                18.013158   
5                  0.002130                 0.042200                29.788197   
6                  0.002130                 0.055800                36.960526   
7                  0.002130                 0.134300                59.065789   
8                  0.007455                 0.011775                 1.746414   
9                  0.007455                 0.016900                 3.802632   
10                 0.007455                 0.023500                16.638873   
11                 0.007455 

In [4]:
import pandas as pd

# Read the ksn-binned Pennines table; the next cell uses it through `data`.
file_path = "pennines_binned_ksn_9.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [5]:
# Pairwise Mood's median tests comparing the sinuosity of every pair of ksn
# bins in `data` (loaded by the previous cell). Results are printed and
# written to "ksn_moods_median_test.csv".
from itertools import combinations

from scipy.stats import median_test
import pandas as pd

# Ensure bins are numeric; labels that cannot be parsed become NaN.
data['ksn_bin_midpoint'] = pd.to_numeric(data['ksn_bin_midpoint'], errors='coerce')

# Split sinuosity by bin once instead of re-filtering the frame for every pair.
# groupby skips rows whose bin is NaN (which the coercion above can create) and
# yields the bins in ascending order; dropna() removes missing sinuosity values
# so the test only ever sees real observations.
sinuosity_by_bin = {
    bin_midpoint: values.dropna()
    for bin_midpoint, values in data.groupby('ksn_bin_midpoint')['sinuosity']
}
# A bin left with no observations cannot be tested.
sinuosity_by_bin = {
    bin_midpoint: values
    for bin_midpoint, values in sinuosity_by_bin.items()
    if not values.empty
}
unique_bins = sorted(sinuosity_by_bin)

# Iterate through all pairs of bins (each unordered pair once, lower bin first)
mood_results = []
for bin_1, bin_2 in combinations(unique_bins, 2):
    # Perform Mood's median test; the grand median and contingency table that
    # median_test also returns are not needed here.
    stat, p_value, _, _ = median_test(sinuosity_by_bin[bin_1], sinuosity_by_bin[bin_2])

    # Store results
    # NOTE: p-values are stored exactly as returned, one test per pair of bins;
    # no multiple-comparison adjustment is applied here.
    mood_results.append({
        'Ksn Bin Midpoint 1': bin_1,
        'Ksn Bin Midpoint 2': bin_2,
        'Mood’s Median Statistic': stat,
        'P-value': p_value
    })

# Convert results to a DataFrame for easy viewing. The explicit column list
# keeps the CSV header intact even when there are no pairs to compare.
mood_df = pd.DataFrame(
    mood_results,
    columns=['Ksn Bin Midpoint 1', 'Ksn Bin Midpoint 2', 'Mood’s Median Statistic', 'P-value'],
)

# Display the results
print(mood_df)

# Save to CSV
output_file = "ksn_moods_median_test.csv"
mood_df.to_csv(output_file, index=False)


    Ksn Bin Midpoint 1  Ksn Bin Midpoint 2  Mood’s Median Statistic  \
0              3.11475              8.4930                 0.328947   
1              3.11475             12.7015                 2.406581   
2              3.11475             17.1420                 2.223684   
3              3.11475             21.8765                22.738277   
4              3.11475             26.1535                22.118421   
5              3.11475             31.0325                35.016208   
6              3.11475             38.5335                55.592105   
7              3.11475             85.9650                39.802632   
8              8.49300             12.7015                 0.742917   
9              8.49300             17.1420                 0.644737   
10             8.49300             21.8765                16.638873   
11             8.49300             26.1535                16.118421   
12             8.49300             31.0325                32.349396   
13    

In [9]:
import pandas as pd
from scipy.stats import kruskal

# Read the gradient-binned table; it is expected to provide the 'sinuosity'
# and 'LE' columns used below.
df = pd.read_csv("pennines_binned_grad_9.csv")

# Collect one list of sinuosity values per erodibility (LE) category.
sinuosity_per_le = df.groupby('LE')['sinuosity']
erodibility_groups = sinuosity_per_le.apply(list)

# Kruskal-Wallis H test across all LE categories at once: each category's
# list of sinuosity values is passed as a separate sample.
le_samples = erodibility_groups.tolist()
stat, p_value = kruskal(*le_samples)

# Report the test outcome.
print(f"Kruskal-Wallis H statistic: {stat}")
print(f"P-value: {p_value}")

# Persist the single-row summary next to the notebook.
results = pd.DataFrame([{'statistic': stat, 'p_value': p_value}])
results.to_csv("sinuosity_vs_erodibility_kw_test.csv", index=False)


Kruskal-Wallis H statistic: 39.363427606020466
P-value: 4.594422794174668e-05
