<a href="https://colab.research.google.com/github/sravyasambaturu/wilcoxon_tests/blob/main/wilcoxon_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install matplotlib seaborn
!pip install xlsxwriter
!pip install natsort  # Install natsort for natural sorting
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import files
from scipy.stats import wilcoxon
from natsort import natsorted # Import natsorted

# Upload multiple files
uploaded = files.upload()

metrics = ['CountLineCode', 'MaxCyclomatic', 'CountClassBase', 'CountClassCoupled', 'CountClassDerived', 'CountDeclMethodAll', 'CountDeclInstanceMethod', 'MaxInheritanceTree', 'CountDeclMethod', 'CountDeclInstanceVariable', 'PercentLackOfCohesion']
dfs = []  # Store DataFrames for each version

# Process each uploaded file
for filename in uploaded.keys():
    version = filename  # Use the entire filename as the version

    # Explicitly specify the engine based on file extension
    if filename.endswith('.csv'):
        df = pd.read_csv(filename)  # Read as CSV
    elif filename.endswith('.xlsx') or filename.endswith('.xls'):
        df = pd.read_excel(filename, engine='openpyxl')  # Read as Excel
    else:
        print(f"Skipping file {filename}: Unsupported format")
        continue  # Skip to the next file

    # Remove leading/trailing whitespace and any tab characters from column names:
    df.columns = df.columns.str.strip().str.replace('\t', '')
    df['Version'] = version  # Add a 'Version' column
    dfs.append(df)

# Concatenate all DataFrames into a single DataFrame
all_data = pd.concat(dfs, ignore_index=True)

# --- Sort by Version using natsort ---
unique_versions = natsorted(all_data['Version'].unique())
all_data['Version'] = pd.Categorical(all_data['Version'], categories=unique_versions, ordered=True)
all_data.sort_values(by=['Version'], inplace=True)

# --- Identify and print first and last versions ---
first_version = all_data['Version'].iloc[0]
last_version = all_data['Version'].iloc[-1]

print(f"\nFirst Version: {first_version}")
print(f"Last Version: {last_version}\n")

# --- Wilcoxon Signed-Rank Test for CountLineCode across all versions ---
print("\nWilcoxon Signed-Rank Test for CountLineCode across versions:")
for i in range(len(all_data['Version'].unique()) - 1):
    version1 = all_data['Version'].unique()[i]
    version2 = all_data['Version'].unique()[i + 1]

    data1 = all_data[all_data['Version'] == version1]['CountLineCode']
    data2 = all_data[all_data['Version'] == version2]['CountLineCode']

    statistic, p_value = wilcoxon(data1, data2, alternative='less')

    print(f"Comparing {version1} vs. {version2}:")
    print(f"  Statistic: {statistic}")
    print(f"  P-value: {p_value}")
    if p_value < 0.05:
        print(f"  Significant difference (p < 0.05), supporting the hypothesis that CountLineCode is higher in {version2}.")
    else:
        print(f"  No significant difference (p >= 0.05).")
    print("-" * 30)

# --- Wilcoxon Signed-Rank Test for all metrics between first and last versions ---
first_version_data = all_data[all_data['Version'] == first_version][metrics]
last_version_data = all_data[all_data['Version'] == last_version][metrics]

print("\nWilcoxon Signed-Rank Test Results (First vs. Last Version):")
wilcoxon_results = [] # Store results for Excel
for metric in metrics:
    # Perform Wilcoxon test (one-sided, assuming last version is greater)
    statistic, p_value = wilcoxon(first_version_data[metric], last_version_data[metric], alternative='less')

    print(f"Metric: {metric}")
    print(f"  Statistic: {statistic}")
    print(f"  P-value: {p_value}")

    # Check for statistical significance (e.g., p-value < 0.05)
    if p_value < 0.05:
        print(f"  Significant difference (p < 0.05), supporting the hypothesis that {metric} is higher in the last version.")
    else:
        print(f"  No significant difference (p >= 0.05).")
    print("-" * 30)  # Separator

    wilcoxon_results.append([metric, statistic, p_value]) # Append to results list

# Save the results to an Excel file
output_filename = 'wilcoxon_test_results.xlsx'
with pd.ExcelWriter(output_filename, engine='xlsxwriter') as writer:
    pd.DataFrame(wilcoxon_results, columns=['Metric', 'Statistic', 'P-value']).to_excel(writer, sheet_name='Wilcoxon Test', index=False)

# Download the Excel file
files.download(output_filename)

Collecting xlsxwriter
  Downloading XlsxWriter-3.2.2-py3-none-any.whl.metadata (2.8 kB)
Downloading XlsxWriter-3.2.2-py3-none-any.whl (165 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m165.1/165.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.2


Saving testng-5.13.csv to testng-5.13.csv
Saving testng-7.11.0.csv to testng-7.11.0.csv
Saving testng-7.5.csv to testng-7.5.csv
Saving testng-6.13.1.csv to testng-6.13.1.csv
Saving testng-6.0.1.csv to testng-6.0.1.csv

First Version: testng-5.13.csv
Last Version: testng-7.11.0.csv


Wilcoxon Signed-Rank Test for CountLineCode across versions:
Comparing testng-5.13.csv vs. testng-6.0.1.csv:
  Statistic: nan
  P-value: nan
  No significant difference (p >= 0.05).
------------------------------
Comparing testng-6.0.1.csv vs. testng-6.13.1.csv:
  Statistic: nan
  P-value: nan
  No significant difference (p >= 0.05).
------------------------------
Comparing testng-6.13.1.csv vs. testng-7.5.csv:
  Statistic: nan
  P-value: nan
  No significant difference (p >= 0.05).
------------------------------
Comparing testng-7.5.csv vs. testng-7.11.0.csv:
  Statistic: nan
  P-value: nan
  No significant difference (p >= 0.05).
------------------------------

Wilcoxon Signed-Rank Test Results (First vs.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>