In [2]:
import os
import librosa
import numpy as np
import scipy.stats as stats
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Use Seaborn's "whitegrid" look for every figure drawn after this point
sns.set_style(style="whitegrid")

# Category folder name -> integer label, numbered in the order listed
categories = {
    name: index
    for index, name in enumerate(("SpiritualMeditation", "Music", "Normal(Silence)"))
}

# Column names for the feature vector, in the order extract_features emits them
feature_names = [
    *(f"MFCC_{i}" for i in range(13)),     # 13 MFCC features
    "ZCR",                                 # time-domain: zero-crossing rate
    "RMS",                                 # time-domain: RMS energy
    *(f"Wavelet_{i}" for i in range(10)),  # 10 Wavelet features
]

# Function to extract features
def _haar_wavelet_features(signal, n_features=10):
    """Summarise a 1-D signal by the mean magnitude of its Haar wavelet sub-bands.

    The signal is decomposed with ``n_features - 1`` levels of the orthonormal
    Haar transform, implemented with numpy only.

    Args:
        signal: 1-D array-like of samples.
        n_features: Number of values to return (one approximation band plus
            ``n_features - 1`` detail bands).

    Returns:
        ``np.ndarray`` of length ``n_features``. Index 0 is the mean absolute
        value of the final approximation band; indices 1.. are the mean
        absolute detail coefficients ordered from the coarsest level to the
        finest. Bands that cannot be computed because the signal is too short
        (or empty) are reported as 0.0.
    """
    features = np.zeros(n_features)
    approx = np.asarray(signal, dtype=float).ravel()
    if n_features < 1 or approx.size == 0:
        return features

    # Collected finest-first (level 1, level 2, ...); reversed when stored.
    detail_means = []
    for _ in range(n_features - 1):
        if approx.size < 2:
            # Too few samples left for another level: leave this band at 0.
            detail_means.append(0.0)
            continue
        if approx.size % 2:
            # Repeat the last sample so the samples pair up evenly.
            approx = np.append(approx, approx[-1])
        even, odd = approx[0::2], approx[1::2]
        detail_means.append(np.abs(even - odd).mean() / np.sqrt(2.0))
        approx = (even + odd) / np.sqrt(2.0)

    features[0] = np.abs(approx).mean()
    features[1:] = detail_means[::-1]
    return features


def extract_features(file_path):
    """Load an audio file and return its 25-element feature vector.

    The vector is laid out as 13 time-averaged MFCCs, then the mean
    zero-crossing rate and mean RMS energy, then 10 Haar wavelet sub-band
    features.

    Previously the last 10 values came from a second
    ``librosa.feature.mfcc(..., n_mfcc=10)`` call, so the "wavelet" features
    were just a repeat of the first 10 MFCCs. They are now computed from an
    actual wavelet decomposition of the waveform.

    Args:
        file_path: Path of the audio file; it is loaded resampled to 16 kHz.

    Returns:
        1-D ``np.ndarray`` of 25 values.
    """
    waveform, sr = librosa.load(file_path, sr=16000)

    # MFCC features: average each of the 13 coefficients over all frames
    mfccs = librosa.feature.mfcc(y=waveform, sr=sr, n_mfcc=13).mean(axis=1)

    # Time-domain features: frame-wise values averaged over the recording
    zcr = librosa.feature.zero_crossing_rate(y=waveform)[0].mean()
    rms = librosa.feature.rms(y=waveform)[0].mean()

    # Wavelet transform features (10 Haar sub-band magnitudes)
    coeffs = _haar_wavelet_features(waveform, n_features=10)

    # Combine features
    return np.concatenate([mfccs, [zcr, rms], coeffs])

# Load dataset: one row per audio file, tagged with its category folder name
root_dir = "/kaggle/input/qmsat-dataset/ATS-data"
data = []

# Only the folder names are needed here; the integer labels stored in
# `categories` are not used when building the table.
for category in categories:
    category_path = os.path.join(root_dir, category)
    # isdir (rather than exists) so a stray file with the category's name
    # is skipped instead of making os.listdir raise.
    if not os.path.isdir(category_path):
        print(f"Skipping {category} (Folder not found)")
        continue
    # Sort so rows appear in a reproducible order (os.listdir order is arbitrary).
    for file in sorted(os.listdir(category_path)):
        # Case-insensitive match so ".WAV" recordings are not silently dropped.
        if file.lower().endswith(".wav"):
            features = extract_features(os.path.join(category_path, file))
            data.append([category] + list(features))

# Convert to DataFrame: "Category" followed by one column per feature
df = pd.DataFrame(data, columns=["Category"] + feature_names)

# Initialize calmness counter: number of features on which each category
# has the lowest mean
calmness_count = {"SpiritualMeditation": 0, "Music": 0, "Normal(Silence)": 0}

# Rows collected for the results table (one list per feature)
results = []

# Perform statistical tests for each feature
for feature in feature_names:
    # .loc with a boolean mask avoids chained indexing
    spiritual_meditation_values = df.loc[df["Category"] == "SpiritualMeditation", feature]
    music_values = df.loc[df["Category"] == "Music", feature]
    normal_values = df.loc[df["Category"] == "Normal(Silence)", feature]

    # Calculate mean values (NaN when a category has no rows)
    mean_spiritual_meditation = np.mean(spiritual_meditation_values)
    mean_music = np.mean(music_values)
    mean_normal = np.mean(normal_values)

    # ANOVA test (Compare all three distributions)
    f_stat, p_anova = stats.f_oneway(spiritual_meditation_values, music_values, normal_values)

    # Welch t-tests (pairwise comparisons, unequal variances allowed)
    t_sm_m, p_sm_m = stats.ttest_ind(spiritual_meditation_values, music_values, equal_var=False)
    t_sm_n, p_sm_n = stats.ttest_ind(spiritual_meditation_values, normal_values, equal_var=False)
    t_mn, p_mn = stats.ttest_ind(music_values, normal_values, equal_var=False)

    # Determine calmest category = lowest mean among categories that have data.
    # The previous if/else chain fell through to "Normal(Silence)" whenever
    # means tied or any mean was NaN (e.g. a skipped folder), crediting a
    # category that was not actually the lowest.
    # NOTE: this tally looks only at the means; the p-values computed above
    # are reported but do not gate which category is counted.
    category_means = {
        "SpiritualMeditation": mean_spiritual_meditation,
        "Music": mean_music,
        "Normal(Silence)": mean_normal,
    }
    comparable_means = {
        name: value for name, value in category_means.items() if np.isfinite(value)
    }
    if len(comparable_means) >= 2:
        calmest = min(comparable_means, key=comparable_means.get)
        calmest_label = calmest
    else:
        # Fewer than two categories have data: nothing to compare.
        calmest = None
        calmest_label = "Undetermined"

    # Print results
    print(f"Feature: {feature}")
    print(f"Mean values: SpiritualMeditation = {mean_spiritual_meditation:.4f}, Normal = {mean_normal:.4f}, Music = {mean_music:.4f}")
    print(f"Comparison: {feature}(SpiritualMeditation) {'<' if mean_spiritual_meditation < mean_normal else '>'} {feature}(Normal) {'<' if mean_normal < mean_music else '>'} {feature}(Music)")
    print(f"ANOVA p-value: {p_anova:.4f}")
    print(f"T-tests: SpiritualMeditation vs Music p = {p_sm_m:.4f}, SpiritualMeditation vs Normal p = {p_sm_n:.4f}, Music vs Normal p = {p_mn:.4f}")
    print(f"Calmest category based on {feature}: {calmest_label}\n")

    # Increment calmness counter (only when a winner could be determined)
    if calmest is not None:
        calmness_count[calmest] += 1

    # Save results
    results.append([feature, mean_spiritual_meditation, mean_music, mean_normal, calmest_label, p_anova, p_sm_m, p_sm_n, p_mn])

    # Visualization: one box plot per feature, written to disk and then closed
    # so figures do not accumulate in memory across the loop
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.boxplot(x="Category", y=feature, data=df, palette="coolwarm", ax=ax)
    ax.set_title(f"{feature} Distribution across Categories")
    ax.set_xlabel("Category")
    ax.set_ylabel(feature)
    fig.savefig(f"/kaggle/working/feature_{feature}.jpeg", dpi=300)  # Save graph as JPEG
    plt.close(fig)

# Report how many features each category came out lowest on
print("\n=== Calmness Count Summary ===")
for name in calmness_count:
    print(f"{name}: {calmness_count[name]} features")

# The category with the highest tally is reported as the overall calmest
# (max() keeps the first maximum it meets, so ties go to the earliest key)
overall_calmest = max(calmness_count, key=lambda name: calmness_count[name])
print(f"\nThe overall calmest category based on all features is: {overall_calmest}")

# Write the per-feature results table to an Excel workbook
result_columns = [
    "Feature",
    "Mean SpiritualMeditation",
    "Mean Music",
    "Mean Normal",
    "Calmest Category",
    "ANOVA p-value",
    "T-test SpiritualMeditation vs Music",
    "T-test SpiritualMeditation vs Normal",
    "T-test Music vs Normal",
]
df_results = pd.DataFrame(results, columns=result_columns)
df_results.to_excel("/kaggle/working/calmness_analysis_results.xlsx", index=False)

print("\nResults saved to calmness_analysis_results.xlsx and feature visualizations saved as JPEG.")


Feature: MFCC_0
Mean values: SpiritualMeditation = -254.2791, Normal = -249.5712, Music = -245.2362
Comparison: MFCC_0(SpiritualMeditation) < MFCC_0(Normal) < MFCC_0(Music)
ANOVA p-value: 0.8719
T-tests: SpiritualMeditation vs Music p = 0.5749, SpiritualMeditation vs Normal p = 0.7915, Music vs Normal p = 0.8051
Calmest category based on MFCC_0: SpiritualMeditation

Feature: MFCC_1
Mean values: SpiritualMeditation = 93.7285, Normal = 87.9650, Music = 99.2803
Comparison: MFCC_1(SpiritualMeditation) > MFCC_1(Normal) < MFCC_1(Music)
ANOVA p-value: 0.6528
T-tests: SpiritualMeditation vs Music p = 0.6550, SpiritualMeditation vs Normal p = 0.6439, Music vs Normal p = 0.3522
Calmest category based on MFCC_1: Normal(Silence)

Feature: MFCC_2
Mean values: SpiritualMeditation = -6.4426, Normal = -9.1582, Music = -3.7299
Comparison: MFCC_2(SpiritualMeditation) > MFCC_2(Normal) < MFCC_2(Music)
ANOVA p-value: 0.7109
T-tests: SpiritualMeditation vs Music p = 0.6854, SpiritualMeditation vs Normal p =