In [13]:
import os
import sys
import glob
import json
from collections import defaultdict

import numpy as np
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px
import pandas as pd

In [4]:
root_folder = r"c:\Users\milan\Desktop"
log_files = glob.glob(os.path.join(root_folder, "*.log"))

In [22]:
def parse_log_file(log_file):
    with open(log_file, 'r') as f:
        lines = f.readlines()
        
    lines = lines[2:-1] # first two lines are headers, last line might be incomplete

    data = defaultdict(list)
    for line in lines:
        line = line.strip()
        if not line.startswith(">"):\
            continue
        line = line[1:] # drop >
        line_parts = line.split(",")
        for part in line_parts:
            name, value = part.split(":")
            value = float(value)
            data[name].append(value)

    # print("Parsed data:")
    # for key, values in data.items():
    #     print(f"{key}: {values} (size: {len(values)})")

    df = pd.DataFrame(data)
    return df

all_df = {}
for log_file in log_files:
    print(f"Processing file: {log_file}")
    df = parse_log_file(log_file)
    all_df[os.path.basename(log_file)] = df

Processing file: c:\Users\milan\Desktop\loud_music_max_gain.log
Processing file: c:\Users\milan\Desktop\loud_music_nnormal_gain.log
Processing file: c:\Users\milan\Desktop\no_signal_max_gain.log
Processing file: c:\Users\milan\Desktop\no_signal_normal_gain.log
Processing file: c:\Users\milan\Desktop\quite_music_max_gain.log
Processing file: c:\Users\milan\Desktop\quite_music_normal_gain.log
Processing file: c:\Users\milan\Desktop\silence_max_gain.log
Processing file: c:\Users\milan\Desktop\silence_normal_gain.log


In [26]:
def visualize_pair(filename1, filename2, column_name):
    df1 = all_df[filename1]
    df2 = all_df[filename2]
    
    fig = go.Figure()
    
    # Add trace for first file
    fig.add_trace(go.Scatter(x=df1.index, y=df1[column_name], mode='lines', name=f'{filename1} {column_name}'))
    
    # Add trace for second file
    fig.add_trace(go.Scatter(x=df2.index, y=df2[column_name], mode='lines', name=f'{filename2} {column_name}'))
    
    # Update layout
    fig.update_layout(
        title=f"Comparison of '{column_name}' Signal",
        xaxis_title="Index",
        yaxis_title=f"{column_name} Signal",
        legend_title="Dataset"
    )
    
    return fig

visualize_pair("silence_max_gain.log", "silence_normal_gain.log", "a0").show()
visualize_pair("quite_music_max_gain.log", "quite_music_normal_gain.log", "a0").show()

## Signal Analysis for Music Detection

Let's analyze the characteristics that differentiate music from noise:

In [56]:
def compute_rolling_statistics(df, column='a0', window_size=25):
    """Compute rolling statistics for signal analysis"""
    # Make a copy to avoid modifying the original dataframe
    result = df.copy()
    
    # Compute rolling variance
    result['rolling_var'] = df[column].rolling(window=window_size).var()

    # Compute the derivative of rolling variance
    result['rolling_var_derivative'] = result['rolling_var'].diff()
    
    # Compute variance of variance (using another rolling window)
    var_window = min(window_size * 3, len(df) // 3)  # Larger window for variance of variance
    result['var_of_var'] = result['rolling_var'].rolling(window=var_window).var()
    
    # Drop NaN values from calculations
    result = result[['rolling_var', 'var_of_var', 'rolling_var_derivative', 'a0']].dropna()
    
    return result

# Compute features for each dataset
analysis_results = {}
for filename, df in all_df.items():
    if "max_gain" in filename:
        continue

    statistics = compute_rolling_statistics(df)
    analysis_results[filename] = statistics

    # Visualize all statistics columns in a single plot
    fig = go.Figure()
    columns_to_visualize = ['a0', 'rolling_var', 'var_of_var']
    
    for column in columns_to_visualize:
        fig.add_trace(go.Scatter(
            x=statistics.index,
            y=statistics[column],
            mode='lines',
            name=f"{column} ({filename})"
        ))
        print(column, statistics[column].shape)
    
    # Update layout
    fig.update_layout(
        title=f"Statistics over Time for {filename}",
        xaxis_title="Index",
        yaxis_title="Value",
        legend_title="Statistics"
    )
    
    fig.show()

    
    

a0 (740,)
rolling_var (740,)
var_of_var (740,)


a0 (784,)
rolling_var (784,)
var_of_var (784,)


a0 (771,)
rolling_var (771,)
var_of_var (771,)


a0 (784,)
rolling_var (784,)
var_of_var (784,)


In [28]:
# Compare variance of variance between silence and music
def compare_feature(feature_name):
    fig = go.Figure()
    
    # Add silence data
    silence_data = analysis_results['silence_normal_gain.log']
    fig.add_trace(go.Scatter(x=silence_data.index, y=silence_data[feature_name], 
                             mode='lines', name='Silence'))
    
    # Add quiet music data
    quiet_music = analysis_results['quite_music_normal_gain.log']
    fig.add_trace(go.Scatter(x=quiet_music.index, y=quiet_music[feature_name], 
                             mode='lines', name='Quiet Music'))
    
    # Add loud music data
    loud_music = analysis_results['loud_music_max_gain.log']
    fig.add_trace(go.Scatter(x=loud_music.index, y=loud_music[feature_name], 
                             mode='lines', name='Loud Music'))
    
    fig.update_layout(
        title=f"Comparison of {feature_name} between Silence and Music",
        xaxis_title="Sample Index",
        yaxis_title=feature_name,
        legend_title="Signal Type"
    )
    
    return fig

# Compare variance of variance
compare_feature('var_of_var').show()

# Compare zero crossing rate
compare_feature('zcr').show()

In [29]:
def calculate_metrics_for_detection(df, window_size=50):
    """Calculate metrics useful for music detection"""
    stats = compute_rolling_statistics(df, window_size=window_size)
    
    # Calculate summary statistics
    metrics = {
        'mean_var_of_var': stats['var_of_var'].mean(),
        'std_var_of_var': stats['var_of_var'].std(),
        'mean_zcr': stats['zcr'].mean(),
        'std_rolling_var': stats['rolling_var'].std(),
        'max_rolling_var': stats['rolling_var'].max(),
    }
    
    return metrics

# Calculate metrics for each type
results = {}
for key, df in all_df.items():
    results[key] = calculate_metrics_for_detection(df)
    
results_df = pd.DataFrame(results).T
results_df['signal_type'] = results_df.index.map(lambda x: 'music' if 'music' in x.lower() else 'silence/noise')

# Display results
results_df

Unnamed: 0,mean_var_of_var,std_var_of_var,mean_zcr,std_rolling_var,max_rolling_var,signal_type
loud_music_max_gain.log,237175000.0,292566900.0,0.533893,29877.963819,103073.315918,music
loud_music_nnormal_gain.log,21408820.0,26631300.0,0.505687,8901.365609,30305.089388,music
no_signal_max_gain.log,9.450593,5.309478,0.385146,3.717418,78.785306,silence/noise
no_signal_normal_gain.log,0.01764943,0.00857654,0.632368,0.143323,1.805714,silence/noise
quite_music_max_gain.log,30400.34,39422.99,0.57685,433.021006,1650.622449,music
quite_music_normal_gain.log,32.38697,29.90935,0.520179,9.03674,32.800408,music
silence_max_gain.log,0.008498496,0.005195165,0.529069,0.107987,1.092245,silence/noise
silence_normal_gain.log,0.004800761,0.006122252,0.414123,0.069684,0.5,silence/noise


In [30]:
# Simple threshold-based detection function
def is_music_signal(signal_data, var_of_var_threshold=0.5, zcr_variability_threshold=0.02):
    """Determine if signal is music based on statistical properties"""
    # Calculate metrics
    stats = compute_rolling_statistics(signal_data)
    
    # Check if variance of variance is above threshold
    var_of_var_criteria = stats['var_of_var'].mean() > var_of_var_threshold
    
    # Check if zero crossing rate has significant variability (music tends to have more variable ZCR)
    zcr_variability = stats['zcr'].std()
    zcr_criteria = zcr_variability > zcr_variability_threshold
    
    # Combine criteria
    return var_of_var_criteria or zcr_criteria

# Test the detector on our datasets
for filename, df in all_df.items():
    result = is_music_signal(df)
    expected = 'music' in filename.lower()
    correct = result == expected
    print(f"{filename}: Detected as music: {result} - {'CORRECT' if correct else 'INCORRECT'}")  

loud_music_max_gain.log: Detected as music: True - CORRECT
loud_music_nnormal_gain.log: Detected as music: True - CORRECT
no_signal_max_gain.log: Detected as music: True - INCORRECT
no_signal_normal_gain.log: Detected as music: True - INCORRECT
quite_music_max_gain.log: Detected as music: True - CORRECT
quite_music_normal_gain.log: Detected as music: True - CORRECT
silence_max_gain.log: Detected as music: True - INCORRECT
silence_normal_gain.log: Detected as music: True - INCORRECT
