In [2]:
import os
from collections import defaultdict
from fuzzywuzzy import fuzz
import re
import pandas as pd

In [3]:
# List of reviewers
reviewers = [
    'achoreviews',
    'aftersound',
    'animagus',
    'arn',
    'bedrock',
    'bryaudioreviews',
    'cammyfi',
    'soundjedi',
    'eplv',
    'timmyv',
    'harpo',
    'hbb',
    'cqtek',
    'hobbytalk',
    'ianfann',
    'iemworld',
    'jacstone',
    'kr0mka',
    'kurin',
    'melatonin',
    'nymz', 
    'pw',
    'recode',
    'rg',
    'shortbus',
    'suporsalad',
    'tgx78',
    'vortexreviews',
    'vsg',
    'wdym',
    'akros',
    'data_mrs'
]

In [4]:
def remove_channel_suffix(file_name):
    file_name = file_name.lower()
    file_name = re.sub(r'\(.*?\)', '', file_name)  # Remove content in brackets
    file_name = file_name.strip()  # Remove leading and trailing spaces

    if " l.csv" in file_name:
        file_name = file_name.replace(" l.csv", ".csv")
    elif " r.csv" in file_name:
        file_name = file_name.replace(" r.csv", ".csv")

    return file_name.strip()  # Remove any extra spaces after channel suffix removal

In [5]:
def group_files_by_iem(files, threshold=100):
    groups = defaultdict(list)
    added_files = set()

    # Create a special "target" group
    target_group = "target"

    for file1 in files:
        modified_file1 = remove_channel_suffix(file1)

        # Check if the file name contains "target"
        if "target" in file1.lower():
            if file1 not in added_files:
                groups[target_group].append(file1)
                added_files.add(file1)
            continue

        if file1 not in added_files:
            groups[file1].append(file1)
            added_files.add(file1)

        for file2 in files:
            if file2 not in added_files:
                modified_file2 = remove_channel_suffix(file2)
                similarity = fuzz.token_set_ratio(modified_file1, modified_file2)

                # If the similarity score is above the threshold, add the file to the group
                if similarity >= threshold:
                    groups[file1].append(file2)
                    added_files.add(file2)
                # If the similarity score is between 95 and the threshold, compare only the IEM part
                elif 99 <= similarity < threshold:
                    iem1 = re.sub(r'[^a-zA-Z0-9]', '', modified_file1.split()[1]) if len(modified_file1.split()) > 1 else ''
                    iem2 = re.sub(r'[^a-zA-Z0-9]', '', modified_file2.split()[1]) if len(modified_file2.split()) > 1 else ''
                    iem_similarity = fuzz.token_set_ratio(iem1, iem2)
                    
                    if iem_similarity >= threshold:
                        groups[file1].append(file2)
                        added_files.add(file2)

    return groups

In [6]:
# Gather all file names
all_files = []

In [7]:
for reviewer in reviewers:
    reviewer_path = f'output/csv/{reviewer}'

    if os.path.exists(reviewer_path):
        files = os.listdir(reviewer_path)
        all_files.extend([(reviewer, file) for file in files])

In [8]:
# Group files by IEM
grouped_files = group_files_by_iem([file for _, file in all_files])

In [9]:
print(f"Total number of files: {len(all_files)}")
print(f"Total number of groups: {len(grouped_files)}")
print(f"Total number of files in groups: {sum([len(files) for files in grouped_files.values()])}")
print(f"Total number of files not in groups: {len(all_files) - sum([len(files) for files in grouped_files.values()])}")
print(f"\nAvg number of files per group: {sum([len(files) for files in grouped_files.values()]) / len(grouped_files)}")

Total number of files: 9882
Total number of groups: 2356
Total number of files in groups: 7668
Total number of files not in groups: 2214

Avg number of files per group: 3.2546689303904923


In [10]:
# Print grouped files
for representative, files in grouped_files.items():
    print(f"{representative}:")
    for file in files:
        reviewers_with_file = [reviewer for reviewer, f in all_files if f == file]
        print(f"  {file} - {', '.join(reviewers_with_file)}")
    print()

7Hz Dioko L.csv:
  7Hz Dioko L.csv - achoreviews, bedrock, timmyv, iemworld, data_mrs
  7HZ SALNOTES DIOKO L.csv - aftersound
  7HZ SALNOTES DIOKO R.csv - aftersound
  7Hz Salnotes Dioko L.csv - arn, pw
  7Hz Dioko R.csv - bedrock, timmyv, hbb, iemworld, data_mrs
  7hz Dioko L.csv - hobbytalk
  7hz Dioko R.csv - hobbytalk
  7HZ DIOKO (bass mod) L.csv - ianfann
  7HZ DIOKO (bass mod) R.csv - ianfann
  7HZ DIOKO L.csv - ianfann
  7HZ DIOKO R.csv - ianfann
  Dioko L.csv - kr0mka
  Dioko R.csv - kr0mka
  Resolve 7Hz Salnotes Dioko Gras L.csv - kurin
  7hz Salnotes Dioko L.csv - nymz
  7hz Salnotes Dioko R.csv - nymz
  7Hz Salnotes Dioko R.csv - pw
  7Hz Salnotes Dioko TapedVent L.csv - pw
  7hz x Crinacle Dioko L.csv - vortexreviews
  7hz x Crinacle Dioko R.csv - vortexreviews
  7hz x Crinacle Dioko Updated L.csv - vortexreviews
  7hz x Crinacle Dioko Updated R.csv - vortexreviews
  7Hz x Crinacle Salnotes Dioko L.csv - vsg
  7Hz x Crinacle Salnotes Dioko R.csv - vsg
  7Hz Dioko (post-op) 

### Dataframe

In [11]:
# ... (previous code remains unchanged)

# Prepare the data for the DataFrame
iem_data = defaultdict(lambda: defaultdict(list))

for representative, files in grouped_files.items():
    for file in files:
        reviewers_with_file = [reviewer for reviewer, f in all_files if f == file]
        for reviewer in reviewers_with_file:
            iem_data[representative][reviewer].append(file)

# Convert the nested dictionary to a pandas DataFrame
iem_df = pd.DataFrame.from_dict(iem_data, orient='index')

# Fill empty cells with an empty list
iem_df = iem_df.applymap(lambda x: x if isinstance(x, list) else [])


In [12]:
iem_df

Unnamed: 0,achoreviews,bedrock,timmyv,iemworld,data_mrs,aftersound,arn,pw,hbb,hobbytalk,...,recode,animagus,bryaudioreviews,cammyfi,eplv,melatonin,shortbus,suporsalad,wdym,akros
7Hz Dioko L.csv,[7Hz Dioko L.csv],"[7Hz Dioko L.csv, 7Hz Dioko R.csv]","[7Hz Dioko L.csv, 7Hz Dioko R.csv]","[7Hz Dioko L.csv, 7Hz Dioko R.csv]","[7Hz Dioko L.csv, 7Hz Dioko R.csv, 7Hz Dioko (...","[7HZ SALNOTES DIOKO L.csv, 7HZ SALNOTES DIOKO ...",[7Hz Salnotes Dioko L.csv],"[7Hz Salnotes Dioko L.csv, 7Hz Salnotes Dioko ...",[7Hz Dioko R.csv],"[7hz Dioko L.csv, 7hz Dioko R.csv]",...,[],[],[],[],[],[],[],[],[],[]
7Hz Eternal L.csv,[7Hz Eternal L.csv],"[7Hz Eternal L.csv, 7Hz Eternal R.csv]",[],"[7Hz ETERNAL L.csv, 7Hz ETERNAL R.csv]",[],[],[],[],[7Hz Eternal R.csv],[],...,[],[],[],[],[],[],[],[],[],[]
7Hz Legato L.csv,[7Hz Legato L.csv],[],"[7Hz Legato L.csv, 7Hz Legato R.csv]",[],[],"[7HZ LEGATO L.csv, 7HZ LEGATO R.csv]",[],[],[7Hz Legato R.csv],[],...,[],[],[],[],[],[],[],[],[],[]
7Hz Timeless L.csv,[7Hz Timeless L.csv],[],"[7Hz Timeless L.csv, 7Hz Timeless R.csv, 7Hz T...","[7Hz Timeless (FINAL Tips) L.csv, 7Hz Timeless...","[7Hz Timeless L.csv, 7Hz Timeless R.csv, 7Hz T...","[7HZ TIMELESS AE L.csv, 7HZ TIMELESS AE R.csv,...",[7Hz Timeless L.csv],"[7Hz Timeless L.csv, 7Hz Timeless R.csv, 7Hz T...","[7Hz Timeless AE R.csv, 7Hz Timeless (no filte...","[7hz Timeless L.csv, 7hz Timeless R.csv, 7hz T...",...,[],[],[],[],[],[],[],[],[],[]
7Hz Zero L.csv,[7Hz Zero L.csv],"[7Hz Zero L.csv, 7Hz Zero R.csv]","[7Hz Zero L.csv, 7Hz Zero R.csv]","[7hz Zero (EJ07 FOAM Eartips) L.csv, 7hz Zero ...","[7Hz Zero L.csv, 7Hz Zero R.csv]","[7HZ SALNOTES ZERO L.csv, 7HZ SALNOTES ZERO R....",[7Hz Salnotes Zero L.csv],"[7Hz Salnotes Zero L.csv, 7Hz Salnotes Zero R....",[7hz Zero R.csv],"[7hz Zero R.csv, 7hz Zero L.csv, 7hz Zero Tape...",...,"[zero L.csv, zero R.csv]",[],[],[],[],[],[],[],[],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"710 Btnc (Active, ANC off) L.csv",[],[],[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],"[710 Btnc (Active, ANC off) L.csv, 710 Btnc (A...",[]
Elegia (Stock Pads) L.csv,[],[],[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[Elegia (Stock Pads) L.csv],[]
Elex (Dekoni Elite Fenestrated Sheepskin Pads) L.csv,[],[],[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[Elex (Dekoni Elite Fenestrated Sheepskin Pads...,[]
Ola L.csv,[],[],[],[],[],[],[],[],[],[],...,[],[],[],[],[],[],[],[],[],[Ola Stock L.csv]


### Average

In [18]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter

In [19]:
def read_and_interpolate(file_path, x_values):
    df = pd.read_csv(file_path)

    # Check if 'Y' column is present in the DataFrame
    if 'Y' not in df.columns:
        raise KeyError("Missing 'Y' column in the DataFrame")

    # Group data points by the same X-value and take the mean of their Y-values
    df = df.groupby('X', as_index=False).mean(numeric_only=True)

    min_x = df['X'].min()
    max_x = df['X'].max()
    x_values_interp = x_values[(x_values >= min_x) & (x_values <= max_x)]
    f = interp1d(df['X'], df['Y'], kind='linear', fill_value='extrapolate')
    y_values = np.empty_like(x_values)
    y_values[:] = np.nan
    y_values[(x_values >= min_x) & (x_values <= max_x)] = f(x_values_interp)
    return y_values

In [20]:
def generate_x_values():
    x_values = np.logspace(np.log10(20), np.log10(20000), num=1000)
    return np.concatenate(([0], x_values))

In [21]:
def calculate_average(file_paths, output_path):
    x_values = generate_x_values()

In [22]:
def find_mean_in_range(data, x_min, x_max):
    values = [df.loc[(df['X'] >= x_min) & (df['X'] <= x_max), 'Y'].dropna() for df in data]
    non_empty_values = [v for v in values if len(v) > 0]

    if not non_empty_values:
        return np.nan

    combined_values = np.concatenate(non_empty_values)
    return np.nanmean(combined_values)

In [23]:
def calculate_average(file_paths, output_path):
    x_values = generate_x_values()

    # Read and interpolate all .csv files
    data = []
    for file_path in file_paths:
        try:
            interpolated_data = read_and_interpolate(file_path, x_values)
            # Create a DataFrame with the interpolated data
            interpolated_df = pd.DataFrame({'X': x_values, 'Y': interpolated_data})
            data.append(interpolated_df)
        except KeyError as e:
            print(f"KeyError in {file_path}: {e}. Skipping this file.")

    average = []
    for x_min, x_max in zip(x_values[:-1], x_values[1:]):
        mean = find_mean_in_range(data, x_min, x_max)
        average.append(mean)

    # Save average to .csv
    df = pd.DataFrame({'X': x_values[:-1], 'Y': average})
    df.to_csv(output_path, index=False)

    return x_values, average, data

In [24]:
def calculate_iem_average(iem, reviewers_files, output_directory):
    #print(f"Reviewers_files for {iem}:\n{reviewers_files}\n")  # Add this line for debugging
    
    file_paths = []
    for reviewer in reviewers_files.index:
        files = reviewers_files[reviewer]
        if files:
            for file in files:  # Add this loop to iterate over the file paths in each cell
                file_path = os.path.join(output_directory, '..', 'csv', reviewer, file).replace('\\', '/')
                file_paths.append(file_path)

    output_path = os.path.join(output_directory, f"Average_{iem}.csv")

    x_values, average, data = calculate_average(file_paths, output_path)
    return x_values, average, data

In [25]:
# Create a new DataFrame to store the average measurements
avg_df = pd.DataFrame(columns=['IEM', 'Average Measurement', 'Reviewers'])

In [26]:
# Calculate the average for each IEM
output_directory = 'output/averages'
os.makedirs(output_directory, exist_ok=True)

In [27]:
for iem, reviewers_files in iem_df.iterrows():
    x_values, average, data = calculate_iem_average(iem, reviewers_files, output_directory)

    avg_df = pd.concat([avg_df, pd.DataFrame([{
        'IEM': iem,
        'Average Measurement': average,
        'Reviewers': {reviewer: files for reviewer, files in reviewers_files.items() if files}
    }], index=[0])], ignore_index=True)


KeyError in output/averages/../csv/hbb/HBB 1 R.csv: 'Y'. Skipping this file.
KeyError in output/averages/../csv/vsg/Ugreen HiTune T3 L.csv: 'Y'. Skipping this file.


In [28]:
avg_df

Unnamed: 0,IEM,Average Measurement,Reviewers
0,7Hz Dioko L.csv,"[87.02973913043476, 87.02921538582903, 87.0281...","{'achoreviews': ['7Hz Dioko L.csv'], 'bedrock'..."
1,7Hz Eternal L.csv,"[81.9075, 81.90825430746554, 81.90662147303475...","{'achoreviews': ['7Hz Eternal L.csv'], 'bedroc..."
2,7Hz Legato L.csv,"[86.40225, 86.40302850929731, 86.4045909296810...","{'achoreviews': ['7Hz Legato L.csv'], 'timmyv'..."
3,7Hz Timeless L.csv,"[79.6973817751938, 79.70461227888228, 79.71955...","{'achoreviews': ['7Hz Timeless L.csv'], 'timmy..."
4,7Hz Zero L.csv,"[85.48116781818182, 85.48417048438543, 85.4908...","{'achoreviews': ['7Hz Zero L.csv'], 'bedrock':..."
...,...,...,...
2351,"710 Btnc (Active, ANC off) L.csv","[73.60333333333334, 73.6313962428586, 73.68771...","{'wdym': ['710 Btnc (Active, ANC off) L.csv', ..."
2352,Elegia (Stock Pads) L.csv,"[78.959, 78.97773430498528, 79.01533290539408,...",{'wdym': ['Elegia (Stock Pads) L.csv']}
2353,Elex (Dekoni Elite Fenestrated Sheepskin Pads)...,"[70.5, 70.53076126621039, 70.59249723972114, 7...",{'wdym': ['Elex (Dekoni Elite Fenestrated Shee...
2354,Ola L.csv,"[74.668, 74.6497282095405, 74.61305784740108, ...",{'akros': ['Ola Stock L.csv']}


In [55]:
print(avg_df['Average Measurement'].to_string(index=False))

[87.02973913043476, 87.02921538582903, 87.02816...
[81.9075, 81.90825430746554, 81.90662147303475,...
[86.40225, 86.40302850929731, 86.40459092968104...
[79.6973817751938, 79.70461227888228, 79.719559...
[85.48116781818182, 85.48417048438543, 85.49084...
[72.25953534185213, 72.25885762255763, 72.25001...
[nan, nan, nan, 83.5085502104203, 83.5130367729...
[58.4, 58.4, 58.4, 80.70259167489402, 83.499705...
[86.1115, 86.11045920180928, 86.10837038371271,...
[83.85133333333333, 83.86167493491506, 83.88242...
[nan, nan, nan, 84.34163338652131, 84.346870386...
[91.011156125, 91.01335472444474, 91.0177671786...
[nan, nan, nan, 94.52515283092448, 94.529403258...
[nan, nan, nan, 89.13693448739512, 89.142837859...
[97.723, 97.72508159638144, 97.72925923257456, ...
[90.91787396124032, 90.92063722965611, 90.92364...
[85.82717583333333, 85.83905635194013, 85.86289...
[98.23881449999999, 98.25270675622822, 98.28058...
[92.48740000000001, 92.50738332526204, 92.54748...
[93.77799999999999, 93.78373595

### Target Adherence

In [46]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

In [47]:
def generate_x_values():
    return np.logspace(np.log10(20), np.log10(20000), num=2000)

In [48]:
def read_and_interpolate(file_path, x_values):
    df = pd.read_csv(file_path)
    min_x = df['X'].min()
    max_x = df['X'].max()
    x_values_interp = x_values[(x_values >= min_x) & (x_values <= max_x)]
    f = interp1d(df['X'], pd.to_numeric(df['Y'], errors='coerce'), kind='linear', fill_value='extrapolate')
    y_values = np.empty_like(x_values)
    y_values[:] = np.nan
    y_values[(x_values >= min_x) & (x_values <= max_x)] = f(x_values_interp)
    return y_values

In [49]:
def find_mean_in_range(data, x_min, x_max, x_values):
    indices = (x_values >= x_min) & (x_values <= x_max)
    return np.mean(data[indices])

In [50]:
def target_adherence(average_measurement, target_csv, x_values):
    target_data = read_and_interpolate(target_csv, x_values)

    alignment_freq_range = (200, 10000)  # frequency range for alignment

    input_mean = find_mean_in_range(average_measurement, *alignment_freq_range, x_values)
    target_mean = find_mean_in_range(target_data, *alignment_freq_range, x_values)

    shift = target_mean - input_mean
    average_measurement += shift

    differences = np.abs(target_data - average_measurement)
    
    if np.any(np.isfinite(differences)):
        mean_difference = np.nanmean(differences)
    else:
        mean_difference = 0  # or np.nan if you prefer

    y_range = np.nanmax(target_data) - np.nanmin(target_data)

    adherence = 100 * (1 - mean_difference / y_range)

    return adherence

In [53]:
def calculate_preference(row, target_csv, x_values):
    average_measurement_file = row['Average Measurement']
    average_measurement = read_and_interpolate(average_measurement_file, x_values)
    return target_adherence(average_measurement, target_csv, x_values)


In [54]:
# Replace this with the path to your target curve CSV file
target_csv = 'Objectively Neutral IE Target.csv'

# Reuse the x_values generated earlier
x_values = generate_x_values()

# Calculate the Preference % for each IEM
avg_df['Preference %'] = avg_df.apply(calculate_preference, axis=1, args=(target_csv, x_values))

ValueError: Invalid file path or buffer object type: <class 'list'>