In [4]:
import pandas as pd

# Load the two measurement tables from CSV files under ./data.
# `female` and `male` are module-level DataFrames used by the calls further down.
female = pd.read_csv('./data/female.csv')
male = pd.read_csv('./data/male.csv')

# Print each table's dimensions; DataFrame.shape is a (rows, columns) tuple.
print(f'For women we have (rows, columns) {female.shape}')
print(f'For men we have (rows, columns) {male.shape}')

# Define a function to compute percentile ranges
def compute_percentile_ranges(column):
    """Count how many values of ``column`` fall inside each percentile band.

    The bands are (0, 5), (5, 25), (25, 50), (50, 75), (75, 90), (90, 97)
    and (97, 100). Every band is half-open ``[low, high)`` except the last,
    which is closed ``[low, high]`` so that values equal to the column
    maximum (the 100th percentile) are counted instead of being dropped.

    Args:
        column: a pandas Series of numeric values (it must support
            ``quantile`` and element-wise comparisons).

    Returns:
        dict mapping each ``(low_pct, high_pct)`` tuple to the number of
        values whose magnitude lies in that band.
    """
    ranges = [(0, 5), (5, 25), (25, 50), (50, 75), (75, 90), (90, 97), (97, 100)]
    counts = {}
    for low_pct, high_pct in ranges:
        lower = column.quantile(low_pct / 100)
        upper = column.quantile(high_pct / 100)
        above_lower = column >= lower
        if high_pct == 100:
            # The 100th percentile is the maximum itself; a strict "<" would
            # exclude every value equal to it from all bands.
            below_upper = column <= upper
        else:
            below_upper = column < upper
        counts[(low_pct, high_pct)] = (above_lower & below_upper).sum()
    return counts

# Compute percentile measurements and map sizes
def compute_size_percentile_measurements(data, chest_column, shoulder_column):
    """Build a size chart whose bands are percentile cuts of two columns.

    The percentile cut points 0, 5, 25, 50, 75, 90, 97 and 100 are evaluated
    on ``data[chest_column]`` and ``data[shoulder_column]``; each pair of
    consecutive cut points becomes the ``(lower, upper)`` band of one size,
    in the order XS, S, M, L, XL, 2XL, 3XL.

    Args:
        data: DataFrame holding both measurement columns.
        chest_column: name of the column used for the 'Chest' bands.
        shoulder_column: name of the column used for the 'Shoulder' bands.

    Returns:
        dict mapping each size label to
        ``{'Chest': (lower, upper), 'Shoulder': (lower, upper)}``.
    """
    size_labels = ['XS', 'S', 'M', 'L', 'XL', '2XL', '3XL']
    cut_points = [0, 5, 25, 50, 75, 90, 97, 100]

    def band_edges(column_name):
        # Evaluate every cut point once, then pair neighbouring edges.
        series = data[column_name]
        edges = [series.quantile(pct / 100) for pct in cut_points]
        return list(zip(edges[:-1], edges[1:]))

    chest_bands = band_edges(chest_column)
    shoulder_bands = band_edges(shoulder_column)

    return {
        label: {'Chest': chest_band, 'Shoulder': shoulder_band}
        for label, chest_band, shoulder_band in zip(size_labels, chest_bands, shoulder_bands)
    }

# Analyze matches and ties
def analyze_matches_and_ties(data, size_chart, chest_column, shoulder_column):
    """Assign each row of ``data`` to the size(s) whose bands contain it.

    A row matches a size when its chest value lies in that size's 'Chest'
    band AND its shoulder value lies in that size's 'Shoulder' band. Both
    bands are half-open: ``lower <= value < upper``.

    Args:
        data: DataFrame with one row per person.
        size_chart: dict mapping size label to
            ``{'Chest': (lower, upper), 'Shoulder': (lower, upper)}``.
            Entries must be ordered from the smallest size to the largest;
            that order is what decides ties.
        chest_column: name of the chest measurement column in ``data``.
        shoulder_column: name of the shoulder measurement column in ``data``.

    Returns:
        dict with:
            'matches': number of rows that fit exactly one size,
            'ties': number of rows that fit more than one size,
            'results': list of ``{'person': index_label, 'size': label,
                'type': 'match' | 'tie'}`` records. Rows that fit no size
                are not recorded.
    """
    matches = 0
    ties = 0
    results = []

    # Only two columns are needed, so walk them directly instead of
    # materialising a full Series per row with iterrows().
    for idx, chest, shoulder in zip(data.index, data[chest_column], data[shoulder_column]):
        # Collected in chart order, i.e. from smallest to largest size.
        person_matches = [
            size
            for size, ranges in size_chart.items()
            if ranges['Chest'][0] <= chest < ranges['Chest'][1]
            and ranges['Shoulder'][0] <= shoulder < ranges['Shoulder'][1]
        ]

        if len(person_matches) == 1:
            matches += 1
            results.append({'person': idx, 'size': person_matches[0], 'type': 'match'})
        elif len(person_matches) > 1:
            ties += 1
            # Pick the larger size on a tie. max() on the labels would compare
            # them as strings ('M' > 'L', 'XS' > 'XL'), so rely on the chart
            # order instead: the last match is the largest size.
            chosen_size = person_matches[-1]
            results.append({'person': idx, 'size': chosen_size, 'type': 'tie'})

    return {
        'matches': matches,
        'ties': ties,
        'results': results
    }

# Example: build the size chart from the women's 'chestcircumference' and
# 'biacromialbreadth' columns, then classify every row of the same table
# against that chart.
female_size_chart = compute_size_percentile_measurements(female, 'chestcircumference', 'biacromialbreadth')
result = analyze_matches_and_ties(female, female_size_chart, 'chestcircumference', 'biacromialbreadth')

# Print the match/tie counters, then at most the first 105 per-person records.
print(f"Matches: {result['matches']}, Ties: {result['ties']}")
print(result['results'][:105])


For women we have (rows, columns) (1986, 108)
For men we have (rows, columns) (4082, 108)
Matches: 1, Ties: 0
[{'person': 0, 'size': 'M', 'type': 'match'}]


In [5]:
import pandas as pd

# Load the two measurement tables from CSV files under ./data.
# `female` and `male` are module-level DataFrames used by the calls further down.
female = pd.read_csv('./data/female.csv')
male = pd.read_csv('./data/male.csv')

# Print each table's dimensions; DataFrame.shape is a (rows, columns) tuple.
print(f'For women we have (rows, columns) {female.shape}')
print(f'For men we have (rows, columns) {male.shape}')

# Define a function to compute percentile ranges
def compute_percentile_ranges(column):
    """Count how many values of ``column`` fall inside each percentile band.

    The bands are (0, 5), (5, 25), (25, 50), (50, 75), (75, 90), (90, 97)
    and (97, 100). Every band is half-open ``[low, high)`` except the last,
    which is closed ``[low, high]`` so that values equal to the column
    maximum (the 100th percentile) are counted instead of being dropped.

    Args:
        column: a pandas Series of numeric values (it must support
            ``quantile`` and element-wise comparisons).

    Returns:
        dict mapping each ``(low_pct, high_pct)`` tuple to the number of
        values whose magnitude lies in that band.
    """
    ranges = [(0, 5), (5, 25), (25, 50), (50, 75), (75, 90), (90, 97), (97, 100)]
    counts = {}
    for low_pct, high_pct in ranges:
        lower = column.quantile(low_pct / 100)
        upper = column.quantile(high_pct / 100)
        above_lower = column >= lower
        if high_pct == 100:
            # The 100th percentile is the maximum itself; a strict "<" would
            # exclude every value equal to it from all bands.
            below_upper = column <= upper
        else:
            below_upper = column < upper
        counts[(low_pct, high_pct)] = (above_lower & below_upper).sum()
    return counts

# Compute percentile measurements and map sizes
def compute_size_percentile_measurements(data, chest_column, shoulder_column):
    """Build a size chart whose bands are percentile cuts of two columns.

    The percentile cut points 0, 5, 25, 50, 75, 90, 97 and 100 are evaluated
    on ``data[chest_column]`` and ``data[shoulder_column]``; each pair of
    consecutive cut points becomes the ``(lower, upper)`` band of one size,
    in the order XS, S, M, L, XL, 2XL, 3XL.

    Args:
        data: DataFrame holding both measurement columns.
        chest_column: name of the column used for the 'Chest' bands.
        shoulder_column: name of the column used for the 'Shoulder' bands.

    Returns:
        dict mapping each size label to
        ``{'Chest': (lower, upper), 'Shoulder': (lower, upper)}``.
    """
    size_labels = ['XS', 'S', 'M', 'L', 'XL', '2XL', '3XL']
    cut_points = [0, 5, 25, 50, 75, 90, 97, 100]

    def band_edges(column_name):
        # Evaluate every cut point once, then pair neighbouring edges.
        series = data[column_name]
        edges = [series.quantile(pct / 100) for pct in cut_points]
        return list(zip(edges[:-1], edges[1:]))

    chest_bands = band_edges(chest_column)
    shoulder_bands = band_edges(shoulder_column)

    return {
        label: {'Chest': chest_band, 'Shoulder': shoulder_band}
        for label, chest_band, shoulder_band in zip(size_labels, chest_bands, shoulder_bands)
    }

# Analyze matches and ties
def analyze_matches_and_ties(data, size_chart, chest_column, shoulder_column):
    """Assign each row of ``data`` to the size(s) whose bands contain it.

    A row matches a size when its chest value lies in that size's 'Chest'
    band AND its shoulder value lies in that size's 'Shoulder' band. Both
    bands are half-open: ``lower <= value < upper``.

    Args:
        data: DataFrame with one row per person.
        size_chart: dict mapping size label to
            ``{'Chest': (lower, upper), 'Shoulder': (lower, upper)}``.
            Entries must be ordered from the smallest size to the largest;
            that order is what decides ties.
        chest_column: name of the chest measurement column in ``data``.
        shoulder_column: name of the shoulder measurement column in ``data``.

    Returns:
        dict with:
            'matches': number of rows that fit exactly one size,
            'ties': number of rows that fit more than one size,
            'results': list of ``{'person': index_label, 'size': label,
                'type': 'match' | 'tie'}`` records. Rows that fit no size
                are not recorded.
    """
    matches = 0
    ties = 0
    results = []

    # Only two columns are needed, so walk them directly instead of
    # materialising a full Series per row with iterrows().
    for idx, chest, shoulder in zip(data.index, data[chest_column], data[shoulder_column]):
        # Collected in chart order, i.e. from smallest to largest size.
        person_matches = [
            size
            for size, ranges in size_chart.items()
            if ranges['Chest'][0] <= chest < ranges['Chest'][1]
            and ranges['Shoulder'][0] <= shoulder < ranges['Shoulder'][1]
        ]

        if len(person_matches) == 1:
            matches += 1
            results.append({'person': idx, 'size': person_matches[0], 'type': 'match'})
        elif len(person_matches) > 1:
            ties += 1
            # Pick the larger size on a tie. max() on the labels would compare
            # them as strings ('M' > 'L', 'XS' > 'XL'), so rely on the chart
            # order instead: the last match is the largest size.
            chosen_size = person_matches[-1]
            results.append({'person': idx, 'size': chosen_size, 'type': 'tie'})

    return {
        'matches': matches,
        'ties': ties,
        'results': results
    }

 
# Build one size chart per table; each chart's bands come from that table's
# own 'chestcircumference' and 'biacromialbreadth' percentiles.
female_size_chart = compute_size_percentile_measurements(female, 'chestcircumference', 'biacromialbreadth')
male_size_chart = compute_size_percentile_measurements(male, 'chestcircumference', 'biacromialbreadth')

# Classify every row of each table against the chart derived from that table.
female_results = analyze_matches_and_ties(female, female_size_chart, 'chestcircumference', 'biacromialbreadth')
male_results = analyze_matches_and_ties(male, male_size_chart, 'chestcircumference', 'biacromialbreadth')

# Print the match/tie counters for both tables.
print(f"Female Matches: {female_results['matches']}, Female Ties: {female_results['ties']}")
print(f"Male Matches: {male_results['matches']}, Male Ties: {male_results['ties']}")

# Show at most the first five per-person records from each analysis.
print(female_results['results'][:5])
print(male_results['results'][:5])


For women we have (rows, columns) (1986, 108)
For men we have (rows, columns) (4082, 108)
Female Matches: 1, Female Ties: 0
Male Matches: 1180, Male Ties: 0
[{'person': 0, 'size': 'M', 'type': 'match'}]
[{'person': 7, 'size': 'L', 'type': 'match'}, {'person': 21, 'size': 'S', 'type': 'match'}, {'person': 27, 'size': 'S', 'type': 'match'}, {'person': 32, 'size': 'S', 'type': 'match'}, {'person': 37, 'size': '2XL', 'type': 'match'}]
