# Predicting T-shirt size using the ANSUR II dataset
We will here try to predict a persons t-shirt size given the weight and height of the person. We will use the ANSUR II dataset which contains a lot of information about the physical attributes of a large number of people. 

We will first try to map the persons in the dataset to a t-shirt size. It is hard to find a concise size chart for t-shirt so we will create our own, initial chart, based on these assumptions:

We will only look at two measurements, Shoulder Width and Chest Circumference.

Our first problem is that Shoulder Width is not one of the measurements taken in the dataset. But we have Biacromial Breadth which is the distance between the two acromion processes. We will assume that this is the same as Shoulder Width.

We will then have these initial rules:

| Size | Percentile |
|------|------------|
| XS   | 0-5        |
| S    | 5-25       |
| M    | 25-50      |
| L    | 50-75      |
| XL   | 75-90      |
| XXL  | 90-97      |
| XXXL | 97-100     |

In [28]:
import pandas as pd
female = pd.read_csv('./Data/female.csv' )
male = pd.read_csv('./Data/male.csv' )
print(f'The female shirts size data {female.shape}')
print(f'The male shirts size data {male.shape}')

The female shirts size data (1986, 108)
The male shirts size data (4082, 108)


In [30]:
def compute_percentile_ranges(column):
    # Define percentile ranges
    ranges = [(0, 5), (5, 25), (25, 50), (50, 75), (75, 90), (90, 97), (97, 100)]

    percentiles = {(low, high): (column.quantile(low/100), column.quantile(high/100)) for low, high in ranges}

    counts = {}

    for r, (low, high) in percentiles.items():
        counts[r] = ((column >= low) & (column < high)).sum()
    
    return counts

print(compute_percentile_ranges(female['chestcircumference']))
print(compute_percentile_ranges(female['biacromialbreadth']))

print(compute_percentile_ranges(male['chestcircumference']))
print(compute_percentile_ranges(male['biacromialbreadth']))



{(0, 5): np.int64(1), (5, 25): np.int64(1), (25, 50): np.int64(1), (50, 75): np.int64(1), (75, 90): np.int64(1), (90, 97): np.int64(0), (97, 100): np.int64(0)}
{(0, 5): np.int64(1), (5, 25): np.int64(1), (25, 50): np.int64(1), (50, 75): np.int64(1), (75, 90): np.int64(1), (90, 97): np.int64(0), (97, 100): np.int64(0)}
{(0, 5): np.int64(199), (5, 25): np.int64(810), (25, 50): np.int64(1025), (50, 75): np.int64(1012), (75, 90): np.int64(616), (90, 97): np.int64(295), (97, 100): np.int64(124)}
{(0, 5): np.int64(191), (5, 25): np.int64(787), (25, 50): np.int64(989), (50, 75): np.int64(1079), (75, 90): np.int64(610), (90, 97): np.int64(303), (97, 100): np.int64(122)}


In [34]:
def compute_size_percentile_mesurments(data, chest_column, shoulder_column):
    sizes = ['XS', 'S', 'M', 'L', 'XL', '2XL', '3XL']
    ranges = [0, 5, 25, 50, 75, 90, 97]

    # Compute the values for each percentile for chest and shoulder
    chest_percentiles = {p: data[chest_column].quantile(p/100) for p in ranges}
    shoulder_percentiles = {p: data[shoulder_column].quantile(p/100) for p in ranges}

    # Map the t-shirt sizes to the corresponding chest and shoulder measurments
    size_mappings = {}
    for i, size in enumerate(sizes):
        size_mappings[size] = {
            'Chest': int(chest_percentiles[ranges[i]]),
            'Shoulder': int(shoulder_percentiles[ranges[i]])
        }
    
    return size_mappings
    


print(compute_size_percentile_mesurments(female, 'chestcircumference', 'biacromialbreadth'))
print(compute_size_percentile_mesurments(male, 'chestcircumference', 'biacromialbreadth'))

 

{'XS': {'Chest': 839, 'Shoulder': 356}, 'S': {'Chest': 847, 'Shoulder': 360}, 'M': {'Chest': 886, 'Shoulder': 372}, 'L': {'Chest': 965, 'Shoulder': 375}, 'XL': {'Chest': 1025, 'Shoulder': 382}, '2XL': {'Chest': 1060, 'Shoulder': 390}, '3XL': {'Chest': 1080, 'Shoulder': 395}}
{'XS': {'Chest': 774, 'Shoulder': 337}, 'S': {'Chest': 922, 'Shoulder': 384}, 'M': {'Chest': 996, 'Shoulder': 403}, 'L': {'Chest': 1056, 'Shoulder': 415}, 'XL': {'Chest': 1117, 'Shoulder': 428}, '2XL': {'Chest': 1172, 'Shoulder': 441}, '3XL': {'Chest': 1233, 'Shoulder': 452}}


In [35]:
female_sizes={
  'XS': {'Chest': 839, 'Shoulder': 356}, 
  'S': {'Chest': 847, 'Shoulder': 360}, 
  'M': {'Chest': 886, 'Shoulder': 372},
  'L': {'Chest': 965, 'Shoulder': 375}, 
  'XL': {'Chest': 1025, 'Shoulder': 382}, 
  '2XL': {'Chest': 1060, 'Shoulder': 390},
  '3XL': {'Chest': 1080, 'Shoulder': 395}
  }

In [36]:
def get_size_category(value, size_chart):
    """
    Determines the size category for a given measurement based on a size chart.
    
    Args:
        value (float): The measurement value (e.g., chest circumference or shoulder breadth).
        size_chart (dict): A dictionary with size categories as keys and value ranges as values.

    Returns:
        str: The size category (e.g., 'S', 'M', 'L').
    """
    for size, (lower, upper) in size_chart.items():
        if lower <= value <= upper:
            return size
    return None

def count_matches_and_conflicts(data, size_chart):
    """
    Counts individuals with matching and conflicting size categories based on the dataset and size chart.

    Args:
        data (list of dict): List of individuals with measurements (e.g., [{'chest': 90, 'shoulder': 45}, ...]).
        size_chart (dict): Dictionary specifying size limits for chest and shoulder.

    Returns:
        dict: Counts of matches and conflicts.
    """
    matches = 0
    conflicts = 0

    for person in data:
        chest_size = get_size_category(person['chest'], size_chart['chest'])
        shoulder_size = get_size_category(person['shoulder'], size_chart['shoulder'])

        if chest_size and shoulder_size:
            if chest_size == shoulder_size:
                matches += 1
            else:
                conflicts += 1

    return {'matches': matches, 'conflicts': conflicts}

# Example size charts for males and females
male_size_chart = {
    'chest': {
        'S': (85, 95),
        'M': (96, 105),
        'L': (106, 115),
        'XL': (116, 125)
    },
    'shoulder': {
        'S': (40, 45),
        'M': (46, 50),
        'L': (51, 55),
        'XL': (56, 60)
    }
}

female_size_chart = {
    'chest': {
        'S': (80, 90),
        'M': (91, 100),
        'L': (101, 110),
        'XL': (111, 120)
    },
    'shoulder': {
        'S': (35, 40),
        'M': (41, 45),
        'L': (46, 50),
        'XL': (51, 55)
    }
}

# Example datasets for males and females
male_data = [
    {'chest': 92, 'shoulder': 44},
    {'chest': 100, 'shoulder': 48},
    {'chest': 110, 'shoulder': 52},
    {'chest': 120, 'shoulder': 58}
]

female_data = [
    {'chest': 85, 'shoulder': 38},
    {'chest': 95, 'shoulder': 42},
    {'chest': 105, 'shoulder': 47},
    {'chest': 115, 'shoulder': 53}
]

# Testing the function
male_results = count_matches_and_conflicts(male_data, male_size_chart)
female_results = count_matches_and_conflicts(female_data, female_size_chart)

print("Male Results:", male_results)
print("Female Results:", female_results)


Male Results: {'matches': 4, 'conflicts': 0}
Female Results: {'matches': 4, 'conflicts': 0}


In [40]:
def analyze_matches_and_ties(size_chart, measurements):
    """
    Analyze the number of matches and ties based on two measurements.

    Args:
        size_chart (list of dict): The size chart, where each size has min/max values for two measurements.
                                   Example: [{'size': 'S', 'min_height': 150, 'max_height': 160,
                                              'min_chest': 80, 'max_chest': 90}, ...]
        measurements (list of dict): List of individual measurements.
                                      Example: [{'person': 'A', 'height': 155, 'chest': 85}, ...]

    Returns:
        dict: A dictionary with counts of matches and ties, plus individual results.
    """
    matches = 0
    ties = 0
    results = []

    for person in measurements:
        person_matches = []
        for size in size_chart:
            # Check if the person's measurements fit the size
            if (size['min_height'] <= person['height'] <= size['max_height'] and
                size['min_chest'] <= person['chest'] <= size['max_chest']):
                person_matches.append(size['size'])
        
        if len(person_matches) == 1:
            matches += 1
            results.append({'person': person['person'], 'size': person_matches[0], 'type': 'match'})
        elif len(person_matches) > 1:
            ties += 1
            # Handle ties: choose the larger size if sizes are adjacent
            chosen_size = max(person_matches)  # Assuming sizes are sortable
            results.append({'person': person['person'], 'size': chosen_size, 'type': 'tie'})
    
    return {
        'matches': matches,
        'ties': ties,
        'results': results
    }
 


In [42]:
size_chart = [
    {'size': 'S', 'min_height': 150, 'max_height': 160, 'min_chest': 80, 'max_chest': 90},
    {'size': 'M', 'min_height': 161, 'max_height': 170, 'min_chest': 91, 'max_chest': 100},
    {'size': 'L', 'min_height': 171, 'max_height': 180, 'min_chest': 101, 'max_chest': 110},
]

measurements = [
    {'person': 'A', 'height': 155, 'chest': 85},
    {'person': 'B', 'height': 165, 'chest': 95},
    {'person': 'C', 'height': 175, 'chest': 105},
    {'person': 'D', 'height': 160, 'chest': 85},
]
