In [1]:
import pandas as pd

# Read the survey export, discard the rows labelled 0 and 1, and renumber the
# remaining rows from 0 -- all in one chained expression.
# NOTE(review): rows 0 and 1 are presumably non-response metadata rows in the
# export -- confirm against the CSV.
df = (
    pd.read_csv("Results314Edited.csv")
    .drop([0, 1])
    .reset_index(drop=True)
)

# Function to calculate the allowed birth years for the age range
def calculate_allowed_birth_years(age_range, current_year=2023, oldest_age=90):
    """Return the birth years that are consistent with a survey age bracket.

    Args:
        age_range: Either a closed bracket written as "<start>-<end>"
            (for example "25-34") or the open-ended label
            "75 years or older". Surrounding whitespace is ignored.
        current_year: Year the ages are measured against. Defaults to 2023,
            the value that was previously hard-coded.
        oldest_age: Upper age assumed for the open-ended bracket. Defaults
            to 90, the value that was previously hard-coded.

    Returns:
        A list of ints: every year from ``current_year - end_age`` through
        ``current_year - start_age`` in ascending order, followed by one
        extra trailing element, ``current_year - end_age - 1``.

    Raises:
        ValueError: If ``age_range`` is not one of the two supported forms,
            or if the bracket's end age is lower than its start age.
        AttributeError: If ``age_range`` is not a string (e.g. a missing
            value read from the CSV).
    """
    label = age_range.strip()

    if label == "75 years or older":
        start_age, end_age = 75, oldest_age
    else:
        # Exactly two integer parts are expected; anything else raises
        # ValueError from int() or from the unpacking.
        start_age, end_age = map(int, label.split('-'))

    if end_age < start_age:
        raise ValueError(
            f"Invalid age range {age_range!r}: end age {end_age} is lower "
            f"than start age {start_age}."
        )

    earliest_year = current_year - end_age
    latest_year = current_year - start_age
    allowed_birth_years = list(range(earliest_year, latest_year + 1))

    # A person aged `end_age` who has not yet had a birthday in
    # `current_year` was born one year earlier, so that year is accepted too.
    # It is appended last to keep the original ordering of the result.
    allowed_birth_years.append(earliest_year - 1)

    return allowed_birth_years

# Map every age-range label found in column "Q1" to its list of allowed birth
# years. A label that occurs on several rows is recomputed each time and
# overwrites its earlier entry.
allowed_birth_years_dict = {
    age_range: calculate_allowed_birth_years(age_range)
    for age_range in df["Q1"]
}

# Running totals plus per-row messages, keyed by the reported row number, for
# rows whose birth year does / does not fit the stated age range.
matches = 0
non_matches = 0
matches_dict = {}
non_matches_dict = {}

# Walk the "Q1" (age range) and "Q4" (birth year) columns side by side.
for index, age_range, raw_birth_year in zip(df.index, df["Q1"], df["Q4"]):
    birth_year = int(raw_birth_year)
    # Rows are reported with an offset of 2.
    # NOTE(review): presumably this compensates for the two rows dropped
    # after loading -- confirm.
    row_number = index + 2
    allowed_birth_years = allowed_birth_years_dict.get(age_range, [])

    if birth_year not in allowed_birth_years:
        non_matches += 1
        # Distance to the nearest allowed year tells how far off the answer is.
        closest_year = min(allowed_birth_years, key=lambda year: abs(year - birth_year))
        year_difference = abs(birth_year - closest_year)
        non_matches_dict[row_number] = f"Birth year ({birth_year}) does not match the expected year for age range {age_range}. Off by {year_difference} years."
        continue

    matches += 1
    matches_dict[row_number] = f"Birth year ({birth_year}) matches the expected year for age range {age_range}."

# Report every matching row first, then every non-matching row, each in the
# order the rows were recorded.
for row_number, text in matches_dict.items():
    print(f"Match - Row {row_number}: {text}")

for row_number, text in non_matches_dict.items():
    print(f"Non-Match - Row {row_number}: {text}")

# Finish with the two totals.
print(f"Total Matches: {matches}")
print(f"Total Non-Matches: {non_matches}")


Match - Row 2: Birth year (1988) matches the expected year for age range 25-34.
Match - Row 3: Birth year (1960) matches the expected year for age range 55-64.
Match - Row 4: Birth year (1990) matches the expected year for age range 25-34.
Match - Row 5: Birth year (1995) matches the expected year for age range 25-34.
Match - Row 6: Birth year (2001) matches the expected year for age range 18-24.
Match - Row 7: Birth year (1985) matches the expected year for age range 35-44.
Match - Row 8: Birth year (1992) matches the expected year for age range 25-34.
Match - Row 9: Birth year (1978) matches the expected year for age range 35-44.
Match - Row 10: Birth year (1989) matches the expected year for age range 25-34.
Match - Row 11: Birth year (1993) matches the expected year for age range 25-34.
Match - Row 12: Birth year (1975) matches the expected year for age range 45-54.
Match - Row 13: Birth year (1994) matches the expected year for age range 25-34.
Match - Row 14: Birth year (1997) ma