In [None]:
import pandas as pd
import re
import unicodedata

In [None]:
def clean_str(s: str) -> str:
    """Turn a value into tidy, normalized text.

    The value is coerced with ``str()``, repaired for one known mojibake
    sequence, NFKC-normalized, scrubbed of a few leftover artifacts and
    finally trimmed of surrounding whitespace.

    Args:
        s: Value to clean; non-strings are converted with ``str()``.

    Returns:
        The cleaned, stripped string.
    """
    text = str(s)
    # "\u00e2\u20ac\u2122" is what a UTF-8 right single quote (U+2019) looks
    # like after being mis-decoded as cp1252.  It has to be repaired BEFORE
    # NFKC: normalization rewrites its last character (U+2122, trade mark
    # sign) as "TM", after which the three-character sequence never matches.
    text = text.replace("\u00e2\u20ac\u2122", "'")
    text = unicodedata.normalize("NFKC", text)
    for bad, good in {
        "Â":"", "\u2019":"'", "\u2018":"'",
        "\u00A0":" ", "\ufeff":"", "\u2026":"..."
    }.items():
        text = text.replace(bad, good)
    return text.strip()

def normalize_col(col: str) -> str:
    """Build a snake_case-style identifier from a column header.

    The header is cleaned with ``clean_str``, every character that is neither
    a word character nor whitespace is removed, and the result is trimmed,
    lower-cased and has each space replaced by an underscore (so consecutive
    spaces yield consecutive underscores).

    Args:
        col: Original column header.

    Returns:
        The normalized column name.
    """
    cleaned = clean_str(col)
    without_punctuation = re.sub(r"[^\w\s]", "", cleaned)
    lowered = without_punctuation.strip().lower()
    return lowered.replace(" ", "_")

def extract_qnum(col: str) -> int:
    """Return the question number that prefixes a column name.

    A name counts as prefixed when it starts with one or more digits
    immediately followed by an underscore.

    Args:
        col: Column name to inspect.

    Returns:
        The leading number as an ``int``, or ``0`` when there is no such prefix.
    """
    match = re.match(r"^(\d+)_", col)
    if match is None:
        return 0
    return int(match.group(1))

def recode_cell(cell, mapping: dict) -> object:
    """Recode a semicolon-separated multi-select cell through ``mapping``.

    Args:
        cell: Raw cell value. Only strings are recoded; anything else
            (NaN/None, numbers, ...) is returned untouched so that numeric
            columns keep their dtype instead of being turned into text.
        mapping: Lookup from an individual answer to its group label. Answers
            that are not in the mapping are kept as they are.

    Returns:
        For string cells, the recoded answers joined with ``"; "``, with
        duplicates removed and first-seen order preserved. For every other
        input, ``cell`` itself.
    """
    if not isinstance(cell, str):
        return cell
    answers = (part.strip() for part in cell.split(";"))
    recoded = [mapping.get(answer, answer) for answer in answers if answer]
    # dict.fromkeys keeps first-seen order while dropping repeated labels.
    return "; ".join(dict.fromkeys(recoded))

In [None]:
# Load the survey sheet, scrub every text cell, then normalize the headers.
def _clean_text_cell(value):
    """Run clean_str on string cells; hand back any other value unchanged."""
    return clean_str(value) if isinstance(value, str) else value

raw = pd.read_excel("2024 ALS Totals for SWB.xlsx", sheet_name="Table")
text_columns = raw.select_dtypes(include="object").columns
for column in text_columns:
    raw[column] = raw[column].map(_clean_text_cell)
raw.columns = [normalize_col(header) for header in raw.columns]

In [None]:
# Answer groupings, keyed by survey question number (Q2–Q62).
# Each inner dict maps one individual answer (a single ";"-separated part of a
# cell) to the group label that replaces it; answers that are not listed are
# left as they are by recode_cell. The "Qn - ..." labels below repeat the
# question descriptions from desc_map.
grouping_map_by_q = {
    # Q2 - League's focus areas in 2024 (multi-select)
    2: {
        "Election Administration Reforms":"Voter Services",
        "Ranked Choice Voting":"Voter Services",
        "Restoration of Voting Rights":"Voter Services",
        "Voter Access & Participation (Voter Registration, Voter Education including VOTE411, Candidate Forums, Get Out The Vote)":"Voter Services",
        "Civic Education":"Voter Education",
        "Combatting Mis- and Disinformation":"Voter Education",
        "Education":"Voter Education",
        "Leadership Development":"Voter Education",
        "Abolishing the Electoral College through National Popular Vote or constitutional amendment":"Voter Registration",
        "Collaboration with other nonpartisan organizations":"Voter Mobilization",
        "Membership Recruitment":"Voter Mobilization",
        "Voter Protection (Litigation, Redistricting Work, Legislative Actions)":"Voter Protection",
        "Government Transparency":"Voter Protection",
        "Redistricting and Apportionment":"Voter Protection",
        "Diversity, Equity & Inclusion":"Voter Education"
    },
    # Q8 - Poll worker support activities
    8: {
        "Announcements in newsletters":"Announcements and Promotions",
        "Announcements in newsletters in a language other than English":"Announcements and Promotions",
        "Promoting a partner organization":"Announcements and Promotions",
        "Sharing upcoming training opportunities":"Announcements and Promotions",
        "Social media posting":"Social Media and Public Relations",
        "Social media posting in a language other than English":"Social Media and Public Relations",
        "Writing letters to the editor thanking poll workers":"Social Media and Public Relations",
        "Throwing a party to thank poll workers":"Events and Engagement",
        "Working with election officials to send thank you notes to poll workers":"Events and Engagement",
        "Other":"Other",
        "Not applicable":"Not applicable"
    },
    # Q9 - Types of voter guides produced
    9: {
        "General election voter guide":"Voter Guides",
        "Primary election voter guide":"Voter Guides",
        "Local voter guide on VOTE411":"Voter Guides",
        "Local voter guide NOT on VOTE411":"Voter Guides",
        "State voter guide on VOTE411":"Voter Guides",
        "State voter guide NOT on VOTE411":"Voter Guides",
        "Ballot question information guide":"Ballot Information",
        "Other":"Other",
        "Not applicable":"Not applicable"
    },
    # Q11 - Voter registration tactics used
    11: {
        "Collected paper voter registration applications at in-person events":"Voter Registration Activities",
        "Directed voters to the voter registration tool on VOTE411.org":"Voter Registration Activities",
        # NOTE(review): this key contains a typographic apostrophe (U+2019),
        # while clean_str rewrites U+2019 to "'" in cell text; make sure the
        # lookup compares against an equally cleaned key.
        "Directed voters to your state’s online voter registration portal":"Voter Registration Activities",
        "Distributed voting resources":"Voter Registration Activities",
        "Employed a QR code at in-person events/locations":"Voter Registration Activities",
        "Provided partners/venues with printed materials directing voters how to register":"Voter Registration Activities",
        "Used equipment (iPads, laptops, etc.) for voters to use at your registration drives":"Voter Registration Activities",
        "Trained volunteers to register voters":"Voter Registration Activities",
        "Participation in National Voter Registration Day":"Participation in Events",
        "Participation in Vote Early Day":"Participation in Events",
        "Participation in Voter Education Week":"Participation in Events",
        "Social media outreach":"Outreach and Engagement",
        "Tabling":"Outreach and Engagement",
        "Visited to high school(s)":"Outreach and Engagement",
        "Visited college/universities (includes community colleges)":"Outreach and Engagement",
        "Visited prison(s)":"Outreach and Engagement",
        "Visited incarcerated or formerly incarcerated voters":"Outreach and Engagement",
        # NOTE(review): Q14, Q20 and Q27 map "Other - Please specify:" to
        # "Other"; this entry goes the opposite way - confirm it is intended.
        "Other":"Other - Please specify:"
    },
    # Q14 - Early/mail/Election Day voting support
    14: {
        "Canvassing":"Outreach and Engagement",
        "Get out the vote activity":"Outreach and Engagement",
        "Phone banking":"Outreach and Engagement",
        "Text banking":"Outreach and Engagement",
        "Social media":"Outreach and Engagement",
        "Election monitoring":"Election Activities",
        "Poll observation":"Election Activities",
        "Producing & disbursing election information materials":"Election Activities",
        "Staffing a hotline":"Election Activities",
        "Email blasts":"Communications",
        "Newsletters":"Communications",
        "Other - Please specify:":"Other",
        "Not applicable - Our League did not participate in early voting, mail-in voting, or Election Day activities":"Not applicable"
    },
    # Q15 - Challenges voters faced on voting days
    15: {
        "Ballot drop boxes overflowing":"Election Day Issues",
        "Broken machines":"Election Day Issues",
        "Long lines":"Election Day Issues",
        "Running out of paper ballots":"Election Day Issues",
        "Voter intimidation at ballot drop boxes":"Election Day Issues",
        "Voter intimidation at the polls":"Election Day Issues",
        "Canvassing":"Outreach and Engagement",
        "Distribution of guides":"Outreach and Engagement",
        "Social media outreach":"Outreach and Engagement",
        "Ride to polls":"Outreach and Engagement",
        "Poll worker recruitment":"Poll Worker Activities",
        "No Challenges Occurred":"No Challenges Occurred",
        "Other":"Other"
    },
    # Q20 - Challenges in co-sponsoring debates/forums
    20: {
        "Candidate schedule conflicts":"Participant Issues",
        "Candidate cancellation":"Participant Issues",
        "Candidate refusal to participate":"Participant Issues",
        "Co-sponsor/partner conflicts":"Organizational Issues",
        "Lack of attendee participation":"Organizational Issues",
        "Technical challenges (e.g. Zoom)":"Technical Issues",
        "We did not face any challenges":"No Challenges",
        "Other - Please specify:":"Other"
    },
    # Q23 - Communities engaged in 2024 (multi-select)
    23: {
        "Asian communities":"Ethnic and Cultural Communities",
        "Black or African American communities":"Ethnic and Cultural Communities",
        "Hispanic or Latino/a communities":"Ethnic and Cultural Communities",
        "Indigenous communities":"Ethnic and Cultural Communities",
        "Native Hawaiian or other Pacific Islander communities":"Ethnic and Cultural Communities",
        "Community college students":"Educational Groups",
        "High school students":"Educational Groups",
        "Technical/vocational school students":"Educational Groups",
        "University/4-year college students":"Educational Groups",
        "Formerly or currently incarcerated persons":"Special Interest Groups",
        "LGBTQIA+ communities":"Special Interest Groups",
        "New citizens":"Special Interest Groups",
        "People with disabilities":"Special Interest Groups",
        "People with limited income":"Special Interest Groups",
        "People with past felony convictions":"Special Interest Groups",
        "Seniors/Older Adults":"Special Interest Groups",
        "Veterans":"Special Interest Groups"
    },
    # Q25 - Methods used to contact voters (multi-select)
    25: {
        "Email":"Digital Communication",
        "Forums":"Digital Communication",
        "Social media direct messages":"Digital Communication",
        "Text":"Digital Communication",
        "Website":"Digital Communication",
        "In-person":"In-Person and Direct Interaction",
        "Phone":"In-Person and Direct Interaction",
        "Training":"In-Person and Direct Interaction",
        "Mail":"Printed and Physical Materials",
        "Printed materials":"Printed and Physical Materials",
        "League in Action powered by Outreach Circle":"Promotional and Outreach Activities",
        "Newsletters":"Promotional and Outreach Activities",
        "Paid advertising":"Promotional and Outreach Activities",
        "Other":"Other"
    },
    # Q26 - Civics education activities in schools
    26: {
        "Advocating in front of local school boards to support civics-focused curriculum":"Advocacy and Support",
        "Advocating in front of state boards of education to support civics-focused curriculum":"Advocacy and Support",
        "Advocating in front of state legislatures to support civics-focused curriculum or funding for programs":"Advocacy and Support",
        "Supporting curriculum development":"Advocacy and Support",
        "Educating on civic engagement":"Education and Information",
        "Providing election-related information":"Education and Information",
        "Sharing VOTE411.org resources":"Education and Information",
        "Working with specific schools":"School Engagement",
        "Other":"Other",
        "Not applicable":"Not applicable"
    },
    # Q27 - Activities to help understand government
    27: {
        "Run educational sessions, like Civics 101 or other programs":"Run educational sessions",
        "Participate in an Observer Corps":"Participate in an Observer Corps",
        "Publish Know Your Community (or a similar publication)":"Publish community publication",
        "Host/cohost trainings for people interested in running for office":"Host trainings",
        "Recruit individuals for other groups' trainings about running for office":"Recruit for trainings",
        "Other - Please specify:":"Other",
        "Not applicable":"Not applicable"
    },
    # Q28 - Strategies against mis/disinformation
    28: {
        "Sharing VOTE411 (with voters, partners, media, etc.)":"Sharing VOTE411",
        "Sharing our learning from trainings":"Sharing training learnings",
        "Public messaging campaign (social media, letters to the editor, op-eds)":"Public messaging campaign",
        "Addressing the issues at public meetings":"Addressing issues publicly",
        "Debunking":"Debunking",
        "Pre-bunking":"Pre-bunking",
        "Inoculation":"Inoculation",
        "Fact-checking":"Fact-checking",
        "Building trusted messengers":"Building trusted messengers",
        "Program: Selected governmental issues chosen by members at the local, state and national levels for study and action":"Program: governmental issues",
        "Programs: Plans for speakers, discussion or other activities for League meetings":"Program meetings",
        "Projects: election observation, observer corps, hosting meetings with local election officials":"Projects: observation & meetings",
        "Other":"Other",
        "Not applicable":"Not applicable"
    },
    # Q29 - Structured-dialogue topics hosted
    29: {
        "Climate change/environmental issues":"Climate change/environment",
        "Criminal justice":"Criminal justice",
        "Neighborhood policing":"Neighborhood policing",
        "Racial justice":"Racial justice",
        "Reproductive choices":"Reproductive choices",
        "Security":"Security",
        "DC statehood":"DC statehood",
        "Equal Rights Amendment (ERA) advocacy":"Equal Rights Amendment advocacy",
        "Voting rights":"Voting rights",
        "Building a community budget":"Community budgeting",
        "Other":"Other",
        "Not applicable":"Not applicable"
    },
    # Q30 - Voter education materials produced
    30: {
        "Advertising":"Advertising",
        "Pamphlets":"Pamphlets",
        "Postcards":"Postcards",
        "Public Service Announcements":"PSAs",
        "Podcasts":"Podcasts",
        "Social media content":"Social media content",
        "TV programming":"TV programming",
        "Videos":"Videos",
        "Other":"Other",
        "Not applicable":"Not applicable"
    },
    # Q40 - Types of organizations partnered with
    40: {
        "Civic / Voter Engagement / “Good Government” Groups":"Civic / Voter Engagement Groups",
        "Greek-Letter Organizations":"Greek-Letter Organizations",
        "Black Greek-Letter organizations (Fraternities and Sororities)":"Black Greek-Letter organizations",
        "Legal Organizations":"Legal Organizations",
        "Legal organizations on legal advocacy or litigation":"Legal Advocacy Organizations",
        "Legal organizations for purposes other than legal advocacy or litigation":"Other Legal Organizations",
        "Organizations representing particular racial/ethnic communities (Please specify which groups)":"Organizations representing specific communities",
        "Organizations representing communities who speak a particular language (Please specify the language)":"Language-based organizations",
        "Organizations representing young people":"Youth Organizations",
        "Organizations representing the disability community":"Disability Organizations",
        "PTA / school administrators / teachers / other educational groups":"Educational Groups",
        # NOTE(review): key uses a typographic apostrophe (U+2019); clean_str
        # turns that character into "'" in cell text - verify the lookup
        # accounts for it.
        "Women’s organizations":"Women’s Organizations",
        "Other":"Other",
        "No Partner Organizations":"No Partner Organizations"
    },
    # Q41 - Specific partner groups
    41: {
        "All Voting is Local":"Partner – All Voting is Local",
        "Black Voters Matter":"Partner – Black Voters Matter",
        "Brennan Center":"Partner – Brennan Center",
        "No partner organizations":"No Partner Organizations"
    },
    # Q42 - Legislative engagement in 2024
    # NOTE(review): these keys read like sub-question labels rather than answer
    # values - confirm they actually occur as cell contents.
    42: {
        "Number of bills/ordinances supported":"Counts: supported",
        "Number of supported bills/ordinances that passed":"Counts: passed",
        "Number of bills/ordinances opposed":"Counts: opposed",
        "Number of bills/ordinances opposed that did not pass":"Counts: opposed–failed",
        "Number of testimonies (verbal/written)":"Counts: testimonies",
        "Number of lobby corps volunteer activities":"Counts: lobby activities"
    },
    # Q43 - 2025 legislative priorities
    43: {
        "Electoral and Voting Reforms":"Priorities – Electoral and Voting Reforms",
        "Abolishing the Electoral College through National Popular Vote or constitutional amendment":"Priorities – National Popular Vote",
        "Campaign Finance Reform":"Priorities – Campaign Finance Reform",
        "Election Administration Reforms":"Priorities – Election Administration Reforms",
        "Improving Elections (strengthening ballot access, absentee voting, etc.)":"Priorities – Improving Elections",
        "Redistricting and Apportionment":"Priorities – Redistricting",
        "Voter Access & Participation (Voter Registration, Voter Education including VOTE411, Candidate Forums, Get Out The Vote)":"Priorities – Voter Access & Participation",
        "Voting Rights and Voter Protection":"Priorities – Voting Rights & Protection",
        "Civics Education":"Priorities – Civics Education",
        "Equal Rights Amendment (ERA) Advocacy":"Priorities – ERA Advocacy",
        "Combatting Mis- and Disinformation":"Priorities – Combatting Misinfo",
        "Social and Environmental Issues":"Priorities – Social & Environmental Issues",
        "Climate Change/Environmental Issues":"Priorities – Climate Change",
        "Gun Safety":"Priorities – Gun Safety",
        "Healthcare":"Priorities – Healthcare",
        "Immigration":"Priorities – Immigration",
        "Public Safety and Accountability (including gun safety issues)":"Priorities – Public Safety & Accountability",
        "Reproductive Choices":"Priorities – Reproductive Choices",
        "Other":"Priorities – Other"
    },
    # Q56 - Revenue stream estimates for 2024
    56: {
        "$500 or less":"$500 or less",
        "$501-$1,500":"$501–$1,500",
        "$1,501-$3,000":"$1,501–$3,000",
        "$3,001-$4,500":"$3,001–$4,500",
        "Greater than $4,500":"> $4,500",
        "Dues":"Revenue – Dues",
        "Grants from LWVUS":"Revenue – LWVUS Grants",
        "External grants":"Revenue – External Grants",
        "Fundraising efforts":"Revenue – Fundraising",
        "Individual donations from your league members":"Revenue – Member Donations",
        "In kind donation (not including volunteer time)":"Revenue – In-Kind Donations"
    },
    # Q58 - Dedicated positions by role
    58: {
        "Fundraising":"Role – Fundraising",
        "Administrative Support":"Role – Admin Support",
        "Communications/Marketing":"Role – Communications/Marketing",
        "Membership Coordination":"Role – Membership Coordination",
        "Advocacy/Program Coordination":"Role – Advocacy/Program Coordination"
    },
    # Q62 - Support needed for Nov 2028 election
    62: {
        "Yes":"Yes",
        "No":"No"
    }
}

# Recode every column whose leading question number has a grouping map.
for col in raw.columns:
    q = extract_qnum(col)
    if q not in grouping_map_by_q:
        continue
    # Text cells were run through clean_str when the sheet was loaded, which
    # (among other things) turns typographic apostrophes into "'". Clean the
    # lookup keys the same way, otherwise keys that still contain such
    # characters can never match a cell value.
    mapping = {clean_str(answer): group for answer, group in grouping_map_by_q[q].items()}
    # Bind the mapping as a default argument so the lambda does not depend on
    # the loop variables that keep changing.
    raw[col] = raw[col].apply(lambda cell, mapping=mapping: recode_cell(cell, mapping))


In [None]:
# Per-question metadata, keyed by survey question number (1-66).

# Text written to the 'Description' column of the variable master.
desc_map = {
    1:"League's greatest accomplishment in 2024",2:"League's focus areas in 2024 (multi-select)",
    3:"Number of volunteers in 2024",4:"Engagement in election-related activities",
    5:"Count of election-related activities hosted",6:"Volunteer hours invested in election activities",
    7:"LWVUS resources used for election activities",8:"Poll worker support activities",
    9:"Types of voter guides produced",10:"Number registered outside VOTE411",
    11:"Voter registration tactics used",12:"People reached with GOTV information",
    13:"Members serving as poll workers",14:"Early/mail/Election Day voting support",
    15:"Challenges voters faced on voting days",16:"Meetings with election officials",
    17:"Types of candidate debates/forums sponsored",18:"Count of debates/forums by format",
    19:"Debates/forums planned but not held",20:"Challenges in co-sponsoring debates/forums",
    21:"Count of non-election activities hosted",22:"Volunteer hours on non-election activities",
    23:"Communities engaged in 2024 (multi-select)",24:"Total voters reached overall",
    25:"Methods used to contact voters (multi-select)",26:"Civics education activities in schools",
    27:"Activities to help understand government",28:"Strategies against mis/disinformation",
    29:"Structured-dialogue topics hosted",30:"Voter education materials produced",
    31:"Media coverage earned by league",32:"Communications activities undertaken",
    33:"Social media platforms used",34:"Social media followers count",
    35:"Frequency of content updates",36:"Website features implemented",
    37:"Communications tools/resources needed",38:"Communications lead contact info",
    39:"Digital lead contact info",40:"Types of organizations partnered with",
    41:"Specific partner groups",42:"Legislative engagement in 2024",
    43:"2025 legislative priorities",44:"Actions toward UN Sustainable Development Goals",
    45:"Specific SDG actions taken",46:"Tools/resources needed for legislative work",
    47:"Comfort level with legislature engagement",48:"Engagement in federal court litigation",
    49:"Engagement in state court litigation",50:"Rating of legal representation/support",
    51:"Engagement in non-litigation legal advocacy",52:"Tools needed for accountability efforts",
    53:"Support needed for DEI lens application",54:"DEI policy adoption status",
    55:"Collection of membership demographic data",56:"Revenue stream estimates for 2024",
    57:"Other revenue streams (specify)",58:"Dedicated positions by role",
    59:"Expense areas needing funding",60:"Priority areas for growth/investment",
    61:"League's readiness for Nov 2024 election",62:"Support needed for Nov 2028 election",
    63:"Additional feedback Q63",64:"Additional feedback Q64",
    65:"Additional feedback Q65",66:"Additional feedback Q66"
}
# Label written to the 'Analytic Category' column; the values used here are
# "Descriptive", "Diagnostic" and "Open Text".
cat_map = {
    1:"Descriptive",2:"Descriptive",3:"Descriptive",4:"Diagnostic",
    5:"Descriptive",6:"Diagnostic",7:"Descriptive",8:"Descriptive",
    9:"Descriptive",10:"Descriptive",11:"Diagnostic",12:"Descriptive",
    13:"Descriptive",14:"Diagnostic",15:"Diagnostic",16:"Diagnostic",
    17:"Descriptive",18:"Descriptive",19:"Descriptive",20:"Diagnostic",
    21:"Descriptive",22:"Descriptive",23:"Descriptive",24:"Descriptive",
    25:"Descriptive",26:"Diagnostic",27:"Diagnostic",28:"Diagnostic",
    29:"Descriptive",30:"Descriptive",31:"Descriptive",32:"Descriptive",
    33:"Descriptive",34:"Descriptive",35:"Descriptive",36:"Descriptive",
    37:"Diagnostic",38:"Descriptive",39:"Descriptive",40:"Descriptive",
    41:"Descriptive",42:"Diagnostic",43:"Descriptive",44:"Descriptive",
    45:"Descriptive",46:"Diagnostic",47:"Diagnostic",48:"Diagnostic",
    49:"Diagnostic",50:"Diagnostic",51:"Descriptive",52:"Descriptive",
    53:"Descriptive",54:"Descriptive",55:"Descriptive",56:"Descriptive",
    57:"Descriptive",58:"Descriptive",59:"Descriptive",60:"Descriptive",
    61:"Diagnostic",62:"Descriptive",63:"Open Text",64:"Open Text",
    65:"Open Text",66:"Open Text"
}
# Base short name per question; make_short returns it for every column whose
# question number is a key here (unless col_map_updates overrides the column).
short_map = {
     1:"greatest_accomplishment",2:"focus_areas",3:"volunteer_count",
     4:"election_engagement",5:"activity_count",6:"volunteer_hours",
     7:"resources_used",8:"pollworker_support",9:"guide_types",
    10:"non_vote411_regs",11:"registration_tactics",12:"gotv_reach",
    13:"pollworker_members",14:"early_voting_support",15:"voter_challenges",
    16:"officials_meetings",17:"debate_types",18:"debate_counts",
    19:"planned_not_held",20:"debate_challenges",21:"non_election_events",
    22:"non_elect_vol_hours",23:"engaged_communities",24:"total_voters",
    25:"contact_methods",26:"civics_curriculum",27:"govt_activities",
    28:"misinfo_strategies",29:"dialogue_topics",30:"education_materials",
    31:"media_coverage",32:"comm_activities",33:"social_platforms",
    34:"follower_count",35:"update_frequency",36:"website_features",
    37:"needed_comms_tools",38:"comms_lead_info",39:"digital_lead_info",
    40:"partner_org_types",41:"partner_groups",42:"legislative_work",
    43:"future_priorities",44:"sdg_actions",45:"sdg_specific",
    46:"legislative_tools",47:"legislature_comfort",48:"federal_litigation",
    49:"state_litigation",50:"legal_support_rating",51:"nonlitigation_advocacy",
    52:"accountability_tools",53:"dei_support",54:"dei_status",
    55:"demographic_data",56:"revenue_estimates",57:"other_revenues",
    58:"staff_vol_roles",59:"expense_areas",60:"investment_priorities",
    61:"election_readiness",62:"support_for_2028",63:"feedback_open1",
    64:"feedback_open2",65:"feedback_open3",66:"feedback_open4"
}
# Manual short-name overrides, keyed by the full normalized column name (the
# output of normalize_col). make_short consults this dict before short_map.
col_map_updates = {
    "other_please_specify_52_please_indicate_what_tools_are_needed_for_accountability_efforts":"accountability_tools_other",
    "other_please_specify_52_please_indicate_what_tools_are_needed_for_accountability_efforts_2":"accountability_tools_other2",
    "number_of_supported_billsordinances_that_passed":"leg_supported_passed_count",
    "number_of_billsordinances_opposed_that_did_not_pass":"leg_opposed_failed_count",
    "number_of_lobby_corps_volunteer_activities_education_programs":"lobby_corps_activity_count"
}

# Variable-master skeleton: one row per column of `raw`, its question number,
# and a "<question>.<n>" label where n counts the columns of that question in
# order of appearance. Columns without a question number get an empty label.
vm = pd.DataFrame({'Raw Column Name': raw.columns})
vm['Qnum'] = vm['Raw Column Name'].map(extract_qnum)
occurrence = vm.groupby('Qnum').cumcount() + 1
vm['Survey Question Number'] = [
    f"{qnum}.{nth}" if qnum > 0 else ""
    for qnum, nth in zip(vm['Qnum'], occurrence)
]


In [None]:
def make_short(r):
    """Choose the short variable name for one row of the variable master.

    Lookup order:
      1. a manual override in ``col_map_updates`` for the normalized column name,
      2. the per-question name in ``short_map`` for the row's ``Qnum``,
      3. the first five underscore-separated tokens of the normalized name.

    Args:
        r: Row providing 'Raw Column Name' and 'Qnum'.

    Returns:
        The short name; it is not guaranteed to be unique across rows.
    """
    normalized = normalize_col(r['Raw Column Name'])
    if normalized in col_map_updates:
        return col_map_updates[normalized]

    question = r['Qnum']
    if question in short_map:
        return short_map[question]

    leading_tokens = normalized.split("_")[:5]
    return "_".join(leading_tokens)
vm['Short Name'] = vm.apply(make_short, axis=1)

# Make the short names unique: the first occurrence keeps its name, repeats
# get a numeric suffix (_2, _3, ...). Every un-suffixed name is reserved up
# front and already-used suffixes are skipped, so a generated name such as
# "x_2" can never collide with a column that is itself called "x_2".
base_names = list(vm['Short Name'])
taken = set(base_names)
counts = {}; uniq = []
for nm in base_names:
    counts[nm] = counts.get(nm, 0) + 1
    if counts[nm] == 1:
        uniq.append(nm)
        continue
    candidate = f"{nm}_{counts[nm]}"
    while candidate in taken:
        counts[nm] += 1
        candidate = f"{nm}_{counts[nm]}"
    taken.add(candidate)
    uniq.append(candidate)
vm['Short Name'] = uniq

def _response_type(column_name):
    """Classify a column of `raw` as "Checkbox", "Numeric" or "Text"."""
    values = raw[column_name]
    # Any ";" in a non-null value marks the column as a multi-select field.
    if values.dropna().astype(str).str.contains(";").any():
        return "Checkbox"
    if values.dtype.kind in ('i','f'):
        return "Numeric"
    return "Text"

def _missing_share(column_name):
    """Share of null cells in a column of `raw`, as a whole-number percent string."""
    fraction = raw[column_name].isnull().mean()
    return f"{int(round(fraction*100))}%"

# Question-level metadata; question numbers without an entry get "".
vm['Description'] = vm['Qnum'].map(desc_map).fillna("")
vm['Analytic Category'] = vm['Qnum'].map(cat_map).fillna("")
# Column-level metadata derived from the data itself.
vm['Response Type'] = vm['Raw Column Name'].map(_response_type)
vm['Missing (%)'] = vm['Raw Column Name'].map(_missing_share)

export_columns = [
    'Raw Column Name','Survey Question Number','Description',
    'Response Type','Analytic Category','Missing (%)','Short Name'
]
vm[export_columns].to_csv("variable_master.csv", index=False)


In [None]:
# Rename the cleaned columns to their short names and write the final dataset.
short_name_by_column = {
    original: short
    for original, short in zip(vm['Raw Column Name'], vm['Short Name'])
}
df = raw.rename(columns=short_name_by_column)
df.to_csv("lwv_cleaned_final.csv", index=False)


In [None]:
# Status message naming the two CSV files written by the earlier cells.
print("✅ variable_master.csv and lwv_cleaned_final.csv created.")