# In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import random

# Complaint taxonomy: every top-level complaint category maps to the ordered
# list of specific sub-complaints that belong to it.
categories = {
    "Wage Issues": [
        "Minimum Wages Act violations",
        "Delayed salary payments",
        "Unauthorized deductions",
        "Overtime payment issues",
        "Equal pay for equal work violations",
        "Bonus payment issues",
        "Gratuity payment problems",
        "PF deduction issues",
        "ESI contribution problems",
        "Contract wages dispute",
        "Commission payment delays",
        "Incorrect salary calculation",
        "Salary below industry standards",
        "Unpaid leave deductions",
        "Illegal fine impositions",
    ],
    "Discrimination": [
        "Gender discrimination",
        "Caste-based discrimination",
        "Religious discrimination",
        "Disability discrimination",
        "Age discrimination",
        "Pregnancy discrimination",
        "Sexual harassment",
        "Promotion bias",
        "Hiring bias",
        "Regional bias",
        "Language discrimination",
        "Marital status discrimination",
        "Political affiliation bias",
        "LGBTQ+ discrimination",
        "Appearance-based discrimination",
    ],
    "Unsafe Working Conditions": [
        "Lack of safety equipment",
        "Hazardous materials exposure",
        "Inadequate fire safety",
        "Building structural issues",
        "Machine safety concerns",
        "Excessive work hours",
        "No proper ventilation",
        "Unhygienic facilities",
        "No access to clean water",
        "Noise pollution",
        "Radiation exposure",
        "Improper lighting",
        "Ergonomic hazards",
        "Extreme temperature conditions",
        "Chemical spills",
        "No emergency protocols",
    ],
    "Labor Rights Violations": [
        "Contract labor issues",
        "Termination without notice",
        "Forced resignation",
        "No appointment letter",
        "Child labor",
        "Denial of maternity benefits",
        "Denial of leave",
        "Union formation prevention",
        "Collective bargaining issues",
        "Forced overtime",
        "No service benefits",
        "Illegal termination",
        "Identity document confiscation",
        "Apprentice exploitation",
        "Denial of statutory holidays",
    ],
    "Workplace Harassment": [
        "Verbal abuse",
        "Physical intimidation",
        "Cyberbullying at workplace",
        "Hostile work environment",
        "Retaliation for complaints",
        "Mental harassment",
        "Character defamation",
        "Privacy violations",
        "Excessive monitoring",
        "Public humiliation",
        "Exclusion from meetings",
        "Unreasonable deadlines",
        "Forced personal favors",
        "Stalking",
        "Threatening behavior",
    ],
    "Social Security Concerns": [
        "PF withdrawal issues",
        "Pension payment delays",
        "ESIC benefit denial",
        "Maternity benefit delays",
        "Accident compensation issues",
        "Death benefit claims",
        "Gratuity calculation disputes",
        "Unemployment allowance issues",
        "Insurance claim rejection",
        "Medical reimbursement delays",
        "Retirement benefit disputes",
        "NPS contribution problems",
    ],
    "Migrant Worker Issues": [
        "Inter-state migrant documentation",
        "Inadequate housing",
        "Return transportation denial",
        "Displacement allowance issues",
        "Language barrier discrimination",
        "Home state leave refusal",
        "Identity verification problems",
        "Family separation issues",
        "Cultural insensitivity",
        "Remittance challenges",
        "Local authority harassment",
        "Medical care access",
    ],
}

# Category-level routing: the department recorded for each top-level
# complaint category.
department_mapping = {
    "Wage Issues": "Ministry of Labour and Employment - Wage Division",
    "Discrimination": "National Commission for Women/Minorities/SC/ST (based on case)",
    "Unsafe Working Conditions": "Directorate General of Factory Advice Service and Labour Institutes",
    "Labor Rights Violations": "Ministry of Labour and Employment - Industrial Relations Division",
    "Workplace Harassment": "Ministry of Women and Child Development/Local Complaints Committee",
    "Social Security Concerns": "Ministry of Labour and Employment - Social Security Division",
    "Migrant Worker Issues": "Ministry of Labour and Employment - Inter-State Migrant Workers Division",
}

# Sub-complaint-level routing. Department names that recur many times are
# spelled once here so every entry that shares a department uses the exact
# same string.
_WAGE_DIVISION = "Ministry of Labour and Employment - Wage Division"
_IR_DIVISION = "Ministry of Labour and Employment - Industrial Relations Division"
_MIGRANT_DIVISION = "Ministry of Labour and Employment - Inter-State Migrant Workers Division"
_DGFASLI = "Directorate General of Factory Advice Service and Labour Institutes"
_EPFO = "Employees Provident Fund Organisation"
_ESIC = "Employees State Insurance Corporation"

# Maps each specific sub-complaint (the strings listed under `categories`)
# to the department that should handle it.
detailed_department_mapping = {
    # --- Wage Issues ---
    "Minimum Wages Act violations": _WAGE_DIVISION,
    "Delayed salary payments": _WAGE_DIVISION,
    "Unauthorized deductions": _WAGE_DIVISION,
    "Overtime payment issues": _WAGE_DIVISION,
    "Equal pay for equal work violations": _WAGE_DIVISION,
    "Bonus payment issues": _WAGE_DIVISION,
    "Gratuity payment problems": _WAGE_DIVISION,
    "PF deduction issues": _EPFO,
    "ESI contribution problems": _ESIC,
    "Contract wages dispute": _WAGE_DIVISION,
    "Commission payment delays": _WAGE_DIVISION,
    "Incorrect salary calculation": _WAGE_DIVISION,
    "Salary below industry standards": _WAGE_DIVISION,
    "Unpaid leave deductions": _WAGE_DIVISION,
    "Illegal fine impositions": _WAGE_DIVISION,
    # --- Discrimination ---
    "Gender discrimination": "National Commission for Women",
    "Caste-based discrimination": "National Commission for Scheduled Castes/Tribes",
    "Religious discrimination": "National Commission for Minorities",
    "Disability discrimination": "Chief Commissioner for Persons with Disabilities",
    "Age discrimination": _IR_DIVISION,
    "Pregnancy discrimination": "National Commission for Women",
    "Sexual harassment": "Internal Complaints Committee/Local Complaints Committee",
    "Promotion bias": _IR_DIVISION,
    "Hiring bias": _IR_DIVISION,
    "Regional bias": "National Commission for Backward Classes",
    "Language discrimination": _IR_DIVISION,
    "Marital status discrimination": _IR_DIVISION,
    "Political affiliation bias": _IR_DIVISION,
    "LGBTQ+ discrimination": "National Human Rights Commission",
    "Appearance-based discrimination": _IR_DIVISION,
    # --- Unsafe Working Conditions ---
    "Lack of safety equipment": _DGFASLI,
    "Hazardous materials exposure": _DGFASLI,
    "Inadequate fire safety": "State Fire Department and Labour Department",
    "Building structural issues": "Municipal Corporation Building Department",
    "Machine safety concerns": _DGFASLI,
    "Excessive work hours": _IR_DIVISION,
    "No proper ventilation": _DGFASLI,
    "Unhygienic facilities": "Municipal Corporation Health Department",
    "No access to clean water": "Municipal Corporation Water Department",
    "Noise pollution": "State Pollution Control Board",
    "Radiation exposure": "Atomic Energy Regulatory Board",
    "Improper lighting": _DGFASLI,
    "Ergonomic hazards": _DGFASLI,
    "Extreme temperature conditions": _DGFASLI,
    "Chemical spills": _DGFASLI,
    "No emergency protocols": _DGFASLI,
    # --- Labor Rights Violations ---
    "Contract labor issues": _IR_DIVISION,
    "Termination without notice": _IR_DIVISION,
    "Forced resignation": _IR_DIVISION,
    "No appointment letter": _IR_DIVISION,
    "Child labor": "Ministry of Labour and Employment - Child Labor Division",
    "Denial of maternity benefits": "Ministry of Women and Child Development",
    "Denial of leave": _IR_DIVISION,
    "Union formation prevention": _IR_DIVISION,
    "Collective bargaining issues": _IR_DIVISION,
    "Forced overtime": _IR_DIVISION,
    "No service benefits": _IR_DIVISION,
    "Illegal termination": "Labour Commissioner Office",
    "Identity document confiscation": _IR_DIVISION,
    "Apprentice exploitation": "Board of Apprenticeship Training",
    "Denial of statutory holidays": _IR_DIVISION,
    # --- Workplace Harassment ---
    "Verbal abuse": _IR_DIVISION,
    "Physical intimidation": "Local Police Station and Ministry of Labour",
    "Cyberbullying at workplace": "Cyber Crime Cell and Ministry of Labour",
    "Hostile work environment": _IR_DIVISION,
    "Retaliation for complaints": _IR_DIVISION,
    "Mental harassment": _IR_DIVISION,
    "Character defamation": "Local Police Station",
    "Privacy violations": "Ministry of Electronics and Information Technology",
    "Excessive monitoring": _IR_DIVISION,
    "Public humiliation": _IR_DIVISION,
    "Exclusion from meetings": _IR_DIVISION,
    "Unreasonable deadlines": _IR_DIVISION,
    "Forced personal favors": _IR_DIVISION,
    "Stalking": "Local Police Station",
    "Threatening behavior": "Local Police Station and Ministry of Labour",
    # --- Social Security Concerns ---
    "PF withdrawal issues": _EPFO,
    "Pension payment delays": _EPFO,
    "ESIC benefit denial": _ESIC,
    "Maternity benefit delays": "Ministry of Women and Child Development",
    "Accident compensation issues": _ESIC,
    "Death benefit claims": _EPFO,
    "Gratuity calculation disputes": _IR_DIVISION,
    "Unemployment allowance issues": "Ministry of Labour and Employment - Social Security Division",
    "Insurance claim rejection": "Insurance Regulatory and Development Authority",
    "Medical reimbursement delays": _ESIC,
    "Retirement benefit disputes": _EPFO,
    "NPS contribution problems": "Pension Fund Regulatory and Development Authority",
    # --- Migrant Worker Issues ---
    "Inter-state migrant documentation": _MIGRANT_DIVISION,
    "Inadequate housing": "Ministry of Housing and Urban Affairs",
    "Return transportation denial": _MIGRANT_DIVISION,
    "Displacement allowance issues": _MIGRANT_DIVISION,
    "Language barrier discrimination": _MIGRANT_DIVISION,
    "Home state leave refusal": _MIGRANT_DIVISION,
    "Identity verification problems": "Ministry of Home Affairs",
    "Family separation issues": _MIGRANT_DIVISION,
    "Cultural insensitivity": _MIGRANT_DIVISION,
    "Remittance challenges": "Ministry of Finance",
    "Local authority harassment": "Ministry of Home Affairs",
    "Medical care access": "Ministry of Health and Family Welfare",
}

# Complaint sentence templates, grouped by top-level category. Templates may
# contain `{placeholder}` markers ({time_period}, {amount}, {state}, {hours},
# {years}, {attribute}, {minor_reason}, {situation}, {percentage},
# {lower_percentage}) that generate_varied_complaint swaps for random values.
complaint_templates = {}

complaint_templates["Wage Issues"] = [
    "I have not received my salary for {time_period}. My employer keeps delaying payments.",
    "My employer is paying below the minimum wage of {amount} rupees in {state}.",
    "The company deducted {amount} rupees from my salary without explanation.",
    "I worked overtime for {hours} hours but did not receive any additional payment.",
    "Female employees are being paid less than male employees for the same work.",
    "The company has not paid the annual bonus as required by law.",
    "After working for {years} years, I was denied gratuity payment upon resignation.",
    "My employer deducts PF from my salary but doesn't deposit it in my PF account.",
    "ESI contributions are being deducted but I'm unable to avail medical benefits.",
    "My contract states a higher wage than what I'm receiving monthly.",
    "I was promised a commission of {percentage}% but only received {lower_percentage}%.",
    "My salary calculation doesn't include the allowances mentioned in my offer letter.",
    "I'm being paid significantly less than the industry standard for my position.",
    "The company is deducting money for the days I took approved sick leave.",
    "My employer imposed a fine of {amount} rupees for {minor_reason} which seems excessive and illegal.",
    "Despite working on national holidays, I haven't received double wages as per law.",
    "The company has not revised my salary for {years} years despite annual evaluations.",
    "I was put on unpaid leave without my consent during {situation}.",
    "My employer has been delaying the variable pay component for {time_period}.",
    "I'm being paid in cash without any salary slip or documentation.",
]

complaint_templates["Discrimination"] = [
    "I was denied promotion because of my gender despite having better performance records.",
    "Employees from certain castes are given preferential treatment in our organization.",
    "I'm not allowed to take prayer breaks due to my religious beliefs.",
    "As a person with disability, the company has not provided reasonable accommodations.",
    "I was told I'm too old for the position despite having all qualifications.",
    "After informing about my pregnancy, I was removed from important projects.",
    "My supervisor makes inappropriate comments and touches female employees.",
    "Despite having better qualifications, I was passed over for promotion due to my {attribute}.",
    "The job advertisement explicitly mentioned they prefer candidates from certain communities.",
    "Colleagues from a particular state get better assignments and growth opportunities.",
    "I was mocked and excluded because I don't speak the local language fluently.",
    "After my marriage, I was denied field assignments assuming I can't travel.",
    "I face discrimination because I support a different political party than management.",
    "As an LGBTQ+ individual, I face daily harassment and isolation at work.",
    "I was criticized for not matching the company's 'look policy' despite my competence.",
    "My accommodation request for religious observance was denied without reason.",
    "I'm consistently assigned lower-level tasks than my peers from privileged backgrounds.",
    "As a north-eastern employee, I face stereotyping and prejudice from colleagues.",
    "My disability accommodations were removed after the inspection team left.",
    "I was asked inappropriate questions about my family planning during the interview.",
]

complaint_templates["Unsafe Working Conditions"] = [
    "Workers are not provided with safety helmets, gloves or boots in our factory.",
    "We are exposed to harmful chemicals without proper protection equipment.",
    "There are no fire extinguishers or emergency exits in our workplace.",
    "The office building has visible cracks and leakages creating unsafe conditions.",
    "Machine guards are removed to speed up production causing accidents.",
    "We are forced to work for 14-16 hours daily without proper breaks.",
    "The factory has poor ventilation with excessive dust and smoke.",
    "Toilet facilities are extremely dirty and unusable in our workplace.",
    "There is no access to clean drinking water in our factory premises.",
    "The noise levels in our factory exceed safe limits and no ear protection is provided.",
    "We work with radioactive materials without proper monitoring or protection.",
    "The lighting is so poor that it's causing eye strain and accidents.",
    "We are forced to work in unnatural positions causing severe back problems.",
    "During summer, the temperature exceeds 45°C with no cooling systems.",
    "Chemical spills are common and no cleanup protocols are followed.",
    "There are no emergency evacuation drills or protocols in place.",
    "Heavy objects are moved manually without mechanical aids causing injuries.",
    "Electrical wiring is exposed and has caused several shock incidents.",
    "We work with cutting machines that lack safety guards or emergency stops.",
    "The construction site lacks proper scaffolding and fall protection equipment.",
]

complaint_templates["Labor Rights Violations"] = [
    "I've been working as a contractor for 5 years but haven't been regularized.",
    "I was fired without any notice or explanation after working for {years} years.",
    "My boss threatened to fire me if I didn't submit a resignation letter.",
    "I've been working for 3 months without any formal appointment letter.",
    "Children below 14 years are employed in our manufacturing unit.",
    "I was denied maternity leave and benefits after pregnancy announcement.",
    "My sick leave application was rejected despite having medical certificate.",
    "Management threatened employees who tried to form a workers' union.",
    "The company refuses to negotiate wages and benefits collectively.",
    "I'm forced to work overtime without my consent or extra payment.",
    "After 10 years of service, I was denied retirement benefits.",
    "I was terminated for raising safety concerns about factory conditions.",
    "My employer has confiscated my passport and educational certificates.",
    "I'm an apprentice but I'm performing regular employee duties without benefits.",
    "The company doesn't grant national and festival holidays as per law.",
    "I was made to sign blank papers/resignation letters when joining.",
    "The company doesn't provide any written contract to employees.",
    "I'm employed as a trainee but receive no actual training.",
    "I was transferred to a remote location after filing a complaint.",
    "The company doesn't allow employees to use their phones during shifts.",
]

complaint_templates["Workplace Harassment"] = [
    "My supervisor constantly shouts and uses abusive language towards me.",
    "My colleague threatened to physically harm me if I reported irregularities.",
    "Co-workers have created offensive social media groups targeting me.",
    "The management encourages a toxic environment where employees are pitted against each other.",
    "After filing a complaint, I was transferred to a remote location as punishment.",
    "My manager constantly criticizes me in front of others affecting my mental health.",
    "False rumors about my character are being spread in the workplace.",
    "The company monitors personal communications and break time activities excessively.",
    "Managers check our bags and personal belongings without consent everyday.",
    "I was humiliated in front of clients for a minor mistake.",
    "I'm deliberately excluded from important meetings relevant to my work.",
    "My manager sets impossible deadlines and penalizes me for not meeting them.",
    "I'm pressured to run personal errands for my superior during work hours.",
    "A senior colleague follows me to and from work making me uncomfortable.",
    "My supervisor has threatened to ruin my career if I don't comply with demands.",
    "I'm constantly interrupted and talked over in meetings by male colleagues.",
    "My religious practices are mocked openly in the workplace.",
    "I'm assigned tasks outside my job description as a form of punishment.",
    "My superior takes credit for my work and ideas regularly.",
    "I'm forced to attend company events that conflict with my personal beliefs.",
]

complaint_templates["Social Security Concerns"] = [
    "Despite multiple applications, I haven't been able to withdraw my PF amount.",
    "My pension payments have been delayed for the past {time_period}.",
    "The hospital refused to accept my ESIC card for treatment.",
    "I've been waiting for my maternity benefit payment for {time_period}.",
    "After a workplace accident, my compensation claim has been pending for months.",
    "Following my husband's death at work, the death benefit claim is still unprocessed.",
    "The company calculated my gratuity based on basic pay only, not total emoluments.",
    "I was laid off but haven't received unemployment allowance as promised.",
    "My health insurance claim was rejected without proper explanation.",
    "My medical reimbursement has been pending for {time_period} despite submitting all documents.",
    "After retirement, I'm facing issues accessing my provident fund benefits.",
    "My employer has stopped contributing to NPS despite deducting from my salary.",
    "I cannot access my UAN account despite multiple attempts.",
    "My ESIC hospital referred me to a private hospital but refused reimbursement.",
    "As a contract worker, I don't receive any social security benefits.",
    "The company has provided incorrect details in my PF account making withdrawals impossible.",
    "After a workplace injury, I was denied paid medical leave.",
    "My widow pension was stopped without any prior notice or explanation.",
    "Despite being eligible, I haven't received the statutory bonus for two years.",
    "The paternity leave application was rejected without valid reasons.",
]

complaint_templates["Migrant Worker Issues"] = [
    "I'm an inter-state migrant worker without proper documentation provided by my employer.",
    "The housing provided to us migrant workers is overcrowded and lacks basic facilities.",
    "After project completion, the company refused to arrange transportation back to our home state.",
    "I haven't received the displacement allowance mentioned in the Inter-State Migrant Workmen Act.",
    "I face discrimination because I don't speak the local language fluently.",
    "I was denied leave to visit my home state during festivals.",
    "Local authorities have issues verifying my identity as my documents are in my native language.",
    "I haven't been able to bring my family due to lack of family accommodation provisions.",
    "Local colleagues mock our cultural practices and food habits.",
    "I can't send money home as I don't have proper banking facilities here.",
    "Local police harass us migrant workers during identity checks.",
    "We have no access to healthcare as our home state medical cards aren't accepted here.",
    "My contractor brought me here but now refuses to take responsibility for my employment.",
    "I receive lower wages than local workers for the same job.",
    "No translator is provided during important safety briefings.",
    "My home state work experience is not recognized for promotion considerations.",
    "During COVID-19 lockdown, we were abandoned without any support.",
    "My children cannot get admission to local schools due to documentation issues.",
    "I'm charged excessive rent knowing I have limited housing options as a migrant.",
    "My native state ID is not accepted for opening a bank account here.",
]

# Pools of filler values used to vary the generated complaints.

# Employer names; generate_varied_complaint picks one for the optional
# "I work at ..." sentence.
companies = [
    "ABC Manufacturing",
    "XYZ Technologies",
    "Sunrise Industries",
    "Global Solutions",
    "Datatech Services",
    "Pride Construction",
    "Metro Textiles",
    "Goodlife Healthcare",
    "Eastern Electronics",
    "Westland Automobiles",
    "Mountain Mining",
    "Coastal Shipping",
    "Skyrise Builders",
    "Greenfield Agro",
    "Bluestar Chemicals",
    "RedBrick Consultancy",
    "GoldenGate Software",
    "SilverLine Railways",
    "CopperTech Engineering",
    "IronStrength Steel",
    "CloudNine IT Solutions",
    "Sunshine Apparels",
    "Moonlight Entertainment",
    "Starway Hotels",
    "Rainbow Plastics",
    "Thunder Power Generation",
    "Lightning Fast Couriers",
    "Breeze Airlines",
    "Tsunami Shipping",
    "Volcano Cement",
    "Forest Wood Products",
    "Desert Oil Exploration",
    "Arctic Cold Storage",
    "Tropical Beverages",
    "Savannah Agriculture",
    "Alpine Adventures",
    "Oceanic Marine Products",
    "Prairie Food Processing",
    "Canyon Minerals",
    "Delta Logistics",
]

# Job titles; generate_varied_complaint picks one for the optional
# "I have been working as a ..." sentence.
designations = [
    "machine operator",
    "software developer",
    "administrative assistant",
    "sales executive",
    "customer service representative",
    "accountant",
    "factory worker",
    "supervisor",
    "production manager",
    "quality control inspector",
    "HR coordinator",
    "marketing specialist",
    "data entry operator",
    "network administrator",
    "security guard",
    "receptionist",
    "warehouse manager",
    "logistics coordinator",
    "procurement officer",
    "finance analyst",
    "team leader",
    "project coordinator",
    "business analyst",
    "content writer",
    "graphic designer",
    "social media manager",
    "driver",
    "delivery personnel",
    "maintenance technician",
    "electrician",
    "plumber",
    "carpenter",
    "welder",
    "forklift operator",
    "call center agent",
    "floor manager",
    "regional sales manager",
    "branch manager",
    "nurse",
    "medical representative",
]

# City names; generate_varied_complaint picks one for the optional
# "... branch/office/factory" sentence.
locations = [
    "Mumbai",
    "Delhi",
    "Bangalore",
    "Chennai",
    "Hyderabad",
    "Pune",
    "Kolkata",
    "Ahmedabad",
    "Jaipur",
    "Lucknow",
    "Kochi",
    "Chandigarh",
    "Bhubaneswar",
    "Indore",
    "Nagpur",
    "Surat",
    "Coimbatore",
    "Visakhapatnam",
    "Guwahati",
    "Bhopal",
    "Thiruvananthapuram",
    "Patna",
    "Ludhiana",
    "Vadodara",
    "Nashik",
    "Agra",
    "Varanasi",
    "Madurai",
    "Ranchi",
    "Raipur",
    "Dehradun",
    "Jodhpur",
    "Gwalior",
    "Jalandhar",
    "Tiruchirappalli",
    "Amritsar",
    "Mangalore",
    "Vijayawada",
    "Shillong",
    "Jammu",
    "Dharwad",
    "Imphal",
    "Panaji",
    "Shimla",
    "Itanagar",
]

# State / territory names substituted for the `{state}` placeholder.
state_names = [
    "Maharashtra",
    "Karnataka",
    "Tamil Nadu",
    "Delhi",
    "Uttar Pradesh",
    "Gujarat",
    "West Bengal",
    "Rajasthan",
    "Telangana",
    "Kerala",
    "Punjab",
    "Odisha",
    "Madhya Pradesh",
    "Haryana",
    "Bihar",
    "Andhra Pradesh",
    "Assam",
    "Chhattisgarh",
    "Uttarakhand",
    "Himachal Pradesh",
    "Jharkhand",
    "Goa",
    "Tripura",
    "Meghalaya",
    "Manipur",
    "Nagaland",
    "Arunachal Pradesh",
    "Mizoram",
    "Sikkim",
    "Jammu and Kashmir",
    "Ladakh",
]

# Duration phrases substituted for the `{time_period}` placeholder.
time_periods = [
    "2 months",
    "3 months",
    "the last quarter",
    "6 weeks",
    "45 days",
    "the past 90 days",
    "half a year",
    "8 weeks",
    "the last three pay cycles",
    "4 months",
    "100 days",
    "the entire financial year",
    "5 months",
    "7 weeks",
    "120 days",
    "one fiscal quarter",
    "the probation period",
    "9 weeks",
    "the project duration",
    "30 working days",
]

# Rupee figures substituted for the `{amount}` placeholder. They are kept as
# strings with a comma thousands separator (e.g. "5,000"), so the integers
# below are rendered through the "," format specifier.
amounts = [
    f"{rupees:,}"
    for rupees in (
        5000, 10000, 15000, 20000, 3500, 7500,
        12500, 9000, 18000, 25000, 1200, 8400,
        6750, 4200, 11500, 13000, 22000, 2800,
        17500, 30000, 950, 1500, 2500, 19500,
    )
]

# Overtime hour counts (as strings) substituted for the `{hours}` placeholder.
hours = "20 15 30 40 25 10 35 45 12 18 22 8 36 50 55".split()

# Year counts (as strings) substituted for the `{years}` placeholder and also
# used for the "working as a ... for N years" experience sentence.
years = "3 5 7 10 2 4 8 1 6 9 12 15 20 25 1.5 2.5 4.5".split()

# Personal characteristics substituted for the `{attribute}` placeholder.
attributes = [
    "age",
    "background",
    "region",
    "language",
    "marital status",
    "appearance",
    "accent",
    "educational institution",
    "political views",
    "family status",
    "height",
    "weight",
    "cultural background",
    "religious practices",
    "social class",
]

# Industry names; generate_varied_complaint picks one alongside the company
# for the optional "I work at ... in the ... industry." sentence.
industries = [
    "manufacturing",
    "IT",
    "healthcare",
    "construction",
    "retail",
    "hospitality",
    "textile",
    "automotive",
    "education",
    "logistics",
    "mining",
    "pharmaceutical",
    "banking",
    "insurance",
    "telecommunications",
    "agriculture",
    "entertainment",
    "media",
    "real estate",
    "oil and gas",
    "aviation",
    "shipping",
]

# Trivial infractions substituted for the `{minor_reason}` placeholder.
minor_reasons = [
    "being 5 minutes late",
    "taking a personal call",
    "not meeting unrealistic targets",
    "a small error in documentation",
    "wearing informal shoes",
    "not attending optional team events",
    "questioning company policies",
    "taking authorized breaks",
    "using personal email",
    "not participating in office celebrations",
]

# Business circumstances substituted for the `{situation}` placeholder.
situations = [
    "company reorganization",
    "low production periods",
    "system maintenance",
    "inventory checks",
    "market fluctuations",
    "between projects",
    "festive seasons",
    "quarterly closing",
]

# Commission rates (as strings) for the `{percentage}` and
# `{lower_percentage}` placeholders.
# NOTE(review): generate_varied_complaint draws from each list independently,
# so the "lower" figure is not guaranteed to be below the promised one.
percentages = "5 7 10 12 15 20 25 3 8 18".split()
lower_percentages = "2 3 5 7 8 10 12 1 4 6".split()

# Function to generate detailed and varied complaints
def generate_varied_complaint(category, subcategory, template):
    """Fill in a complaint template and append randomly chosen detail sentences.

    Each ``{placeholder}`` present in ``template`` is replaced (all of its
    occurrences with the same value) by a random entry from the matching
    module-level pool. Optional sentences about the employer, the
    complainant's role, the location, extra context and a resolution request
    are then appended, each gated by its own independent random draw.

    Args:
        category: Complaint category. Not read by the body.
        subcategory: Complaint subcategory. Not read by the body.
        template: Complaint text, possibly containing placeholders such as
            ``{time_period}`` or ``{amount}``.

    Returns:
        str: The completed complaint text.
    """
    complaint = template

    # (placeholder, pool getter) pairs in substitution order. The getters
    # defer the global lookup so a pool is only touched when its placeholder
    # actually occurs in the template.
    placeholder_pools = (
        ("{time_period}", lambda: time_periods),
        ("{amount}", lambda: amounts),
        ("{state}", lambda: state_names),
        ("{hours}", lambda: hours),
        ("{years}", lambda: years),
        ("{attribute}", lambda: attributes),
        ("{minor_reason}", lambda: minor_reasons),
        ("{situation}", lambda: situations),
        ("{percentage}", lambda: percentages),
        ("{lower_percentage}", lambda: lower_percentages),
    )
    for placeholder, get_pool in placeholder_pools:
        if placeholder in complaint:
            complaint = complaint.replace(placeholder, random.choice(get_pool()))

    # Employer sentence (about 60% of complaints)
    if random.random() > 0.4:
        company = random.choice(companies)
        industry = random.choice(industries)
        complaint += f" I work at {company} in the {industry} industry."

    # Designation and tenure sentence (about 40%)
    if random.random() > 0.6:
        designation = random.choice(designations)
        experience = random.choice(years)
        complaint += f" I have been working as a {designation} for {experience} years."

    # Location sentence (about 50%)
    if random.random() > 0.5:
        location = random.choice(locations)
        complaint += f" This is happening in our {location} branch/office/factory."

    # One extra context sentence (about 30%)
    if random.random() > 0.7:
        context_additions = [
            f" I have previously complained to my {random.choice(['supervisor', 'manager', 'HR', 'department head'])} but no action was taken.",
            f" This issue affects {random.choice(['many', 'several', 'a few', 'all'])} employees in my department.",
            " I have documentation and evidence to support my claim.",
            f" This started after the {random.choice(['management change', 'company acquisition', 'new policy implementation', 'restructuring'])}.",
            f" I need urgent intervention as my {random.choice(['financial situation', 'health', 'safety', 'job security'])} is at risk.",
            " Similar complaints have been raised by other employees as well.",
            " I am afraid of direct confrontation due to possible retaliation.",
        ]
        complaint += random.choice(context_additions)

    # Resolution request (about 20%)
    if random.random() > 0.8:
        resolution_requests = [
            " I request immediate intervention and resolution of this matter.",
            " I seek fair compensation for the damages caused.",
            " I would like the authorities to inspect our workplace and take appropriate action.",
            " I demand that my rights under labor laws be protected.",
            " I am requesting transfer to another department/branch.",
            " I want the harassment to stop and appropriate action against the perpetrators.",
            " I need assistance in recovering my unpaid wages/benefits.",
            " I am requesting that proper safety measures be implemented immediately.",
            " I would like this to be resolved without affecting my employment status.",
            " I want the responsible officials to be held accountable for these violations.",
        ]
        # Previously the list was built but never used; append the request.
        complaint += random.choice(resolution_requests)

    # Previously the function fell off the end and returned None.
    return complaint

# Personal-circumstance sentences used to make complaints more unique.
# generate_varied_dataset appends at most one of these, chosen at random, to a
# complaint. Every entry starts with a space so it can be concatenated
# directly onto the preceding sentence.
personal_details = [
    " I am a single parent supporting two children.",
    " I have been working in this industry for over a decade.",
    " I recently relocated to this city for this job.",
    " I am the sole breadwinner in my family.",
    " I belong to a scheduled caste community.",
    " I am a person with physical disability.",
    " I am a migrant worker from a different state.",
    " I am a woman working in a male-dominated field.",
    " I am a senior citizen close to retirement.",
    " I am a fresh graduate in my first job.",
    " I have specialized skills in my field but face exploitation.",
    " I am supporting my elderly parents with my income.",
    " I have education loans to repay which makes this situation critical.",
    " I previously worked abroad but returned to India for family reasons.",
    " I am a contractual worker hoping for permanent employment.",
    " I have medical conditions that require regular treatment.",
    " I am from a rural background trying to establish myself in an urban job market.",
    " I am working while pursuing higher education.",
    " I switched careers recently and face discrimination due to it.",
    " I am supporting my siblings' education with my income."
]

# Category-specific detail sentences, keyed by complaint category name.
# generate_varied_dataset looks up the list for the sampled category and
# appends at most one randomly chosen sentence from it, so every category it
# can sample needs a key here. Every sentence starts with a space so it can be
# concatenated directly onto the complaint text.
category_specific_details = {
    # Financial impact and employer pay practices
    'Wage Issues': [
        " The issue has affected my ability to pay rent and bills.",
        " My family is suffering due to irregular payment of wages.",
        " I have loan EMIs that I'm unable to pay due to salary delays.",
        " The company is profitable yet refuses to pay fair wages.",
        " Other companies in the same area pay much better for similar work.",
        " The management recently gave themselves bonuses while delaying our payments.",
        " I have kept records of all my working hours and payment discrepancies.",
        " The company changed payroll policies without proper notice.",
        " Our wages have remained stagnant despite inflation and rising costs.",
        " The company has a history of wage violations that go unreported."
    ],
    # Unequal treatment based on identity or background
    'Discrimination': [
        " The discrimination is subtle but persistent in daily interactions.",
        " I have witnessed similar treatment of others from my community.",
        " Promotions consistently go to employees from certain backgrounds only.",
        " There are documented differences in how different groups are evaluated.",
        " Derogatory comments about my identity are common in the workplace.",
        " Important opportunities are only shared with select groups.",
        " People from my background are consistently assigned menial tasks.",
        " There's a significant pay gap between different groups doing identical work.",
        " The company policies on paper support equality but practice is different.",
        " I was directly told that people 'like me' don't get promoted here."
    ],
    # Injuries, ignored safety concerns and falsified compliance
    'Unsafe Working Conditions': [
        " Several workers have already suffered injuries due to these conditions.",
        " Management ignores safety concerns to maintain production schedules.",
        " Required safety certifications are outdated or falsified.",
        " I developed health issues directly related to workplace conditions.",
        " Safety equipment exists but workers are discouraged from using it.",
        " Mandatory safety training is rushed and inadequate.",
        " When inspections occur, the company temporarily improves conditions.",
        " Accident reports are either not filed or manipulated.",
        " Workers who report safety issues face retaliation.",
        " The company prioritizes cost-cutting over worker safety."
    ],
    # Contract, statutory-benefit and grievance-process violations
    'Labor Rights Violations': [
        " My employment contract is different from actual working conditions.",
        " The company exploits legal loopholes to deny statutory benefits.",
        " Workers who demand their rights are often terminated.",
        " Documentation of work hours is manipulated to avoid overtime payment.",
        " The company operates through multiple shell contractors to avoid responsibility.",
        " We are made to sign blank documents that are later misused.",
        " Legal compliance exists only on paper, not in practice.",
        " Company policies contradict labor laws but are strictly enforced.",
        " There is no grievance redressal system available to workers.",
        " Job descriptions are intentionally vague to assign unlimited responsibilities."
    ],
    # Effects of harassment and the employer's lack of response
    'Workplace Harassment': [
        " The harassment has caused me significant mental distress.",
        " HR department is aware but has taken no action despite evidence.",
        " The harasser is protected due to their relationship with senior management.",
        " There have been previous complaints against the same individual/group.",
        " Witnesses are afraid to come forward due to fear of retaliation.",
        " The toxicity has led to high employee turnover in our department.",
        " I've had to take medical leave due to stress from the harassment.",
        " Company culture normalizes and even rewards such behavior.",
        " I've documented all incidents with dates, times and details.",
        " The harassment increased after I declined personal advances/requests."
    ],
    # Problems claiming or crediting benefits
    'Social Security Concerns': [
        " I discovered the discrepancy when trying to claim benefits.",
        " Despite automatic deductions, benefits are not being properly credited.",
        " The company is using outdated forms and processes for benefits.",
        " Multiple visits to government offices haven't resolved the issue.",
        " My UAN/account details were incorrectly entered by the employer.",
        " I'm entitled to these benefits as per my years of service.",
        " The delay in benefits has affected my post-retirement planning.",
        " I've submitted all required documentation multiple times.",
        " Different departments keep redirecting me without solutions.",
        " My employer refuses to provide necessary certification for claims."
    ],
    # Recruitment, documents, accommodation and local-support problems
    'Migrant Worker Issues': [
        " We were recruited through agents who made false promises.",
        " Our identity documents are held by the contractor/employer.",
        " We face hostility from local workers and community.",
        " Essential information is not provided in languages we understand.",
        " We pay excessive rates for basic accommodations provided by the employer.",
        " Our interstate worker benefits are not being provided.",
        " We face restrictions on movement and communication with family.",
        " Our cultural and dietary needs are ignored creating difficulties.",
        " We are charged excessive fees for transportation to work sites.",
        " During emergencies, we lack access to local support systems."
    ]
}

# Function to generate a varied and realistic complaint dataset
def generate_varied_dataset(num_samples=1000):
    """Assemble a DataFrame of synthetic, randomly varied complaints.

    For each sample a category, one of its subcategories and one of the
    category's templates are drawn at random. Placeholders in the template
    are filled from the module-level value pools, optional detail sentences
    are appended, and the department is looked up from the subcategory.

    Args:
        num_samples: Number of complaints to generate.

    Returns:
        pandas.DataFrame: One row per complaint with the columns
        ``complaint_text``, ``category``, ``subcategory`` and ``department``.
    """
    # (placeholder, pool getter) pairs in substitution order. The getters
    # postpone the global lookup until a placeholder is actually present.
    placeholder_pools = (
        ("{time_period}", lambda: time_periods),
        ("{amount}", lambda: amounts),
        ("{state}", lambda: state_names),
        ("{hours}", lambda: hours),
        ("{years}", lambda: years),
        ("{attribute}", lambda: attributes),
        ("{minor_reason}", lambda: minor_reasons),
        ("{situation}", lambda: situations),
        ("{percentage}", lambda: percentages),
        ("{lower_percentage}", lambda: lower_percentages),
    )

    rows = []
    for _ in range(num_samples):
        # Uniform draw over categories, then over that category's
        # subcategories and templates.
        category = random.choice(list(categories.keys()))
        subcategory = random.choice(categories[category])
        complaint_text = random.choice(complaint_templates[category])

        # Every occurrence of a placeholder receives the same drawn value.
        for placeholder, get_pool in placeholder_pools:
            if placeholder in complaint_text:
                complaint_text = complaint_text.replace(placeholder, random.choice(get_pool()))

        # Optional sentences are collected in order and joined at the end.
        pieces = [complaint_text]

        # Employer sentence (70% chance)
        if random.random() > 0.3:
            pieces.append(
                f" I work at {random.choice(companies)} in the {random.choice(industries)} industry."
            )

        # Designation and tenure sentence (60% chance)
        if random.random() > 0.4:
            pieces.append(
                f" I have been working as a {random.choice(designations)} for {random.choice(years)} years."
            )

        # Location sentence (50% chance)
        if random.random() > 0.5:
            pieces.append(f" This is happening in our {random.choice(locations)} branch/office/factory.")

        # Personal-circumstance sentence (30% chance)
        if random.random() > 0.7:
            pieces.append(random.choice(personal_details))

        # Category-specific sentence (40% chance)
        if random.random() > 0.6:
            pieces.append(random.choice(category_specific_details[category]))

        # The resolution-request sentence is intentionally not appended here.

        rows.append(dict(
            complaint_text="".join(pieces),
            category=category,
            subcategory=subcategory,
            department=detailed_department_mapping[subcategory],
        ))

    return pd.DataFrame(rows)

# Build the full synthetic dataset
complaints_df = generate_varied_dataset(1000)

# Label noise: overwrite the department on a random 5% of rows with a
# department drawn from the mapping's values (to simulate real-world
# complexity). A drawn department may coincide with the row's existing one.
noise_indices = np.random.choice(
    len(complaints_df),
    size=int(len(complaints_df)*0.05),
    replace=False,
)
random_departments = np.random.choice(
    list(detailed_department_mapping.values()),
    size=len(noise_indices),
)
complaints_df.loc[noise_indices, 'department'] = random_departments

# 80/20 train/test split with a fixed split seed
train_df, test_df = train_test_split(complaints_df, test_size=0.2, random_state=42)

# Persist both splits as CSV (row index omitted)
train_df.to_csv('indian_workplace_complaints_train.csv', index=False)
test_df.to_csv('indian_workplace_complaints_test.csv', index=False)

# Size summary
print(
    f"Generated dataset with {len(complaints_df)} complaints",
    f"Train set: {len(train_df)} samples",
    f"Test set: {len(test_df)} samples",
    sep="\n",
)

# Per-category sample counts
print("\nCategory distribution:", complaints_df['category'].value_counts(), sep="\n")

# Ten most frequent departments
print("\nDepartment distribution:", complaints_df['department'].value_counts().head(10), sep="\n")

# Up to two random complaints per category to show the variety
print("\nSample complaints:")
for category in categories:
    samples = complaints_df[complaints_df['category'] == category]
    samples = samples.sample(min(2, len(samples)))
    for _, sample in samples.iterrows():
        print(
            f"\n--- {category} sample ---",
            f"Text: {sample['complaint_text']}",
            f"Subcategory: {sample['subcategory']}",
            f"Department: {sample['department']}",
            "-" * 80,
            sep="\n",
        )

Generated dataset with 1000 complaints
Train set: 800 samples
Test set: 200 samples

Category distribution:
category
Unsafe Working Conditions    162
Discrimination               145
Migrant Worker Issues        145
Workplace Harassment         143
Social Security Concerns     142
Wage Issues                  136
Labor Rights Violations      127
Name: count, dtype: int64

Department distribution:
department
Ministry of Labour and Employment - Industrial Relations Division           274
Ministry of Labour and Employment - Wage Division                           129
Directorate General of Factory Advice Service and Labour Institutes          89
Ministry of Labour and Employment - Inter-State Migrant Workers Division     79
Employees Provident Fund Organisation                                        48
Employees State Insurance Corporation                                        42
Ministry of Home Affairs                                                     30
National Commission for Women

In [7]:


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pickle
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import warnings
warnings.filterwarnings('ignore')

# Download the NLTK resources used by preprocess_text: the stopword list,
# WordNet for lemmatization, and the Punkt tokenizer data.
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
# Newer NLTK releases load the 'punkt_tab' resource in word_tokenize instead
# of the pickled 'punkt' models, so fetch it too. nltk.download reports a
# failed fetch by returning False instead of raising.
nltk.download('punkt_tab')

# Direct mapping from categories to primary departments.
# determine_department uses this as the default routing for a predicted
# category, so every category label the classifier can emit needs a key here.
category_to_department = {
    'Wage Issues': 'Ministry of Labour and Employment - Wage Division',
    'Discrimination': 'National Commission for Women/Minorities/SC/ST (based on case)',
    'Unsafe Working Conditions': 'Directorate General of Factory Advice Service and Labour Institutes',
    'Labor Rights Violations': 'Ministry of Labour and Employment - Industrial Relations Division',
    'Workplace Harassment': 'Ministry of Women and Child Development/Local Complaints Committee',
    'Social Security Concerns': 'Ministry of Labour and Employment - Social Security Division',
    'Migrant Worker Issues': 'Ministry of Labour and Employment - Inter-State Migrant Workers Division'
}

# Keyword-based routing used to refine the category default.
# Keys are lowercase words or phrases looked for in the lowercased complaint
# text; values are department names. determine_department only overrides the
# category default when at least two found keywords point to the same
# department and no other department ties with it.
keyword_to_department = {
    # Pay-related terms
    'salary': 'Ministry of Labour and Employment - Wage Division',
    'wage': 'Ministry of Labour and Employment - Wage Division',
    'payment': 'Ministry of Labour and Employment - Wage Division',
    'bonus': 'Ministry of Labour and Employment - Wage Division',
    'overtime': 'Ministry of Labour and Employment - Wage Division',
    
    # Provident fund terms
    'pf': 'Employees Provident Fund Organisation',
    'provident fund': 'Employees Provident Fund Organisation',
    'epf': 'Employees Provident Fund Organisation',
    
    # State insurance and medical benefit terms
    'esi': 'Employees State Insurance Corporation',
    'medical benefit': 'Employees State Insurance Corporation',
    'health insurance': 'Employees State Insurance Corporation',
    
    # Workplace safety terms
    'safety': 'Directorate General of Factory Advice Service and Labour Institutes',
    'hazard': 'Directorate General of Factory Advice Service and Labour Institutes',
    'accident': 'Directorate General of Factory Advice Service and Labour Institutes',
    'fire': 'State Fire Department and Labour Department',
    
    # Harassment and discrimination terms
    'sexual harassment': 'Internal Complaints Committee/Local Complaints Committee',
    'gender discrimination': 'National Commission for Women',
    'caste discrimination': 'National Commission for Scheduled Castes/Tribes',
    'religious discrimination': 'National Commission for Minorities',
    'disability': 'Chief Commissioner for Persons with Disabilities',
    
    # Employment relationship terms
    'contract': 'Ministry of Labour and Employment - Industrial Relations Division',
    'termination': 'Ministry of Labour and Employment - Industrial Relations Division',
    'resignation': 'Ministry of Labour and Employment - Industrial Relations Division',
    'union': 'Ministry of Labour and Employment - Industrial Relations Division',
    'leave': 'Ministry of Labour and Employment - Industrial Relations Division',
    
    # Migrant worker and housing terms
    'migrant': 'Ministry of Labour and Employment - Inter-State Migrant Workers Division',
    'interstate': 'Ministry of Labour and Employment - Inter-State Migrant Workers Division',
    'housing': 'Ministry of Housing and Urban Affairs',
    
    # Retirement benefit terms
    'pension': 'Employees Provident Fund Organisation',
    'gratuity': 'Ministry of Labour and Employment - Industrial Relations Division',
    
    # Child labor and maternity terms
    'child labor': 'Ministry of Labour and Employment - Child Labor Division',
    'maternity': 'Ministry of Women and Child Development'
}

# Read the train and test splits from their CSV files
train_df, test_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        'indian_workplace_complaints_train.csv',
        'indian_workplace_complaints_test.csv',
    )
)

# Report (rows, columns) for each split
print(
    f"Training set shape: {train_df.shape}",
    f"Testing set shape: {test_df.shape}",
    sep="\n",
)

# Enhanced text preprocessing
def preprocess_text(text):
    """Normalize complaint text into a space-joined string of lemmatized tokens.

    Steps: lowercase; drop every character that is not a letter, whitespace,
    period or comma; tokenize with NLTK; remove English stopwords except a
    small set of negations and prepositions; lemmatize with WordNet.

    Args:
        text: Raw complaint text.

    Returns:
        str: The remaining lemmatized tokens joined by single spaces.
    """
    # Lowercase, then strip digits and special characters (periods and
    # commas survive and become their own tokens).
    cleaned = re.sub(r'[^a-zA-Z\s\.\,]', '', text.lower())
    words = nltk.word_tokenize(cleaned)

    # Negations and prepositions are kept because they can change the
    # meaning of a complaint.
    kept_words = {'no', 'not', 'against', 'below', 'above', 'under', 'over', 'without', 'with'}
    custom_stopwords = set(stopwords.words('english')) - kept_words

    lemmatizer = WordNetLemmatizer()
    return ' '.join(
        lemmatizer.lemmatize(word)
        for word in words
        if word not in custom_stopwords
    )

print("Preprocessing text data...")
# Apply enhanced preprocessing to both splits; the cleaned text is stored in a
# new 'preprocessed_text' column that the model trains and predicts on.
train_df['preprocessed_text'] = train_df['complaint_text'].apply(preprocess_text)
test_df['preprocessed_text'] = test_df['complaint_text'].apply(preprocess_text)

# Model for Category Classification: TF-IDF over unigrams and bigrams feeding
# a multinomial Naive Bayes classifier.
print("\nBuilding Category Classification Model...")
category_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(max_features=5000, ngram_range=(1, 2), min_df=2)),
    ('classifier', MultinomialNB(alpha=0.01))  # Using the best params from your results
])

# Train the category model (Pipeline.fit returns the fitted pipeline itself)
print("Training the category classification model...")
category_model = category_pipeline.fit(train_df['preprocessed_text'], train_df['category'])

# Evaluate the category model on the held-out test split
category_predictions = category_model.predict(test_df['preprocessed_text'])
category_accuracy = accuracy_score(test_df['category'], category_predictions)
print(f"Category Classification Accuracy: {category_accuracy:.4f}")
print("\nCategory Classification Report:")
print(classification_report(test_df['category'], category_predictions))

# Save the model. The context manager closes the file handle once the pickle
# is written; the previous bare open() call left it open.
print("\nSaving the category model...")
with open('category_classifier_model.pkl', 'wb') as model_file:
    pickle.dump(category_model, model_file)

# Function for identifying keywords in complaint text
def identify_keywords(text, keyword_dict):
    """Return the keys of ``keyword_dict`` that occur in ``text``.

    Matching is case-insensitive on the text side (the text is lowercased;
    keys are compared as given, so they should be lowercase). A keyword only
    counts when it begins at the start of a word. This keeps inflected forms
    such as "wages" or "accidents" matching while rejecting hits buried
    inside unrelated words, e.g. 'esi' in "resignation" or 'pf' in "helpful".

    Args:
        text: Complaint text to scan.
        keyword_dict: Mapping whose keys are the keywords or phrases to find.

    Returns:
        list: Matching keywords, in the iteration order of ``keyword_dict``.
    """
    text = text.lower()
    return [
        keyword
        for keyword in keyword_dict
        # (?<!\w) requires that no word character precedes the keyword;
        # re.escape keeps punctuation in a keyword from acting as regex syntax.
        if re.search(r'(?<!\w)' + re.escape(keyword), text)
    ]

# Function to determine department based on category and keywords
def determine_department(category, complaint_text):
    """Pick the department for a complaint from its category and keywords.

    The category's primary department is the default. It is overridden only
    when the keywords found in the text vote for a single department at least
    twice, with no other department tied for the top count.

    Args:
        category: Predicted complaint category (a key of
            ``category_to_department``).
        complaint_text: Raw complaint text to scan for keywords.

    Returns:
        tuple: ``(department, found_keywords)``.

    Raises:
        KeyError: If ``category`` is not in ``category_to_department``.
    """
    fallback_dept = category_to_department[category]
    found_keywords = identify_keywords(complaint_text.lower(), keyword_to_department)

    # No keywords: nothing can override the category default.
    if not found_keywords:
        return fallback_dept, found_keywords

    # One vote per found keyword for the department it maps to.
    votes = {}
    for keyword in found_keywords:
        dept = keyword_to_department[keyword]
        votes[dept] = votes.get(dept, 0) + 1

    top_votes = max(votes.values())
    leaders = [dept for dept, count in votes.items() if count == top_votes]

    # Override only on a clear winner backed by two or more keywords.
    if top_votes >= 2 and len(leaders) == 1:
        return leaders[0], found_keywords

    return fallback_dept, found_keywords

# Complete function for complaint classification and department recommendation
def classify_complaint(complaint_text):
    """Classify a complaint and recommend the department to route it to.

    Args:
        complaint_text: Raw complaint text.

    Returns:
        dict: The original complaint, the predicted category and its
        probability, up to two runner-up categories as (label, probability)
        pairs, the recommended department, the keywords found in the text,
        and human-readable explanations for the category and department.
    """
    features = [preprocess_text(complaint_text)]

    # Predicted label plus the full probability vector over all classes
    category = category_model.predict(features)[0]
    category_proba = category_model.predict_proba(features)[0]
    category_confidence = max(category_proba)

    # Three most probable classes, most probable first
    labels = category_model.classes_
    top_categories = [
        (labels[idx], category_proba[idx])
        for idx in category_proba.argsort()[-3:][::-1]
    ]

    # Category default, possibly overridden by keyword votes
    recommended_department, keywords_found = determine_department(category, complaint_text)

    category_explanation = f"This complaint is classified as '{category}' because it contains terms commonly associated with {category.lower()} issues."
    department_explanation = f"Based on the category '{category}', this complaint should be directed to '{recommended_department}'."
    if keywords_found:
        department_explanation += f" This recommendation is supported by specific keywords found in the complaint: {', '.join(keywords_found)}."

    return {
        'complaint': complaint_text,
        'category': category,
        'category_confidence': category_confidence,
        # Everything after the top entry; empty when only one class exists
        'alternative_categories': top_categories[1:],
        'recommended_department': recommended_department,
        'keywords_found': keywords_found,
        'explanation': {
            'category': category_explanation,
            'department': department_explanation
        }
    }

# Run a handful of hand-written complaints through the full pipeline
print("\nTesting the model with some examples:")
test_examples = [
    "I have not been paid my salary for the past 3 months despite repeated requests to the HR department.",
    "Female employees are being paid less than male employees for the same work in our manufacturing unit.",
    "We are working in a factory with no fire exits or safety equipment. There have been two accidents last month.",
    "My manager keeps making inappropriate comments about my appearance and threatens me with termination if I complain.",
    "I am a migrant worker from Bihar and the local authorities refuse to recognize my work permit.",
    "My PF has been deducted from my salary for 5 years but when I checked online, no contributions have been made to my account."
]

for example in test_examples:
    result = classify_complaint(example)

    # Prediction and runner-up categories
    print("\nComplaint:", result['complaint'])
    print(
        f"Category: {result['category']} (Confidence: {result['category_confidence']:.2f})",
        "Alternative Categories:",
        sep="\n",
    )
    for cat, conf in result['alternative_categories']:
        print(f"  - {cat} (Confidence: {conf:.2f})")

    # Routing decision and the keywords that were found
    print(f"Recommended Department: {result['recommended_department']}")
    if result['keywords_found']:
        print(f"Keywords found: {', '.join(result['keywords_found'])}")

    # Human-readable reasoning
    print("\nExplanation:")
    for key, explanation in result['explanation'].items():
        print(f"  - {explanation}")
    print("-" * 80)

print("\nModel building and evaluation complete.")

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\praga\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\praga\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\praga\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Training set shape: (800, 4)
Testing set shape: (200, 4)
Preprocessing text data...

Building Category Classification Model...
Training the category classification model...
Category Classification Accuracy: 0.9950

Category Classification Report:
                           precision    recall  f1-score   support

           Discrimination       1.00      1.00      1.00        29
  Labor Rights Violations       1.00      1.00      1.00        25
    Migrant Worker Issues       1.00      1.00      1.00        28
 Social Security Concerns       0.97      1.00      0.98        30
Unsafe Working Conditions       1.00      1.00      1.00        23
              Wage Issues       1.00      1.00      1.00        28
     Workplace Harassment       1.00      0.97      0.99        37

                 accuracy                           0.99       200
                macro avg       1.00      1.00      1.00       200
             weighted avg       1.00      0.99      1.00       200


Saving the c