In [3]:
import csv
import random

# --- 1. CONFIGURATION ---
TOTAL_DESCRIPTIONS = 5000
OUTPUT_FILE = 'civic_issues_dataset.csv'

# --- 2. DATA FOR VARIATION ---

# List of locations (you can add more to increase variety)
LOCATIONS = [
    "near the main market on GT Road", "on Panchanantala Road", "just outside the Howrah Maidan",
    "in the alleyway behind Foreshore Road", "at the crossing of Andul Road", "near Santragachi Jheel",
    "on Shibpur Road, close to the Botanical Gardens", "in the Belur Math area", "at Kona Expressway turning",
    "adjacent to Avani Riverside Mall", "in the Kazipara neighborhood", "near the Howrah railway station exit",
    "on a side street in Bantra", "close to the Don Bosco School", "along the riverbank near Telkal Ghat",
    "in the middle of Sarat Chatterjee Avenue", "at the intersection near Howrah Bridge",
]

# Modifiers for severity and time
SEVERITIES = ["dangerous", "significant", "minor", "severe", "growing", "hazardous", "critical"]
DURATIONS = ["for the past week", "since the last rainfall", "for over a month", "for several days now", "since yesterday morning", "for a fortnight"]
IMPACTS = ["causing traffic jams", "posing a risk to pedestrians", "creating a hygiene issue", "leading to frequent accidents", "disrupting daily life", "making the area unsafe at night"]

# --- 3. DESCRIPTION TEMPLATES ---

# Templates organized by the categories you provided
ISSUE_TEMPLATES = {
    "Road & Traffic": {
        "Potholes / damaged roads": [
            "There is a {severity} pothole {location} that has been ignored {duration}. It is {impact} and requires immediate attention before it causes a major accident.",
            "The road surface {location} is severely damaged, with multiple large cracks and potholes appearing. This has been a problem {duration}, making it difficult for vehicles to pass safely.",
            "A particularly deep and {severity} pothole has formed {location}. It fills with water during rain, making it invisible to drivers. This is {impact}."
        ],
        "Broken traffic signals": [
            "The traffic signal {location} is completely non-functional {duration}. This has led to chaos at the intersection, {impact} during peak hours.",
            "One of the lights at the traffic signal {location} is broken. The green light is not working, causing confusion and near-misses among commuters.",
            "For the past few days, the traffic lights {location} have been blinking yellow continuously. This is a {severity} issue that needs urgent repair to regulate traffic flow."
        ],
        "Illegal parking": [
            "Illegal parking {location} has become a major nuisance {duration}. Cars are parked on both sides of the narrow street, {impact} and blocking emergency vehicle access.",
            "A number of vehicles are consistently parked in the 'No Parking' zone {location}. This obstructs the view for other drivers and is a {severity} safety hazard.",
            "Despite clear signage, illegal parking continues {location}. This narrows the road and is {impact}, especially during school hours."
        ]
    },
    "Streetlights & Electricity": {
        "Broken or dim streetlights": [
            "The streetlight {location} has been out of order {duration}. The entire stretch of road is plunged into darkness at night, {impact}.",
            "A streetlight pole {location} is flickering continuously. It's a {severity} problem that makes visibility poor and the area feel unsafe after dark.",
            "Most of the streetlights on the road {location} are extremely dim. They provide almost no light, which is a major safety concern for residents returning home late."
        ],
        "Fallen poles / exposed wires": [
            "A utility pole has fallen {location} after the recent storm and is now blocking part of the road. There are live wires exposed, making it a {severity} and dangerous situation.",
            "Exposed electrical wires are hanging from a damaged junction box {location}. This is a {severity} situation, especially with children playing nearby.",
            "A concrete electricity pole {location} is tilted at a dangerous angle {duration}, and the base has cracked. It looks like it could fall at any moment."
        ]
    },
    "Water Supply & Drainage": {
        "Pipeline leaks / bursts": [
            "There is a major water pipeline leak {location}. Gallons of clean drinking water are being wasted every day, and it has been happening {duration}.",
            "A water pipe burst {location}, causing a constant flow of water onto the street. This has created a large puddle and is starting to damage the road surface.",
            "A {severity} leak from an underground pipeline is flooding the lane {location}. It is creating a mess and causing a shortage of water supply in the area."
        ],
        "Waterlogging / drainage overflow": [
            "The drainage system {location} is overflowing after just a little rain. The dirty water floods the streets, {impact} and posing a health risk.",
            "Severe waterlogging is being reported {location} {duration}. The drains are clearly choked and unable to handle the water volume, creating a breeding ground for mosquitoes.",
            "An open drain {location} is overflowing, spilling sewage onto the main road. The smell is unbearable and the situation is extremely unhygienic for everyone."
        ]
    },
    "Waste & Sanitation": {
        "Uncollected garbage / overflowing bins": [
            "The community garbage bin {location} is overflowing with waste. Garbage has been piling up on the street {duration}, attracting stray animals and creating a foul smell.",
            "Waste has not been collected from our area {location} {duration}. The pile of garbage is growing daily, which is a {severity} health hazard for the residents.",
            "An overflowing dumpster {location} is blocking the footpath. The waste is spilling onto the road, {impact} and creating an unsightly and unhygienic environment."
        ],
        "Dirty public toilets": [
            "The public toilet facility {location} is in a deplorable state. It is extremely dirty, lacks water supply, and is practically unusable by anyone.",
            "There is a lack of basic sanitation in the public toilets {location}. They have not been cleaned {duration} and pose a significant public health risk.",
            "The condition of the public restroom {location} is unacceptable. The doors are broken, there is no lighting, and it is incredibly unsanitary."
        ]
    },
    "Public Safety": {
        "Open manholes / missing covers": [
            "A manhole {location} has been left open without any warning signs. It is a {severity} trap for pedestrians and motorists, especially at night.",
            "The cover of a manhole is missing {location} {duration}. Residents have placed a tree branch in it as a temporary warning, but a permanent solution is needed urgently.",
            "An uncovered manhole {location} is a {severity} safety risk. It is located right in the middle of a busy sidewalk, posing a direct threat to people walking there."
        ],
        "Unsafe pedestrian crossings": [
            "The zebra crossing {location} has faded completely and is no longer visible to drivers. This makes it very unsafe for pedestrians trying to cross the busy road.",
            "There is no proper pedestrian crossing {location}, despite it being a busy intersection with a school nearby. This is {impact} and is an accident waiting to happen.",
            "The pedestrian signal at the crossing {location} is not working. People are forced to guess when to cross, which is extremely {severity}."
        ]
    },
    "Public Infrastructure": {
        "Damaged footpaths / benches": [
            "The footpath {location} is broken and uneven, with loose paving slabs everywhere. It is very difficult for elderly people to walk on and is a tripping hazard.",
            "Several public benches in the park {location} are broken and have been in a state of disrepair {duration}. They are unusable for visitors.",
            "A large section of the sidewalk {location} has been dug up and left unrepaired. It forces pedestrians to walk on the main road, which is very risky."
        ],
        "Damaged bus stops": [
            "The shelter at the bus stop {location} is badly damaged. The roof is broken, and it provides no protection from the sun or rain for waiting passengers.",
            "The seating at the bus stop {location} has been completely vandalized and is unusable. Commuters are forced to stand while waiting for the bus.",
            "A bus stop signpost {location} has fallen down. There is no clear indication of where the bus stop is, causing confusion for passengers and bus drivers."
        ]
    }
}

# --- 4. GENERATE AND WRITE DATA ---

print(f"Generating {TOTAL_DESCRIPTIONS} descriptions...")

with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(['Category', 'Issue', 'Description'])

    # Generate data
    for _ in range(TOTAL_DESCRIPTIONS):
        # Randomly select a category and one of its issues
        category = random.choice(list(ISSUE_TEMPLATES.keys()))
        issue = random.choice(list(ISSUE_TEMPLATES[category].keys()))

        # Randomly select a template for that issue
        template = random.choice(ISSUE_TEMPLATES[category][issue])

        # Populate the template with random data
        description = template.format(
            severity=random.choice(SEVERITIES),
            location=random.choice(LOCATIONS),
            duration=random.choice(DURATIONS),
            impact=random.choice(IMPACTS)
        )

        # Write the row to the CSV file
        writer.writerow([category, issue, description])

print(f"\n✅ Success! Your dataset has been saved to '{OUTPUT_FILE}'.")
print(f"Total rows generated: {TOTAL_DESCRIPTIONS}")

Generating 5000 descriptions...

✅ Success! Your dataset has been saved to 'civic_issues_dataset.csv'.
Total rows generated: 5000


In [4]:
import csv
import random

# --- 1. CONFIGURATION ---
TOTAL_DESCRIPTIONS = 10000
OUTPUT_FILE = 'civic_issues_dataset.csv'

# --- 2. DATA FOR VARIATION ---

# List of locations (you can add more to increase variety)
LOCATIONS = [
    "near the main market on GT Road", "on Panchanantala Road", "just outside the Howrah Maidan",
    "in the alleyway behind Foreshore Road", "at the crossing of Andul Road", "near Santragachi Jheel",
    "on Shibpur Road, close to the Botanical Gardens", "in the Belur Math area", "at Kona Expressway turning",
    "adjacent to Avani Riverside Mall", "in the Kazipara neighborhood", "near the Howrah railway station exit",
    "on a side street in Bantra", "close to the Don Bosco School", "along the riverbank near Telkal Ghat",
    "in the middle of Sarat Chatterjee Avenue", "at the intersection near Howrah Bridge",
]

# Modifiers for severity and time
SEVERITIES = ["dangerous", "significant", "minor", "severe", "growing", "hazardous", "critical"]
DURATIONS = ["for the past week", "since the last rainfall", "for over a month", "for several days now", "since yesterday morning", "for a fortnight"]
IMPACTS = ["causing traffic jams", "posing a risk to pedestrians", "creating a hygiene issue", "leading to frequent accidents", "disrupting daily life", "making the area unsafe at night"]

# --- 3. DESCRIPTION TEMPLATES ---

# Templates organized by the categories you provided
ISSUE_TEMPLATES = {
    "Road & Traffic": {
        "Potholes / damaged roads": [
            "There is a {severity} pothole {location} that has been ignored {duration}. It is {impact} and requires immediate attention before it causes a major accident.",
            "The road surface {location} is severely damaged, with multiple large cracks and potholes appearing. This has been a problem {duration}, making it difficult for vehicles to pass safely.",
            "A particularly deep and {severity} pothole has formed {location}. It fills with water during rain, making it invisible to drivers. This is {impact}."
        ],
        "Broken traffic signals": [
            "The traffic signal {location} is completely non-functional {duration}. This has led to chaos at the intersection, {impact} during peak hours.",
            "One of the lights at the traffic signal {location} is broken. The green light is not working, causing confusion and near-misses among commuters.",
            "For the past few days, the traffic lights {location} have been blinking yellow continuously. This is a {severity} issue that needs urgent repair to regulate traffic flow."
        ],
        "Illegal parking": [
            "Illegal parking {location} has become a major nuisance {duration}. Cars are parked on both sides of the narrow street, {impact} and blocking emergency vehicle access.",
            "A number of vehicles are consistently parked in the 'No Parking' zone {location}. This obstructs the view for other drivers and is a {severity} safety hazard.",
            "Despite clear signage, illegal parking continues {location}. This narrows the road and is {impact}, especially during school hours."
        ]
    },
    "Streetlights & Electricity": {
        "Broken or dim streetlights": [
            "The streetlight {location} has been out of order {duration}. The entire stretch of road is plunged into darkness at night, {impact}.",
            "A streetlight pole {location} is flickering continuously. It's a {severity} problem that makes visibility poor and the area feel unsafe after dark.",
            "Most of the streetlights on the road {location} are extremely dim. They provide almost no light, which is a major safety concern for residents returning home late."
        ],
        "Fallen poles / exposed wires": [
            "A utility pole has fallen {location} after the recent storm and is now blocking part of the road. There are live wires exposed, making it a {severity} and dangerous situation.",
            "Exposed electrical wires are hanging from a damaged junction box {location}. This is a {severity} situation, especially with children playing nearby.",
            "A concrete electricity pole {location} is tilted at a dangerous angle {duration}, and the base has cracked. It looks like it could fall at any moment."
        ]
    },
    "Water Supply & Drainage": {
        "Pipeline leaks / bursts": [
            "There is a major water pipeline leak {location}. Gallons of clean drinking water are being wasted every day, and it has been happening {duration}.",
            "A water pipe burst {location}, causing a constant flow of water onto the street. This has created a large puddle and is starting to damage the road surface.",
            "A {severity} leak from an underground pipeline is flooding the lane {location}. It is creating a mess and causing a shortage of water supply in the area."
        ],
        "Waterlogging / drainage overflow": [
            "The drainage system {location} is overflowing after just a little rain. The dirty water floods the streets, {impact} and posing a health risk.",
            "Severe waterlogging is being reported {location} {duration}. The drains are clearly choked and unable to handle the water volume, creating a breeding ground for mosquitoes.",
            "An open drain {location} is overflowing, spilling sewage onto the main road. The smell is unbearable and the situation is extremely unhygienic for everyone."
        ]
    },
    "Waste & Sanitation": {
        "Uncollected garbage / overflowing bins": [
            "The community garbage bin {location} is overflowing with waste. Garbage has been piling up on the street {duration}, attracting stray animals and creating a foul smell.",
            "Waste has not been collected from our area {location} {duration}. The pile of garbage is growing daily, which is a {severity} health hazard for the residents.",
            "An overflowing dumpster {location} is blocking the footpath. The waste is spilling onto the road, {impact} and creating an unsightly and unhygienic environment."
        ],
        "Dirty public toilets": [
            "The public toilet facility {location} is in a deplorable state. It is extremely dirty, lacks water supply, and is practically unusable by anyone.",
            "There is a lack of basic sanitation in the public toilets {location}. They have not been cleaned {duration} and pose a significant public health risk.",
            "The condition of the public restroom {location} is unacceptable. The doors are broken, there is no lighting, and it is incredibly unsanitary."
        ]
    },
    "Public Safety": {
        "Open manholes / missing covers": [
            "A manhole {location} has been left open without any warning signs. It is a {severity} trap for pedestrians and motorists, especially at night.",
            "The cover of a manhole is missing {location} {duration}. Residents have placed a tree branch in it as a temporary warning, but a permanent solution is needed urgently.",
            "An uncovered manhole {location} is a {severity} safety risk. It is located right in the middle of a busy sidewalk, posing a direct threat to people walking there."
        ],
        "Unsafe pedestrian crossings": [
            "The zebra crossing {location} has faded completely and is no longer visible to drivers. This makes it very unsafe for pedestrians trying to cross the busy road.",
            "There is no proper pedestrian crossing {location}, despite it being a busy intersection with a school nearby. This is {impact} and is an accident waiting to happen.",
            "The pedestrian signal at the crossing {location} is not working. People are forced to guess when to cross, which is extremely {severity}."
        ]
    },
    "Public Infrastructure": {
        "Damaged footpaths / benches": [
            "The footpath {location} is broken and uneven, with loose paving slabs everywhere. It is very difficult for elderly people to walk on and is a tripping hazard.",
            "Several public benches in the park {location} are broken and have been in a state of disrepair {duration}. They are unusable for visitors.",
            "A large section of the sidewalk {location} has been dug up and left unrepaired. It forces pedestrians to walk on the main road, which is very risky."
        ],
        "Damaged bus stops": [
            "The shelter at the bus stop {location} is badly damaged. The roof is broken, and it provides no protection from the sun or rain for waiting passengers.",
            "The seating at the bus stop {location} has been completely vandalized and is unusable. Commuters are forced to stand while waiting for the bus.",
            "A bus stop signpost {location} has fallen down. There is no clear indication of where the bus stop is, causing confusion for passengers and bus drivers."
        ]
    }
}

# --- 4. GENERATE AND WRITE DATA ---

print(f"Generating {TOTAL_DESCRIPTIONS} descriptions...")

with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write the header
    writer.writerow(['Category', 'Issue', 'Description', 'Severity'])

    # Generate data
    for _ in range(TOTAL_DESCRIPTIONS):
        # Randomly select a category and one of its issues
        category = random.choice(list(ISSUE_TEMPLATES.keys()))
        issue = random.choice(list(ISSUE_TEMPLATES[category].keys()))

        # Randomly select a template for that issue
        template = random.choice(ISSUE_TEMPLATES[category][issue])

        # Choose a severity and store it to use in both the description and its own column
        selected_severity = random.choice(SEVERITIES)
        
        # Populate the template with random data
        description = template.format(
            severity=selected_severity,
            location=random.choice(LOCATIONS),
            duration=random.choice(DURATIONS),
            impact=random.choice(IMPACTS)
        )

        # Write the row to the CSV file
        writer.writerow([category, issue, description, selected_severity])

print(f"\n✅ Success! Your dataset has been saved to '{OUTPUT_FILE}'.")
print(f"Total rows generated: {TOTAL_DESCRIPTIONS}")



Generating 10000 descriptions...

✅ Success! Your dataset has been saved to 'civic_issues_dataset.csv'.
Total rows generated: 10000


In [7]:
import csv
import random

# --- 1. CONFIGURATION ---
TOTAL_DESCRIPTIONS = 30000
OUTPUT_FILE = 'civic_issues_dataset_new.csv'

# --- 2. DATA FOR VARIATION (ENRICHED, LOCATION REMOVED) ---

# Expanded lists for more variety
SEVERITIES = ["dangerous", "significant", "minor", "severe", "growing", "hazardous", "critical", "major", "slight"]
DURATIONS = [
    "for the past week", "since the heavy rainfall last Tuesday", "for over a month now", "for several days",
    "since yesterday morning", "for a fortnight", "it's been like this for ages", "for a couple of weeks",
    "since the start of the month"
]
IMPACTS = [
    "causing massive traffic jams", "posing a serious risk to pedestrians", "creating a major hygiene issue",
    "leading to frequent accidents, especially with two-wheelers", "severely disrupting daily commutes",
    "making the area completely unsafe after dark", "damaging the tires of vehicles", "a nightmare for local residents",
    "attracting stray dogs and other animals", "forcing people to walk on the main road"
]
TIMES_OF_DAY = ["during the morning rush hour", "especially at night", "in the evening when traffic is high", "throughout the day"]
REPORTER_TONES = ["Action needs to be taken immediately.", "We request the authorities to look into this matter.", "This is completely unacceptable.", "Please resolve this issue as soon as possible.", "Hope to see a quick resolution."]

# --- 3. DESCRIPTION TEMPLATES (REWRITTEN WITHOUT LOCATIONS) ---

# Templates are now generic and do not contain the {location} placeholder.
# The structure remains: (template_string, associated_severity_level)
ISSUE_TEMPLATES = {
    "Road & Traffic": {
        "Potholes / damaged roads": [
            ("There is a {severity} pothole that has been ignored {duration}. It is {impact} and requires immediate attention.", "{severity}"),
            ("The road surface is in a terrible state, with multiple large cracks. This has been a problem {duration}, making it hard to drive.", "significant"),
            ("A deep pothole has formed which fills with water, becoming an invisible trap for bikers. This has already led to a few minor accidents.", "severe"),
            ("The road in this area is completely broken. It's been {duration} and is {impact}. {reporter_tone}", "critical")
        ],
        "Broken traffic signals": [
            ("A traffic signal is dead {duration}. It's causing chaos at an intersection, {impact}.", "critical"),
            ("The green light at a signal isn't working. This is confusing drivers and is a {severity} issue that needs urgent repair.", "{severity}"),
            ("The traffic lights at a busy crossing have been blinking yellow {duration}. This is {impact}. Please fix this.", "significant")
        ],
        "Illegal parking": [
            ("Illegal parking by trucks has become a major nuisance {duration}. They are blocking the road, {impact}. {reporter_tone}", "major"),
            ("Cars are consistently parked in a 'No Parking' zone. It obstructs the view and is a {severity} safety hazard.", "{severity}"),
            ("Despite clear signs, people keep parking their bikes on the footpath. This narrows the road and is a real problem, especially during school hours.", "minor")
        ]
    },
    "Streetlights & Electricity": {
        "Broken or dim streetlights": [
            ("A streetlight has been out of order {duration}. The street is pitch black at night, {impact}.", "severe"),
            ("A streetlight pole is flickering non-stop. It's a {severity} problem making visibility poor and the area feel unsafe.", "{severity}"),
            ("Darkness prevails on a main road because most streetlights are dim. It's a major safety concern for residents.", "major")
        ],
        "Fallen poles / exposed wires": [
            ("A utility pole has fallen after the storm. Live wires are exposed, making it a death trap. {reporter_tone}", "critical"),
            ("Exposed electrical wires are dangling from a junction box. This is an extremely {severity} situation, especially with kids playing nearby.", "{severity}"),
            ("A concrete pole is dangerously tilted {duration}. It looks like it could fall any moment. This is a critical issue.", "critical")
        ]
    },
    "Water Supply & Drainage": {
        "Pipeline leaks / bursts": [
            ("There's a massive water pipeline leak. Gallons of clean water are being wasted daily {duration}.", "major"),
            ("A water pipe burst, flooding the street. It's starting to damage the road surface. This is a {severity} problem.", "{severity}"),
            ("Clean drinking water is leaking from a pipe. It is creating a mess and causing water shortages in nearby homes.", "significant")
        ],
        "Waterlogging / drainage overflow": [
            ("The drainage system overflows after the slightest rain. Dirty water floods the streets, creating a health hazard.", "severe"),
            ("Severe waterlogging is reported {duration}. The drains are choked, creating a breeding ground for mosquitoes. {reporter_tone}", "severe"),
            ("An open drain is overflowing with sewage onto the main road. The smell is unbearable. This is a critical hygiene issue.", "critical")
        ]
    },
    "Waste & Sanitation": {
        "Uncollected garbage / overflowing bins": [
            ("A community bin is overflowing. Garbage is piled up on the street {duration}, creating a foul smell and attracting dogs.", "severe"),
            ("Waste has not been collected {duration}. The growing pile is a {severity} health hazard. {reporter_tone}", "{severity}"),
            ("An overflowing dumpster is blocking the footpath. Waste is spilling onto the road, creating an unhygienic eyesore.", "major")
        ],
        "Dirty public toilets": [
            ("A public toilet is in a deplorable state. It's filthy, lacks water, and is unusable.", "severe"),
            ("There's no basic sanitation in the public toilets. They haven't been cleaned {duration} and pose a health risk.", "major"),
            ("The condition of a public restroom is unacceptable. Broken doors, no lights, and it's completely unsanitary.", "severe")
        ]
    },
    "Public Safety": {
        "Open manholes / missing covers": [
            ("An open manhole without any warning signs is a {severity} trap for pedestrians, {times_of_day}.", "{severity}"),
            ("A manhole cover is missing {duration}. Someone put a branch in it, but a permanent fix is needed urgently before someone gets hurt.", "critical"),
            ("An uncovered manhole is a huge safety risk, right in the middle of a busy sidewalk.", "critical")
        ],
        "Unsafe pedestrian crossings": [
            ("A zebra crossing has completely faded. It's very unsafe for pedestrians trying to cross the busy road.", "major"),
            ("There is no proper pedestrian crossing near a school. This leads to frequent near-misses and is an accident waiting to happen.", "severe"),
            ("The pedestrian signal is not working at a crossing. It's extremely {severity} as people have to guess when to cross.", "{severity}")
        ]
    },
    "Public Infrastructure": {
        "Damaged footpaths / benches": [
            ("The footpath is broken and uneven. It's a tripping hazard, especially for elderly people.", "significant"),
            ("Public benches in a local park are broken {duration}. They are unusable for visitors. This is a {severity} issue.", "{severity}"),
            ("A large section of the sidewalk was dug up and never repaired. It forces people onto the main road, which is risky.", "major")
        ],
        "Damaged bus stops": [
            ("A bus stop shelter is badly damaged. The broken roof offers no protection from sun or rain.", "significant"),
            ("The seating at a bus stop is completely vandalized. Commuters have to stand while waiting. {reporter_tone}", "minor"),
            ("A bus stop signpost has fallen. There's no clear indication where the stop is, causing confusion.", "minor")
        ]
    }
}

# --- 4. GENERATE AND WRITE DATA ---

print(f"Generating {TOTAL_DESCRIPTIONS} descriptions for an improved dataset...")

with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Category', 'Issue', 'Description', 'Severity'])

    for _ in range(TOTAL_DESCRIPTIONS):
        category = random.choice(list(ISSUE_TEMPLATES.keys()))
        issue = random.choice(list(ISSUE_TEMPLATES[category].keys()))
        
        template_tuple = random.choice(ISSUE_TEMPLATES[category][issue])
        template = template_tuple[0]
        severity_template = template_tuple[1]

        # Populate the template with random data
        # Use a dictionary to handle missing keys gracefully
        format_args = {
            'severity': random.choice(SEVERITIES),
            'duration': random.choice(DURATIONS),
            'impact': random.choice(IMPACTS),
            'times_of_day': random.choice(TIMES_OF_DAY),
            'reporter_tone': random.choice(REPORTER_TONES)
        }

        # Generate description, ignoring unused placeholders
        description = template.format(**{k: v for k, v in format_args.items() if '{' + k + '}' in template})
        
        # Determine the final severity for the CSV
        if '{' in severity_template:
            # If the severity is a placeholder like '{severity}', use the value we randomly selected
            final_severity = format_args['severity']
        else:
            # Otherwise, use the hardcoded severity from the template tuple
            final_severity = severity_template
            
        writer.writerow([category, issue, description, final_severity])

print(f"\n✅ Success! Your new, improved dataset has been saved to '{OUTPUT_FILE}'.")
print(f"Total rows generated: {TOTAL_DESCRIPTIONS}")

Generating 30000 descriptions for an improved dataset...

✅ Success! Your new, improved dataset has been saved to 'civic_issues_dataset_new.csv'.
Total rows generated: 30000


In [9]:
import csv
import random

# --- 1. CONFIGURATION ---
TOTAL_DESCRIPTIONS = 30000
OUTPUT_FILE = 'civic_issues_dataset_new.csv'

# --- 2. DATA FOR VARIATION (ENRICHED, LOCATION REMOVED) ---

# Expanded lists for more variety
SEVERITIES = ["dangerous", "significant", "minor", "severe", "growing", "hazardous", "critical", "major", "slight"]
DURATIONS = [
    "for the past week", "since the heavy rainfall last Tuesday", "for over a month now", "for several days",
    "since yesterday morning", "for a fortnight", "it's been like this for ages", "for a couple of weeks",
    "since the start of the month"
]
IMPACTS = [
    "causing massive traffic jams", "posing a serious risk to pedestrians", "creating a major hygiene issue",
    "leading to frequent accidents, especially with two-wheelers", "severely disrupting daily commutes",
    "making the area completely unsafe after dark", "damaging the tires of vehicles", "a nightmare for local residents",
    "attracting stray dogs and other animals", "forcing people to walk on the main road"
]
TIMES_OF_DAY = ["during the morning rush hour", "especially at night", "in the evening when traffic is high", "throughout the day"]
REPORTER_TONES = ["Action needs to be taken immediately.", "We request the authorities to look into this matter.", "This is completely unacceptable.", "Please resolve this issue as soon as possible.", "Hope to see a quick resolution."]

# --- 3. DESCRIPTION TEMPLATES (REWRITTEN WITHOUT LOCATIONS) ---

# Templates are now generic and do not contain the {location} placeholder.
# The structure remains: (template_string, associated_severity_level)
ISSUE_TEMPLATES = {
    "Road & Traffic": {
        "Potholes / damaged roads": [
            ("There is a {severity} pothole that has been ignored {duration}. It is {impact} and requires immediate attention.", "{severity}"),
            ("The road surface is in a terrible state, with multiple large cracks. This has been a problem {duration}, making it hard to drive.", "significant"),
            ("A deep pothole has formed which fills with water, becoming an invisible trap for bikers. This has already led to a few minor accidents.", "severe"),
            ("The road in this area is completely broken. It's been {duration} and is {impact}. {reporter_tone}", "critical")
        ],
        "Broken traffic signals": [
            ("A traffic signal is dead {duration}. It's causing chaos at an intersection, {impact}.", "critical"),
            ("The green light at a signal isn't working. This is confusing drivers and is a {severity} issue that needs urgent repair.", "{severity}"),
            ("The traffic lights at a busy crossing have been blinking yellow {duration}. This is {impact}. Please fix this.", "significant")
        ],
        "Illegal parking": [
            ("Illegal parking by trucks has become a major nuisance {duration}. They are blocking the road, {impact}. {reporter_tone}", "major"),
            ("Cars are consistently parked in a 'No Parking' zone. It obstructs the view and is a {severity} safety hazard.", "{severity}"),
            ("Despite clear signs, people keep parking their bikes on the footpath. This narrows the road and is a real problem, especially during school hours.", "minor")
        ]
    },
    "Streetlights & Electricity": {
        "Broken or dim streetlights": [
            ("A streetlight has been out of order {duration}. The street is pitch black at night, {impact}.", "severe"),
            ("A streetlight pole is flickering non-stop. It's a {severity} problem making visibility poor and the area feel unsafe.", "{severity}"),
            ("Darkness prevails on a main road because most streetlights are dim. It's a major safety concern for residents.", "major")
        ],
        "Fallen poles / exposed wires": [
            ("A utility pole has fallen after the storm. Live wires are exposed, making it a death trap. {reporter_tone}", "critical"),
            ("Exposed electrical wires are dangling from a junction box. This is an extremely {severity} situation, especially with kids playing nearby.", "{severity}"),
            ("A concrete pole is dangerously tilted {duration}. It looks like it could fall any moment. This is a critical issue.", "critical")
        ]
    },
    "Water Supply & Drainage": {
        "Pipeline leaks / bursts": [
            ("There's a massive water pipeline leak. Gallons of clean water are being wasted daily {duration}.", "major"),
            ("A water pipe burst, flooding the street. It's starting to damage the road surface. This is a {severity} problem.", "{severity}"),
            ("Clean drinking water is leaking from a pipe. It is creating a mess and causing water shortages in nearby homes.", "significant")
        ],
        "Waterlogging / drainage overflow": [
            ("The drainage system overflows after the slightest rain. Dirty water floods the streets, creating a health hazard.", "severe"),
            ("Severe waterlogging is reported {duration}. The drains are choked, creating a breeding ground for mosquitoes. {reporter_tone}", "severe"),
            ("An open drain is overflowing with sewage onto the main road. The smell is unbearable. This is a critical hygiene issue.", "critical")
        ]
    },
    "Waste & Sanitation": {
        "Uncollected garbage / overflowing bins": [
            ("A community bin is overflowing. Garbage is piled up on the street {duration}, creating a foul smell and attracting dogs.", "severe"),
            ("Waste has not been collected {duration}. The growing pile is a {severity} health hazard. {reporter_tone}", "{severity}"),
            ("An overflowing dumpster is blocking the footpath. Waste is spilling onto the road, creating an unhygienic eyesore.", "major")
        ],
        "Dirty public toilets": [
            ("A public toilet is in a deplorable state. It's filthy, lacks water, and is unusable.", "severe"),
            ("There's no basic sanitation in the public toilets. They haven't been cleaned {duration} and pose a health risk.", "major"),
            ("The condition of a public restroom is unacceptable. Broken doors, no lights, and it's completely unsanitary.", "severe")
        ]
    },
    "Public Safety": {
        "Open manholes / missing covers": [
            ("An open manhole without any warning signs is a {severity} trap for pedestrians, {times_of_day}.", "{severity}"),
            ("A manhole cover is missing {duration}. Someone put a branch in it, but a permanent fix is needed urgently before someone gets hurt.", "critical"),
            ("An uncovered manhole is a huge safety risk, right in the middle of a busy sidewalk.", "critical")
        ],
        "Unsafe pedestrian crossings": [
            ("A zebra crossing has completely faded. It's very unsafe for pedestrians trying to cross the busy road.", "major"),
            ("There is no proper pedestrian crossing near a school. This leads to frequent near-misses and is an accident waiting to happen.", "severe"),
            ("The pedestrian signal is not working at a crossing. It's extremely {severity} as people have to guess when to cross.", "{severity}")
        ]
    },
    "Public Infrastructure": {
        "Damaged footpaths / benches": [
            ("The footpath is broken and uneven. It's a tripping hazard, especially for elderly people.", "significant"),
            ("Public benches in a local park are broken {duration}. They are unusable for visitors. This is a {severity} issue.", "{severity}"),
            ("A large section of the sidewalk was dug up and never repaired. It forces people onto the main road, which is risky.", "major")
        ],
        "Damaged bus stops": [
            ("A bus stop shelter is badly damaged. The broken roof offers no protection from sun or rain.", "significant"),
            ("The seating at a bus stop is completely vandalized. Commuters have to stand while waiting. {reporter_tone}", "minor"),
            ("A bus stop signpost has fallen. There's no clear indication where the stop is, causing confusion.", "minor")
        ]
    }
}

# --- 4. GENERATE AND WRITE DATA ---

print(f"Generating {TOTAL_DESCRIPTIONS} descriptions for an improved dataset...")

with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Category', 'Issue', 'Description', 'Severity'])

    for _ in range(TOTAL_DESCRIPTIONS):
        category = random.choice(list(ISSUE_TEMPLATES.keys()))
        issue = random.choice(list(ISSUE_TEMPLATES[category].keys()))
        
        template_tuple = random.choice(ISSUE_TEMPLATES[category][issue])
        template = template_tuple[0]
        severity_template = template_tuple[1]

        # Populate the template with random data
        # Use a dictionary to handle missing keys gracefully
        format_args = {
            'severity': random.choice(SEVERITIES),
            'duration': random.choice(DURATIONS),
            'impact': random.choice(IMPACTS),
            'times_of_day': random.choice(TIMES_OF_DAY),
            'reporter_tone': random.choice(REPORTER_TONES)
        }

        # Generate description, ignoring unused placeholders
        description = template.format(**{k: v for k, v in format_args.items() if '{' + k + '}' in template})
        
        # Determine the final severity for the CSV
        if '{' in severity_template:
            # If the severity is a placeholder like '{severity}', use the value we randomly selected
            final_severity = format_args['severity']
        else:
            # Otherwise, use the hardcoded severity from the template tuple
            final_severity = severity_template
            
        writer.writerow([category, issue, description, final_severity])

print(f"\n✅ Success! Your new, improved dataset has been saved to '{OUTPUT_FILE}'.")
print(f"Total rows generated: {TOTAL_DESCRIPTIONS}")

Generating 30000 descriptions for an improved dataset...

✅ Success! Your new, improved dataset has been saved to 'civic_issues_dataset_new.csv'.
Total rows generated: 30000


In [10]:
import csv
import random

# --- 1. CONFIGURATION ---
TOTAL_DESCRIPTIONS = 30000
OUTPUT_FILE = 'civic_issues_dataset_again.csv'

# --- 2. DATA FOR VARIATION (GREATLY EXPANDED FOR DETAIL) ---

# Core attributes
SEVERITIES = ["dangerous", "significant", "minor", "severe", "growing", "hazardous", "critical", "major", "slight"]
DURATIONS = [
    "for the past week", "since the heavy rainfall last Tuesday", "for over a month now", "for several days",
    "since yesterday morning", "for a fortnight", "it's been like this for ages", "for a couple of weeks",
    "since the start of the month"
]
IMPACTS = [
    "causing massive traffic jams", "posing a serious risk to pedestrians", "creating a major hygiene issue",
    "leading to frequent accidents, especially with two-wheelers", "severely disrupting daily commutes",
    "making the area completely unsafe after dark", "damaging the tires of vehicles", "a nightmare for local residents",
    "attracting stray dogs and other animals", "forcing people to walk on the main road"
]
TIMES_OF_DAY = ["during the morning rush hour", "especially at night", "in the evening when traffic is high", "throughout the day"]
REPORTER_TONES = ["Action needs to be taken immediately.", "We request the authorities to look into this matter.", "This is completely unacceptable.", "Please resolve this issue as soon as possible.", "Hope to see a quick resolution."]

# --- NEW: DETAILED ENTITY LISTS ---
VEHICLE_TYPES = ["trucks", "private cars", "auto-rickshaws", "delivery vans", "motorcycles", "buses"]
WASTE_TYPES = ["household garbage", "plastic bottles and bags", "construction debris", "rotting vegetable matter", "medical waste"]
PROXIMITY_DETAILS = ["right in front of a school", "near a busy marketplace", "blocking a hospital entrance", "on a residential street", "next to a bus stop"]
WEATHER_CONDITIONS = ["after the recent monsoon rains", "during the current heatwave", "since the storm last night", "on foggy mornings"]
INFRA_OBJECTS = ["concrete bench", "steel railing", "bus shelter's glass panel", "paving slabs on the footpath", "community water tap"]

# --- 3. DESCRIPTION TEMPLATES (UPGRADED FOR DETAIL AND REALISM) ---

# The structure remains: (template_string, associated_severity_level)
ISSUE_TEMPLATES = {
    "Road & Traffic": {
        "Potholes / damaged roads": [
            ("The asphalt is crumbling, creating a {severity} pothole that is a hazard {PROXIMITY_DETAILS}. It's been ignored {duration} and is {impact}.", "{severity}"),
            ("The road surface is in a terrible state with multiple deep cracks and craters {WEATHER_CONDITIONS}. It is {impact} for vehicles.", "significant"),
            ("A deep crater has formed which becomes an invisible water-filled trap for bikers. This has led to a few minor accidents already.", "severe"),
            ("The road is completely broken. It's been {duration} and is a nightmare for daily commuters. {reporter_tone}", "critical")
        ],
        "Broken traffic signals": [
            ("A traffic signal is completely dead {duration} {PROXIMITY_DETAILS}. It's causing chaos at the intersection, {impact}.", "critical"),
            ("The green light at a signal post isn't functioning. This is confusing drivers and is a {severity} issue that needs urgent repair {TIMES_OF_DAY}.", "{severity}"),
            ("The traffic lights at a busy crossing have been blinking yellow constantly {duration}. This is {impact}. Please fix the signal controller.", "significant")
        ],
        "Illegal parking": [
            ("Illegal parking by {VEHICLE_TYPES} has become a major nuisance {duration}. They are parked all along the road, {impact}. {reporter_tone}", "major"),
            ("Cars are consistently parked in a 'No Parking' zone, creating a bottleneck {PROXIMITY_DETAILS}. It obstructs the view and is a {severity} safety hazard.", "{severity}"),
            ("Despite clear signs, people keep parking their bikes on the footpath. This forces pedestrians onto the road, a real problem {TIMES_OF_DAY}.", "minor")
        ]
    },
    "Streetlights & Electricity": {
        "Broken or dim streetlights": [
            ("A streetlight has been fused {duration}. The street is pitch black at night, {impact}, especially {WEATHER_CONDITIONS}.", "severe"),
            ("A streetlight pole is flickering non-stop. It's a {severity} problem with the bulb or wiring, making visibility poor and the area feel unsafe.", "{severity}"),
            ("Darkness prevails on a main road because most streetlight lamps are dim. It's a major safety concern for residents' security.", "major")
        ],
        "Fallen poles / exposed wires": [
            ("A utility pole has fallen {WEATHER_CONDITIONS}. Live, exposed wires are sparking, making it a death trap. {reporter_tone}", "critical"),
            ("Exposed electrical wiring is dangling from a junction box {PROXIMITY_DETAILS}. This is an extremely {severity} situation, especially with kids playing nearby.", "{severity}"),
            ("A concrete electric pole is dangerously tilted {duration}. It looks like it could fall any moment and snap the power lines. This is a critical issue.", "critical")
        ]
    },
    "Water Supply & Drainage": {
        "Pipeline leaks / bursts": [
            ("There's a massive water pipeline leak. Gallons of clean drinking water are being wasted daily from the main supply line {duration}.", "major"),
            ("A water pipe has burst, flooding the street with clean water. It's starting to damage the road surface. This is a {severity} problem.", "{severity}"),
            ("Clean drinking water is leaking from a broken pipe. It is creating a mess and causing water pressure shortages in nearby homes.", "significant")
        ],
        "Waterlogging / drainage overflow": [
            ("The drainage system and local sump overflows {WEATHER_CONDITIONS}. Dirty sewage water floods the streets, creating a health hazard.", "severe"),
            ("Severe waterlogging is reported {duration}. The drains are clearly choked with silt, creating a breeding ground for mosquitoes. {reporter_tone}", "severe"),
            ("An open drain is overflowing with sewage onto the main road {PROXIMITY_DETAILS}. The smell is unbearable. This is a critical hygiene issue.", "critical")
        ]
    },
    "Waste & Sanitation": {
        "Uncollected garbage / overflowing bins": [
            ("A community bin is overflowing with {WASTE_TYPES}. Garbage is piled up on the street {duration}, creating a foul smell and attracting dogs.", "severe"),
            ("Waste including {WASTE_TYPES} has not been collected {duration}. The growing pile of trash is a {severity} health hazard. {reporter_tone}", "{severity}"),
            ("An overflowing dumpster is blocking the footpath. Medical and household waste is spilling onto the road, creating an unhygienic eyesore.", "major")
        ],
        "Dirty public toilets": [
            ("A public toilet is in a deplorable state. It's filthy, lacks a water connection, and is unusable {PROXIMITY_DETAILS}.", "severe"),
            ("There's no basic sanitation in the public toilets. They haven't been cleaned {duration} and pose a significant health risk to users.", "major"),
            ("The condition of a public restroom is unacceptable. Broken doors, no lights, and the floor is completely unsanitary.", "severe")
        ]
    },
    "Public Safety": {
        "Open manholes / missing covers": [
            ("An open manhole without any warning signs is a {severity} trap for pedestrians, {times_of_day}, especially {PROXIMITY_DETAILS}.", "{severity}"),
            ("A manhole cover is missing {duration}. Someone put a branch in it, but a permanent iron lid is needed urgently before someone gets hurt.", "critical"),
            ("An uncovered manhole on a busy sidewalk is a huge safety risk, especially for children and the elderly.", "critical")
        ],
        "Unsafe pedestrian crossings": [
            ("A zebra crossing's paint has completely faded {PROXIMITY_DETAILS}. It's very unsafe for pedestrians trying to cross the busy road against oncoming traffic.", "major"),
            ("There is no proper pedestrian crossing near a school. This leads to frequent near-misses and is an accident waiting to happen.", "severe"),
            ("The pedestrian walk signal is not working at a crossing. It's extremely {severity} as people have to guess when to cross {TIMES_OF_DAY}.", "{severity}")
        ]
    },
    "Public Infrastructure": {
        "Damaged footpaths / benches": [
            ("The {INFRA_OBJECTS} is broken and uneven. It's a tripping hazard, especially for elderly people {PROXIMITY_DETAILS}.", "significant"),
            ("Public benches in a local park are broken {duration}. They are unusable for visitors. This is a {severity} issue.", "{severity}"),
            ("A large section of the sidewalk was dug up for cable work and never repaired. It forces people onto the main road, which is risky.", "major")
        ],
        "Damaged bus stops": [
            ("The {INFRA_OBJECTS} at a bus stop is badly damaged. The broken roof offers no protection from sun or rain for passengers.", "significant"),
            ("The seating at a bus stop is completely vandalized. Commuters have to stand while waiting for public transport. {reporter_tone}", "minor"),
            ("A bus stop signpost has fallen. There's no clear indication where the stop is, causing route confusion for commuters.", "minor")
        ]
    }
}

# --- 4. GENERATE AND WRITE DATA ---

print(f"Generating {TOTAL_DESCRIPTIONS} descriptions for a new, highly detailed dataset...")

with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Category', 'Issue', 'Description', 'Severity'])

    for _ in range(TOTAL_DESCRIPTIONS):
        category = random.choice(list(ISSUE_TEMPLATES.keys()))
        issue = random.choice(list(ISSUE_TEMPLATES[category].keys()))
        
        template_tuple = random.choice(ISSUE_TEMPLATES[category][issue])
        template = template_tuple[0]
        severity_template = template_tuple[1]

        # Populate the template with a richer set of random data
        format_args = {
            'severity': random.choice(SEVERITIES),
            'duration': random.choice(DURATIONS),
            'impact': random.choice(IMPACTS),
            'times_of_day': random.choice(TIMES_OF_DAY),
            'reporter_tone': random.choice(REPORTER_TONES),
            'VEHICLE_TYPES': random.choice(VEHICLE_TYPES),
            'WASTE_TYPES': random.choice(WASTE_TYPES),
            'PROXIMITY_DETAILS': random.choice(PROXIMITY_DETAILS),
            'WEATHER_CONDITIONS': random.choice(WEATHER_CONDITIONS),
            'INFRA_OBJECTS': random.choice(INFRA_OBJECTS),
        }

        # Generate description, ignoring unused placeholders
        description = template.format(**{k: v for k, v in format_args.items() if '{' + k.upper() + '}' in template})
        
        # Determine the final severity for the CSV
        if '{' in severity_template:
            final_severity = format_args['severity']
        else:
            final_severity = severity_template
            
        writer.writerow([category, issue, description, final_severity])

print(f"\n✅ Success! Your new, highly detailed dataset has been saved to '{OUTPUT_FILE}'.")
print(f"Total rows generated: {TOTAL_DESCRIPTIONS}")



Generating 30000 descriptions for a new, highly detailed dataset...


KeyError: 'reporter_tone'

In [1]:
import random
import csv

# Categories and sub-issues
categories = {
    "Road-related": [
        "pothole on the road", 
        "damaged speed breaker", 
        "cracked road surface", 
        "waterlogged street"
    ],
    "Streetlight-related": [
        "damaged streetlight", 
        "flickering bulb", 
        "streetlight not working", 
        "leaning streetlight pole"
    ],
    "Garbage-related": [
        "open garbage dump", 
        "overflowing dustbin", 
        "uncollected waste", 
        "garbage burning"
    ],
    "Water-related": [
        "leaking pipeline", 
        "open manhole", 
        "blocked drain", 
        "sewage overflow"
    ]
}

# Random sentence templates
templates = [
    "There is a {issue} near {location}, causing {effect}.",
    "Residents have reported a {issue} in the {location}, which is leading to {effect}.",
    "A serious problem of {issue} has been noticed around {location}, creating {effect}.",
    "People are facing issues due to {issue} at {location}, resulting in {effect}.",
    "The {issue} located near {location} has caused {effect} in the area."
]

# Possible locations and effects
locations = ["main road", "market area", "school entrance", "residential colony", 
             "bus stop", "hospital road", "railway crossing", "park vicinity"]

effects = ["traffic jams", "safety concerns", "waterlogging", "bad smell", 
           "accidents", "health hazards", "mosquito breeding", "public inconvenience"]

# Generate dataset
rows = []
num_samples = 30000

for _ in range(num_samples):
    category = random.choice(list(categories.keys()))
    issue = random.choice(categories[category])
    template = random.choice(templates)
    location = random.choice(locations)
    effect = random.choice(effects)
    
    description = template.format(issue=issue, location=location, effect=effect)
    rows.append([category, issue, description])

# Save to CSV
with open("civic_issues_dataset.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Category", "Issue", "Description"])
    writer.writerows(rows)

print("✅ Dataset with 30,000 samples generated: civic_issues_dataset.csv")


✅ Dataset with 30,000 samples generated: civic_issues_dataset.csv


In [2]:
import random
import csv

# Categories, issues
categories = {
    "Road-related": [
        "pothole on the road",
        "damaged speed breaker",
        "cracked road surface",
        "waterlogged street"
    ],
    "Streetlight-related": [
        "damaged streetlight",
        "flickering bulb",
        "streetlight not working",
        "leaning streetlight pole"
    ],
    "Garbage-related": [
        "open garbage dump",
        "overflowing dustbin",
        "uncollected waste",
        "garbage burning"
    ],
    "Water-related": [
        "leaking pipeline",
        "open manhole",
        "blocked drain",
        "sewage overflow"
    ]
}

# Severity levels (as text labels)
SEVERITIES = [
    "dangerous", "significant", "minor", "severe", "growing",
    "hazardous", "critical", "major", "slight"
]

# Templates
templates = [
    "There is a {issue} near the {location}, leading to {effect}.",
    "Local residents have complained about a {issue} in the {location}, causing {effect}.",
    "A serious case of {issue} has been observed around the {location}, resulting in {effect}.",
    "The {issue} near the {location} is creating problems like {effect}.",
    "Authorities need to address the {issue} found near the {location}, as it causes {effect}.",
    "People are facing difficulties due to {issue} in the {location}, leading to {effect}.",
    "The issue of {issue} in the {location} has raised concerns regarding {effect}.",
    "Immediate attention is required for {issue} near the {location} because of {effect}."
]

# Locations and effects
locations = [
    "main road", "market area", "school entrance", "residential colony",
    "bus stop", "hospital road", "railway crossing", "park vicinity",
    "office complex", "shopping mall", "highway intersection", "slum area"
]

effects = [
    "traffic jams", "safety hazards", "water stagnation", "bad odor",
    "increased risk of accidents", "serious health issues", "mosquito breeding",
    "public inconvenience", "spread of diseases", "disruption in daily commute"
]

# Total dataset size
num_samples = 30000
num_per_category = num_samples // len(categories)

rows = []

for category, issues in categories.items():
    for _ in range(num_per_category):
        issue = random.choice(issues)
        template = random.choice(templates)
        location = random.choice(locations)
        effect = random.choice(effects)
        severity = random.choice(SEVERITIES)

        description = template.format(issue=issue, location=location, effect=effect)
        rows.append([category, issue, description, severity])

# Save to CSV
with open("civic_issues_dataset_gpt.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Category", "Issue", "Description", "Severity"])
    writer.writerows(rows)

print("✅ Dataset with 30,000 diverse civic issue samples (with text severity) generated: civic_issues_dataset.csv")


✅ Dataset with 30,000 diverse civic issue samples (with text severity) generated: civic_issues_dataset.csv


In [4]:
import random
import csv

# Updated Categories and Issues
categories = {
    "Road & Traffic": [
        "potholes on the road",
        "damaged roads",
        "broken traffic signals",
        "illegal parking"
    ],
    "Streetlights & Electricity": [
        "broken streetlights",
        "dim streetlights",
        "fallen poles",
        "exposed wires"
    ],
    "Water Supply & Drainage": [
        "pipeline leaks",
        "pipeline bursts",
        "waterlogging",
        "drainage overflow"
    ],
    "Waste & Sanitation": [
        "uncollected garbage",
        "overflowing bins",
        "dirty public toilets"
    ],
    "Public Safety": [
        "open manholes",
        "missing manhole covers",
        "unsafe pedestrian crossings"
    ],
    "Public Infrastructure": [
        "damaged footpaths",
        "damaged benches",
        "damaged bus stops"
    ]
}

# Textual Severity levels
SEVERITIES = [
    "dangerous", "significant", "minor", "severe", "growing",
    "hazardous", "critical", "major", "slight"
]

# Templates for diverse description generation
templates = [
    "There is a {issue} near the {location}, leading to {effect}.",
    "Local residents have complained about {issue} in the {location}, causing {effect}.",
    "A case of {issue} has been observed around the {location}, resulting in {effect}.",
    "The {issue} near the {location} is creating problems such as {effect}.",
    "Authorities need to address {issue} at the {location}, as it causes {effect}.",
    "People are facing difficulties due to {issue} in the {location}, leading to {effect}.",
    "The issue of {issue} in the {location} has raised concerns regarding {effect}.",
    "Immediate attention is required for {issue} near the {location} because of {effect}."
]

# Location pool
locations = [
    "main road", "market area", "school entrance", "residential colony",
    "bus stop", "hospital road", "railway crossing", "park vicinity",
    "office complex", "shopping mall", "highway intersection", "slum area"
]

# Effects pool
effects = [
    "traffic jams", "safety hazards", "water stagnation", "bad odor",
    "accidents", "health risks", "mosquito breeding",
    "public inconvenience", "spread of diseases", "disruption in daily commute"
]

# Dataset size
num_samples = 30000
num_per_category = num_samples // len(categories)

rows = []

# Generate dataset
for category, issues in categories.items():
    for _ in range(num_per_category):
        issue = random.choice(issues)
        template = random.choice(templates)
        location = random.choice(locations)
        effect = random.choice(effects)
        severity = random.choice(SEVERITIES)

        description = template.format(issue=issue, location=location, effect=effect)
        rows.append([category, issue, description, severity])

# Save to CSV
with open("civic_issues_dataset_gpt2.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["Category", "Issue", "Description", "Severity"])
    writer.writerows(rows)

print("✅ Civic Issues Dataset (30,000 rows) generated: civic_issues_dataset.csv")


✅ Civic Issues Dataset (30,000 rows) generated: civic_issues_dataset.csv
