In [6]:
import pandas as pd
from google.colab import files
uploaded = files.upload()
file_path = 'sarcasm_detected.csv'  # Replace with your file path
data = pd.read_csv(file_path)
data

Saving sarcasm_detected.csv to sarcasm_detected.csv


Unnamed: 0,tweet_id,text,sarcasm_score,sarcastic
0,acd7673f-e621-5f1a-d662-df278964a6ea,the thameslink core between london st pancras ...,0.340264,0
1,5b92aba8-4b05-6c63-8485-e9c870742137,loving the complaint about people having to wa...,0.807071,1
2,0a799c07-8b76-17ba-b840-e538d51e832d,.and yet you have no toilets on some of your t...,0.541555,1
3,8b4d2a34-c4f0-0e19-4055-dfe4af5f0e14,"you have no toilets on some of your trains, li...",0.320328,0
4,1fd08862-d8c7-0682-6b11-2603fba22d94,all the daft cared about was money and backsid...,0.616181,1
...,...,...,...,...
15744,6f429a04-5817-f054-24d0-a9f20fddeb25,at st. alban's city has the pis screen firmwar...,0.357916,0
15745,b47948df-1732-7021-ee77-238f839a2ea0,haha oh man the audio corruption on is quite e...,0.935341,1
15746,bcb3a01a-e58c-e082-3111-10fa77d1a9f7,sweetis there a plug to charge my phone?,0.030832,0
15747,f440c7e6-9e2d-31c4-3626-b9d8288cbbe3,now there are far fewer commuters having to st...,0.206224,0


In [12]:
# Define possible KPIs and their keyword mappings
kpi_columns = [
    "Punctuality & Reliability",
    "Customer Experience",
    "Accessibility",
    "Facilities & Amenities",
    "Communication",
    "Complaint Handling",
    "Value for Money",
]

kpi_mapping = {
    "Punctuality & Reliability": ["delay", "time", "late", "cancel"],
    "Customer Experience": ["service", "staff", "experience", "friendly", "helpful"],
    "Accessibility": ["barrier", "access", "wheelchair", "disabled"],
    "Facilities & Amenities": ["rubbish", "wifi", "toilet", "clean", "signage"],
    "Communication": ["process", "information", "announcement", "signs", "updates"],
    "Complaint Handling": ["repay", "process", "issue", "complaint", "resolve"],
    "Value for Money": ["expensive", "cost", "charge", "value", "price"],
}

# Function to map text to KPIs
def map_kpis(text):
    kpi_flags = {kpi: 0 for kpi in kpi_columns}
    for kpi, keywords in kpi_mapping.items():
        if any(keyword in text.lower() for keyword in keywords):
            kpi_flags[kpi] = 1
    return kpi_flags

# Define actions tailored to KPIs
def get_action(kpi, sarcastic):
    action_map = {
        "Punctuality & Reliability": "Revise and optimize train schedules to improve punctuality.",
        "Customer Experience": "Conduct customer service training for staff.",
        "Accessibility": "Ensure platforms, trains, and stations meet accessibility standards.",
        "Facilities & Amenities": "Upgrade facilities such as toilets, seating, and WiFi access.",
        "Communication": "Improve real-time communication via apps, announcements, and digital boards.",
        "Complaint Handling": "Streamline complaint resolution processes and follow up with customers.",
        "Value for Money": "Evaluate ticket pricing and introduce more competitive pricing options.",
    }
    action = action_map.get(kpi, "No specific action defined.")
    if sarcastic == 1:
        action += " (Detected as sarcastic: ensure issue validation and follow-up.)"
    return action

In [13]:
# Prepare the output dataset
output_rows = []

# Process each tweet
for _, row in data.iterrows():
    sarcastic = row["sarcastic"]
    text = row["text"]
    tweet_id = row["tweet_id"]

    # Map KPIs
    kpi_flags = map_kpis(text)

    # Create rows for each KPI flagged
    for kpi, flag in kpi_flags.items():
        if flag == 1:
            # Add a row for this KPI
            output_rows.append({
                "tweet_id": tweet_id,
                "text": text,
                "sarcastic": sarcastic,
                kpi: 1,
                **{col: 0 for col in kpi_columns if col != kpi},  # Other KPIs set to 0
                "actions": get_action(kpi, sarcastic),
            })

# Create the final output DataFrame
output_data = pd.DataFrame(output_rows)

# Display sample output
output_data

Unnamed: 0,tweet_id,text,sarcastic,Facilities & Amenities,Punctuality & Reliability,Customer Experience,Accessibility,Communication,Complaint Handling,Value for Money,actions
0,acd7673f-e621-5f1a-d662-df278964a6ea,the thameslink core between london st pancras ...,0,1,0,0,0,0,0,0,"Upgrade facilities such as toilets, seating, a..."
1,5b92aba8-4b05-6c63-8485-e9c870742137,loving the complaint about people having to wa...,1,0,0,0,0,0,1,0,Streamline complaint resolution processes and ...
2,0a799c07-8b76-17ba-b840-e538d51e832d,.and yet you have no toilets on some of your t...,1,1,0,0,0,0,0,0,"Upgrade facilities such as toilets, seating, a..."
3,8b4d2a34-c4f0-0e19-4055-dfe4af5f0e14,"you have no toilets on some of your trains, li...",0,1,0,0,0,0,0,0,"Upgrade facilities such as toilets, seating, a..."
4,1fd08862-d8c7-0682-6b11-2603fba22d94,all the daft cared about was money and backsid...,1,0,0,0,1,0,0,0,"Ensure platforms, trains, and stations meet ac..."
...,...,...,...,...,...,...,...,...,...,...,...
17496,35ac9bc5-4578-17ba-72be-1d156996d44c,"thameslink is taking the piss.quite literally,...",1,1,0,0,0,0,0,0,"Upgrade facilities such as toilets, seating, a..."
17497,34424fb6-6126-f5d9-79e9-d3a65ba5566e,"yeah but it's glitching for a lot of the ""this...",1,0,0,0,0,1,0,0,"Improve real-time communication via apps, anno..."
17498,6f429a04-5817-f054-24d0-a9f20fddeb25,at st. alban's city has the pis screen firmwar...,0,0,0,1,0,0,0,0,Conduct customer service training for staff.
17499,bcb3a01a-e58c-e082-3111-10fa77d1a9f7,sweetis there a plug to charge my phone?,0,0,0,0,0,0,0,1,Evaluate ticket pricing and introduce more com...


In [16]:
# Save the output dataset
output_data.to_csv('output_data.csv', index=False)

# Download the CSV file
files.download('output_data.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>