In [1]:
import pandas as pd
import numpy as np
import re

# Create the raw data
data = {
    "Customer_ID": [1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010,
                    1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020],
    "Feedback": [
        "Great service!", 
        np.nan, 
        "bad SERVICE!!!", 
        "Great service!", 
        "Too late delivery :(", 
        "good service", 
        "VERY RUDE BEHAVIOR.", 
        "not bad. could be better", 
        "Best support ever!!!", 
        "delivery was late and cold food", 
        "very polite and helpful staff", 
        "null", 
        "food arrived 2 hours late", 
        "amazing experience!", 
        "no response from support", 
        "amazing experience!", 
        "I loved the food 😋", 
        "worst delivery ever!!!", 
        "Waited too long for my order", 
        "they were polite. good service"
    ],
    "Date": [
        "2023/12/01", "2023-12-02", "01-12-2023", "2023/12/01", "NULL", "2023.12.01",
        "12/01/2023", "2023-12-03", "12-03-2023", "2023-12-03", "2023/12/04", 
        "2023-12-04", "04-12-2023", "2023/12/05", np.nan, "2023-12-05", 
        "05-12-2023", "2023/12/06", "2023.12.06", "06-12-2023"
    ],
    "Rating": [5, 4, 1, 5, 2, 4, 1, 3, 5, 2, 5, 3, 1, 5, 2, 5, 5, 1, 2, 4]
}

#  Load into DataFrame
df = pd.DataFrame(data)

#  Replace 'NULL' and 'null' with np.nan
df.replace(["NULL", "null"], np.nan, inplace=True)

#  Drop rows with missing Feedback
df.dropna(subset=["Feedback"], inplace=True)

#  Drop duplicate feedback
df.drop_duplicates(subset=["Feedback"], inplace=True)

#  Standardize date format
df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
df["Date"].fillna("Unknown", inplace=True)

#  Clean feedback text
def clean_text(text):
    text = text.lower()
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra spaces
    return text

df["Clean_Feedback"] = df["Feedback"].apply(clean_text)

# Add placeholder sentiment column
df["Sentiment"] = "To Be Analyzed"

#  Reset index
df.reset_index(drop=True, inplace=True)

#  Display cleaned DataFrame
print(df)


    Customer_ID                         Feedback                 Date  Rating  \
0          1001                   Great service!  2023-12-01 00:00:00       5   
1          1003                   bad SERVICE!!!              Unknown       1   
2          1005             Too late delivery :(              Unknown       2   
3          1006                     good service              Unknown       4   
4          1007              VERY RUDE BEHAVIOR.              Unknown       1   
5          1008         not bad. could be better              Unknown       3   
6          1009             Best support ever!!!              Unknown       5   
7          1010  delivery was late and cold food              Unknown       2   
8          1011    very polite and helpful staff  2023-12-04 00:00:00       5   
9          1013        food arrived 2 hours late              Unknown       1   
10         1014              amazing experience!  2023-12-05 00:00:00       5   
11         1015         no r

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Date"].fillna("Unknown", inplace=True)
  df["Date"].fillna("Unknown", inplace=True)
