In [3]:
import pandas as pd

# 1. Load the dataset
df = pd.read_csv('customer_feedback.csv')

# 2. View initial info
print("Initial data shape:", df.shape)
print("\nMissing values:\n", df.isnull().sum())

# 3. Remove duplicate feedback entries
df.drop_duplicates(inplace=True)

# 4. Drop rows where Feedback_Text is missing (essential for sentiment analysis)
df.dropna(subset=['Feedback_Text'], inplace=True)

# 5. Fill missing Rating with a neutral score (e.g., 3) if it's required
df['Rating'] = df['Rating'].fillna(3)

# 6. Normalize Date_Submitted format
df['Date_Submitted'] = pd.to_datetime(df['Date_Submitted'], errors='coerce')

# 7. Handle missing or invalid dates
most_common_date = df['Date_Submitted'].mode()[0]
df['Date_Submitted'].fillna(most_common_date, inplace=True)

# 8. Standardize Feedback_Text
df['Feedback_Text'] = df['Feedback_Text'].str.strip().str.lower()

# 9. Correct common typos or slang (basic example)
typo_map = {
    'luvd': 'loved',
    'gr8': 'great',
    'awsm': 'awesome',
    'bttr': 'better',
}
def fix_typos(text):
    for typo, correct in typo_map.items():
        text = text.replace(typo, correct)
    return text

df['Feedback_Text'] = df['Feedback_Text'].apply(fix_typos)

# 10. Capitalize Channel names for consistency
df['Channel'] = df['Channel'].str.title().str.strip()

# 11. Convert Feedback_ID and Customer_ID to string (for consistency)
df['Feedback_ID'] = df['Feedback_ID'].astype(str)
df['Customer_ID'] = df['Customer_ID'].astype(str)

# 12. Sort by Date_Submitted
df.sort_values(by='Date_Submitted', inplace=True)

# 13. Save cleaned data
df.to_csv('cleaned_customer_feedback.csv', index=False)

print("✅ Cleaning complete. Cleaned data saved to 'cleaned_customer_feedback.csv'")


Initial data shape: (5, 6)

Missing values:
 Feedback_ID       0
Customer_ID       4
Feedback_Text     4
Rating            4
Date_Submitted    4
Channel           4
dtype: int64
✅ Cleaning complete. Cleaned data saved to 'cleaned_customer_feedback.csv'


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Date_Submitted'].fillna(most_common_date, inplace=True)
