# Customer Satisfaction Prediction

Predicting customer satisfaction using support ticket data.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the raw support-ticket export into a DataFrame, then preview the
# first five rows to sanity-check the columns.
df = pd.read_csv(filepath_or_buffer='../data/customer_support_tickets.csv')
df.head(n=5)


In [None]:
# Quick data-quality overview: schema, summary statistics, missing values.
#
# df.info() prints on its own, but a notebook cell only renders the value of
# its *final* expression. The describe() result therefore has to be printed
# explicitly, otherwise it is computed and silently thrown away.
df.info()
print(df.describe())

# Per-column count of missing values (rendered as the cell's output).
df.isnull().sum()


In [None]:
# Drop the personally identifying and free-text columns; they are not used
# by any later cell.
unused_columns = ['Customer Name', 'Customer Email', 'Ticket Description', 'Resolution']
df_cleaned = df.drop(columns=unused_columns)

# Parse the date/time columns. errors='coerce' turns any value that cannot
# be parsed into NaT instead of raising.
for timestamp_column in ('Date of Purchase', 'First Response Time', 'Time to Resolution'):
    df_cleaned[timestamp_column] = pd.to_datetime(df_cleaned[timestamp_column], errors='coerce')


In [None]:
# Number of tickets per satisfaction rating.
rating_ax = sns.countplot(x='Customer Satisfaction Rating', data=df_cleaned)
rating_ax.set_title("Customer Satisfaction Rating Distribution")
plt.show()

# Mean rating for each product, ordered from highest to lowest.
avg_satisfaction = (
    df_cleaned
    .groupby('Product Purchased')['Customer Satisfaction Rating']
    .mean()
    .sort_values(ascending=False)
)
product_ax = avg_satisfaction.plot(kind='bar', figsize=(10, 5), title='Average Satisfaction by Product')
product_ax.set_ylabel("Avg Rating")
plt.show()


In [None]:
# Hours elapsed between the first response and the resolution timestamp.
# NOTE: despite its name, 'response_hours' measures how long resolution took
# *after* the first response, not how long the first response itself took.
# Rows where either timestamp is NaT produce NaN here.
resolution_delta = df_cleaned['Time to Resolution'] - df_cleaned['First Response Time']
df_cleaned['response_hours'] = resolution_delta.dt.total_seconds() / 3600

# Keep only tickets that actually have a satisfaction rating. The explicit
# .copy() makes df_model an independent frame, so adding columns to it
# afterwards does not raise pandas' SettingWithCopyWarning.
df_model = df_cleaned.dropna(subset=['Customer Satisfaction Rating']).copy()


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Binary target: a rating of 2 or below is 'low', anything else is 'high'.
# The comparison is vectorised, and assign() returns a new frame, so no
# column is written into a DataFrame that pandas may consider a copy
# (which would raise SettingWithCopyWarning).
is_low_rating = df_model['Customer Satisfaction Rating'] <= 2
df_model = df_model.assign(
    satisfaction_level=is_low_rating.map({True: 'low', False: 'high'})
)

features = ['Customer Age', 'response_hours']
# NOTE(review): missing feature values are imputed with 0, which is not a
# realistic age or duration -- consider median imputation instead.
X = df_model[features].fillna(0)
y = df_model['satisfaction_level']

# stratify=y keeps the low/high proportions the same in the train and test
# sets, so the report below is not skewed by an unlucky split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Seed the forest as well; seeding only the split still gives different
# metrics on every run because tree construction is randomised.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Per-class precision/recall/F1 on the held-out 20%.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


## Conclusion & Next Steps

- Response time (measured as hours from first response to resolution) and customer age show predictive value for satisfaction.
- Text features could be analyzed using NLP.
- Consider further model tuning and cross-validation.
