In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the data from the CSV file into the DataFrame `df`.
# NOTE(review): '/~/Train.csv' begins at the filesystem root and contains a '~'
# path component — this looks like a placeholder; confirm the real file location.
data_path = '/~/Train.csv'
df = pd.read_csv(data_path)

In [None]:
# Count the rows for every ('Warehouse_block', 'Customer_rating') pair, then
# pivot the ratings into columns; pairs that never occur are filled with 0.
ratings_distribution = (
    df.groupby(['Warehouse_block', 'Customer_rating'])
    .size()
    .unstack(fill_value=0)
)

In [None]:
# Colour palette passed to the pie charts (one hex colour per wedge).
colors = [
    '#ff9999',
    '#66b3ff',
    '#99ff99',
    '#ffcc99',
    '#c2c2f0',
]

In [None]:
# Pie charts of the customer-rating distribution, one chart per warehouse block.
# squeeze=False makes plt.subplots always return a 2-D array of Axes, even when
# there is only one warehouse block; ravel() flattens it back to the 1-D array
# the rest of the cell expects, so indexing never hits a bare Axes object.
n_blocks = len(ratings_distribution.index)
fig, axes = plt.subplots(nrows=1, ncols=n_blocks, figsize=(18, 6), squeeze=False)
axes = axes.ravel()
fig.suptitle('Customer Rating Distribution by Warehouse Block', fontsize=16)
for ax, (block, ratings) in zip(axes, ratings_distribution.iterrows()):
    # Each row of ratings_distribution holds the per-rating counts of one block.
    ax.pie(ratings, labels=ratings.index, autopct='%1.1f%%', colors=colors, startangle=140)
    ax.set_title(f'Warehouse {block} Ratings')
plt.tight_layout(rect=[0, 0.03, 1, 0.95])  # leave room for the suptitle and space between charts
plt.show()

# Set the aesthetic style of the plots. This runs after the pie charts have been
# shown, so it applies to the figures created in the cells that follow.
sns.set(style="whitegrid")

In [None]:
# Average customer rating per mode of shipment.
# Reads from `df`, the frame loaded from the CSV: the name `data` used here
# previously is never defined in this notebook and raised NameError.
fig, ax = plt.subplots(figsize=(8, 5))
# ci=None turns off the error bars, leaving only the per-category bar heights.
sns.barplot(data=df, x='Mode_of_Shipment', y='Customer_rating', palette='coolwarm', ci=None, ax=ax)
ax.set_title('Average Customer Ratings by Mode of Shipment')
ax.set_xlabel('Mode of Shipment')
ax.set_ylabel('Average Customer Rating')
plt.show()

In [None]:
# Distribution of customer ratings for each number of customer care calls.
# Reads from `df`, the frame loaded from the CSV: the name `data` used here
# previously is never defined in this notebook and raised NameError.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='Customer_care_calls', y='Customer_rating', palette='Set2', ax=ax)
ax.set_title('Distribution of Customer Ratings by Number of Customer Care Calls')
ax.set_xlabel('Number of Customer Care Calls')
ax.set_ylabel('Customer Rating')
ax.grid(True)
plt.show()

In [None]:
# Customer care calls vs. customer rating, with a least-squares regression line.
# The line is fitted in this cell from `df`, because neither `data` nor a fitted
# `model` object is defined anywhere earlier in the notebook.
# NOTE(review): assumes the original `model` was an OLS fit of Customer_rating on
# Customer_care_calls plus an intercept ('const') — confirm against the lost cell.
pairs = df[['Customer_care_calls', 'Customer_rating']].dropna()  # fit only on complete rows
x_vals = pairs['Customer_care_calls']
rating_vals = pairs['Customer_rating']

# Closed-form simple linear regression:
#   slope = cov(x, y) / var(x),  intercept = mean(y) - slope * mean(x)
slope = x_vals.cov(rating_vals) / x_vals.var()
intercept = rating_vals.mean() - slope * x_vals.mean()
y_vals = intercept + slope * x_vals

fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='Customer_care_calls', y='Customer_rating', color='blue', alpha=0.5, ax=ax)
ax.plot(x_vals, y_vals, color='red')
ax.set_title('Customer Care Calls vs Customer Rating with Regression Line')
ax.set_xlabel('Number of Customer Care Calls')
ax.set_ylabel('Customer Rating')
ax.grid(True)
plt.show()

In [None]:
# Average customer rating per product-importance level.
# Reads from `df`, the frame loaded from the CSV: the name `data` used here
# previously is never defined in this notebook and raised NameError.
importance_ratings = df.groupby('Product_importance')['Customer_rating'].mean().reset_index()

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=importance_ratings, x='Product_importance', y='Customer_rating', palette='pastel', ax=ax)
ax.set_title('Average Customer Ratings by Product Importance')
ax.set_xlabel('Product Importance')
ax.set_ylabel('Average Customer Rating')
plt.show()

In [None]:
# Average customer rating per product price range.
# Reads from `df`, the frame loaded from the CSV: the name `data` used here
# previously is never defined in this notebook and raised NameError.
bins = [0, 50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
labels = ['0-50', '51-100', '101-150', '151-200', '201-250', '251-300', '301-350', '351-400', '401-450', '451-500']
# pd.cut uses right-closed intervals, e.g. (50, 100] is labelled '51-100';
# costs outside (0, 500] fall in no bin and become NaN.
df['Price_range'] = pd.cut(df['Cost_of_the_Product'], bins=bins, labels=labels)
# 'Price_range' is categorical: observed=False keeps every bin on the axis (empty
# bins get a NaN mean) and pins that behaviour regardless of the pandas default.
price_ratings = df.groupby('Price_range', observed=False)['Customer_rating'].mean().reset_index()

fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=price_ratings, x='Price_range', y='Customer_rating', palette='Spectral', ax=ax)
ax.set_title('Average Customer Ratings by Product Price Range')
ax.set_xlabel('Product Price Range')
ax.set_ylabel('Average Customer Rating')
ax.tick_params(axis='x', labelrotation=45)
plt.show()

In [None]:
# Average customer rating by on-time delivery status.
# Reads from `df`, the frame loaded from the CSV: the name `data` used here
# previously is never defined in this notebook and raised NameError.
# NOTE(review): the mapping treats 1 as "not on time" and 0 as "on time", kept
# exactly as written — confirm against the dataset's column description.
df['On_Time_Delivery'] = df['Reached.on.Time_Y.N'].map({1: 'Not on Time', 0: 'On Time'})
on_time_ratings = df.groupby('On_Time_Delivery')['Customer_rating'].mean().reset_index()

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=on_time_ratings, x='On_Time_Delivery', y='Customer_rating', palette='coolwarm', ax=ax)
ax.set_title('Average Customer Ratings by On-Time Delivery Status')
ax.set_xlabel('Delivery Status')
ax.set_ylabel('Average Customer Rating')
plt.show()