# Level 3 – Task 2: Customer Preference Analysis


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Read the dataset CSV from the working directory into `df`, then preview
# the first five rows as the cell's output.
df = pd.read_csv(filepath_or_buffer="Dataset data science.csv")
df.head(n=5)


## Step 1: Analyze Relationship Between Cuisine Type and Rating


In [None]:
# Mean 'Aggregate rating' for each distinct 'Cuisines' value, highest first
avg_rating_by_cuisine = df.groupby('Cuisines')['Aggregate rating'].mean()
avg_rating_by_cuisine = avg_rating_by_cuisine.sort_values(ascending=False)

# Horizontal bar chart of the ten highest averages
avg_rating_by_cuisine.iloc[:10].plot.barh(figsize=(10, 5), color='green')
plt.title("Top 10 Cuisines by Average Rating")
plt.xlabel("Average Rating")
# Flip the y-axis so the first (highest-rated) entry is drawn at the top
plt.gca().invert_yaxis()
plt.show()


## Step 2: Identify Most Popular Cuisines Based on Number of Votes


In [None]:
# Total 'Votes' for each distinct 'Cuisines' value, largest total first
votes_by_cuisine = df.groupby('Cuisines')['Votes'].sum()
votes_by_cuisine = votes_by_cuisine.sort_values(ascending=False)

# Vertical bar chart of the ten largest vote totals
votes_by_cuisine.iloc[:10].plot.bar(figsize=(10, 5), color='skyblue')
plt.title("Top 10 Most Popular Cuisines (by Votes)")
plt.ylabel("Total Votes")
# Tilt the x tick labels by 45 degrees
plt.xticks(rotation=45)
plt.show()


## Step 3: Find Cuisines That Tend to Receive Higher Ratings


In [None]:
# Keep only the cuisines whose average rating is at least 4.0
# (the order of avg_rating_by_cuisine is preserved).
high_rated_cuisines = avg_rating_by_cuisine.loc[avg_rating_by_cuisine.ge(4.0)]
print("Cuisines with Average Rating >= 4.0:", high_rated_cuisines, sep="\n")


## Optional: Compare Votes vs Ratings for Top Cuisines


In [None]:
# Align the two per-cuisine aggregates on their shared index (the 'Cuisines'
# group labels) so each row carries both the mean rating and the vote total.
# pd.merge defaults to an inner join, so only cuisines present in both remain.
combined = pd.merge(
    avg_rating_by_cuisine.rename("Avg Rating"),
    votes_by_cuisine.rename("Total Votes"),
    left_index=True, right_index=True
)

# Keep the 10 cuisines with the most votes
top_combined = combined.sort_values("Total Votes", ascending=False).head(10)

# Dual-axis plot: bars for total votes (left axis), line for average rating
# (right axis), both drawn over the same cuisine categories.
fig, ax1 = plt.subplots(figsize=(10,5))

color = 'tab:blue'
ax1.set_xlabel('Cuisine')
ax1.set_ylabel('Total Votes', color=color)
ax1.bar(top_combined.index, top_combined['Total Votes'], color=color)
ax1.tick_params(axis='y', labelcolor=color)
# Rotate the category labels that bar() already produced. Calling
# set_xticklabels() here is discouraged by matplotlib because the tick
# positions were never fixed (it can emit a UserWarning and, in general,
# misplace labels); tick_params only changes how the existing labels are drawn.
ax1.tick_params(axis='x', labelrotation=45)

# Second y-axis sharing the same x-axis, used for the rating line
ax2 = ax1.twinx()
color = 'tab:red'
ax2.set_ylabel('Avg Rating', color=color)
ax2.plot(top_combined.index, top_combined['Avg Rating'], color=color, marker='o')
ax2.tick_params(axis='y', labelcolor=color)

plt.title("Votes vs Average Rating for Top Cuisines")
plt.tight_layout()
plt.show()
