# Level 2 Tasks — Cognifyz Data Science Internship
This notebook works through **all Level 2 tasks** on the provided restaurant dataset; figures and the engineered dataset are saved to the `outputs_level2/` folder.
Tasks include:
1. Table Booking and Online Delivery Analysis
2. Price Range Analysis
3. Feature Engineering


In [None]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Path to dataset.
# The default is the location this notebook was originally written against.
# Set the COGNIFYZ_DATA_PATH environment variable to read the CSV from
# somewhere else without editing the notebook.
DATA_PATH = os.environ.get("COGNIFYZ_DATA_PATH", r"/mnt/data/Dataset .csv")

# Create output directory (no error if it already exists, so the cell can be re-run)
out_dir = Path("outputs_level2")
out_dir.mkdir(parents=True, exist_ok=True)

# Load dataset and preview the first rows
df = pd.read_csv(DATA_PATH)
df.head()


## Task 1: Table Booking and Online Delivery

In [None]:
# Percentage of restaurants offering table booking and online delivery
for col in ['Has Table booking', 'Has Online delivery']:
    pct = df[col].value_counts(normalize=True) * 100
    print(f"\n{col} percentage:\n", pct)

# Compare average ratings for table booking vs no table booking.
# The boxplot shows the distributions; the group means give the actual averages.
booking_rating = df.groupby('Has Table booking')['Aggregate rating'].mean()
print("\nAverage rating by table booking:\n", booking_rating)

# Each plot gets its own explicit figure/axes so the second plot can never be
# drawn on top of the first, whichever matplotlib backend is active.
fig, ax = plt.subplots()
sns.boxplot(x='Has Table booking', y='Aggregate rating', data=df, ax=ax)
ax.set_title("Ratings vs Table Booking")
fig.savefig(out_dir / "ratings_vs_table_booking.png")
plt.show()

# Online delivery availability among different price ranges.
# Row-normalised percentages make price ranges of different sizes comparable.
delivery_by_price = pd.crosstab(
    df['Price range'], df['Has Online delivery'], normalize='index'
) * 100
print("\nOnline delivery percentage by price range:\n", delivery_by_price)

fig, ax = plt.subplots()
sns.countplot(x='Price range', hue='Has Online delivery', data=df, ax=ax)
ax.set_title("Online Delivery by Price Range")
fig.savefig(out_dir / "online_delivery_by_price_range.png")
plt.show()


## Task 2: Price Range Analysis

In [None]:
# Most common price range.
# mode() returns every tied value in sorted order, so the first entry is
# reported; an empty result (no non-missing values) is reported as None
# instead of raising.
price_modes = df['Price range'].mode()
most_common_price = price_modes.iloc[0] if not price_modes.empty else None
print("Most Common Price Range:", most_common_price)

# Average rating per price range
price_rating = df.groupby('Price range')['Aggregate rating'].mean()
print("\nAverage rating per price range:\n", price_rating)

# Color with highest average rating
if 'Rating color' in df.columns:
    color_rating = df.groupby('Rating color')['Aggregate rating'].mean()
    print("\nAverage rating by Rating color:\n", color_rating)

    # Name the top colour explicitly; skip groups whose mean is undefined so
    # idxmax always has a valid value to pick.
    valid_color_rating = color_rating.dropna()
    if not valid_color_rating.empty:
        best_color = valid_color_rating.idxmax()
        print(
            f"\nRating color with highest average rating: {best_color} "
            f"({valid_color_rating.max():.2f})"
        )


## Task 3: Feature Engineering

In [None]:
def _text_length(values):
    """Return the character count of each entry, using 0 for missing entries.

    Non-string values are measured by their string form. Missing values are
    given length 0 rather than the length of the text "nan".
    """
    return values.astype(str).str.len().where(values.notna(), 0).astype(int)


def _yes_no_to_binary(values):
    """Return 1 where the entry reads as yes/y/true/1 (case-insensitive), else 0.

    Surrounding whitespace is ignored. Missing or unrecognised values map to 0.
    """
    truthy = ['yes', 'y', 'true', '1']
    return values.astype(str).str.strip().str.lower().isin(truthy).astype(int)


# Extract additional features: length of restaurant name and address
df['Name Length'] = _text_length(df['Restaurant Name'])
df['Address Length'] = _text_length(df['Address'])

# Encode table booking & online delivery as binary
df['Has Table booking (bin)'] = _yes_no_to_binary(df['Has Table booking'])
df['Has Online delivery (bin)'] = _yes_no_to_binary(df['Has Online delivery'])

# Save engineered dataset
df.to_csv(out_dir / "dataset_with_features.csv", index=False)

# Display first few rows with new features
df[['Restaurant Name', 'Name Length', 'Address', 'Address Length', 'Has Table booking (bin)', 'Has Online delivery (bin)']].head()
