In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np

In [None]:
# Read the Zomato CSV file into the starting DataFrame
df = pd.read_csv(filepath_or_buffer='zomato.csv')

In [None]:
# Discard the columns that are not carried into the cleaned dataset.
# `columns=` already targets the column axis, so no `axis` argument is needed.
df3 = df.drop(
    columns=[
        'url',
        'phone',
        'rest_type',
        'dish_liked',
        'reviews_list',
        'menu_item',
        'listed_in(city)',
    ]
)

In [None]:
# Give the cost and restaurant-type columns shorter, easier-to-type names
df4 = df3.rename(
    columns={
        'approx_cost(for two people)': 'two_people_cost',
        'listed_in(type)': 'type_of_restaurant',
    }
)

In [None]:
# Call the 'rate' column 'rating' from here on
df5 = df4.rename({'rate': 'rating'}, axis='columns')

In [None]:
# Keep only the rows that have a 'location' value
df5 = df5.dropna(axis='index', subset=['location'])

In [None]:
# Remove any row where 'cuisines' or 'two_people_cost' is missing
df5 = df5.dropna(
    subset=['cuisines', 'two_people_cost'],
    how='any',
)

In [None]:
# Remove commas from 'two_people_cost' and store the result as integers.
# regex=False makes the comma a literal match no matter what the installed
# pandas version uses as the default for `regex`.
# NOTE: re-running this cell after the conversion fails, because the `.str`
# accessor only works on string data.
df5['two_people_cost'] = (
    df5['two_people_cost']
    .str.replace(',', '', regex=False)
    .astype(int)
)

In [None]:
# Turn the cost for two into a per-person cost by halving it.
# NOTE: this overwrites the column, so running the cell again halves it again.
df5['two_people_cost'] = df5['two_people_cost'].div(2)

In [None]:
# The column now holds a per-person value, so name it accordingly
df5 = df5.rename(columns=dict(two_people_cost='cost_per_person'))

In [None]:
# Convert one raw rating entry (e.g. '4.1/5') to a float, or NaN when unrated
def handlerate(value):
    """Parse a single raw rating value into a float.

    Parameters
    ----------
    value : object
        One entry of the rating column, e.g. the string '4.1/5', one of the
        placeholders 'NEW' or '-', or a missing value.

    Returns
    -------
    float
        The number in front of the first '/', or ``np.nan`` when the entry is
        None, blank, or a placeholder. Surrounding whitespace is ignored.

    Raises
    ------
    ValueError
        If the text in front of the '/' is not a valid number.
    """
    if value is None:
        return np.nan

    text = str(value).strip()
    # Compare after stripping so padded placeholders such as 'NEW ' are
    # recognised instead of reaching float() and aborting the whole apply.
    if text in ('NEW', '-', ''):
        return np.nan

    # str(nan) is 'nan', which float() turns back into NaN, so values that
    # are already missing pass through unchanged.
    return float(text.split('/')[0])

In [None]:
# Run every 'rating' entry through handlerate to get numeric ratings
df5['rating'] = df5['rating'].map(handlerate)

In [None]:
# Count the missing values in each column
df5.isna().sum()

In [None]:
# Replace missing ratings with the mean of the ratings that are present
df5['rating'] = df5['rating'].fillna(
    value=df5['rating'].mean(),
)

In [None]:
# Write the cleaned DataFrame to a new CSV file, leaving out the index
df5.to_csv(path_or_buf='Zomato_Clean_Data.csv', index=False)

In [None]:
# Show the first five rows as a quick check of the result
df5.head(n=5)