# Load the dataset

In [1]:
import pandas as pd

# Read the restaurant CSV into a DataFrame.
# The space before '.csv' is part of the file name this notebook was run with.
file_path = 'data/Dataset .csv'
data = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five records to confirm the load worked
print(data.head(5))


   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La, Ortigas, Mandaluyong City   
3      SM 

# Calculate basic statistical measures for numerical columns.

# Identify Numerical Columns

In [2]:
# Identify the numerical columns.
# 'number' matches every numeric dtype (any int/float width), so a column is
# not silently left out when pandas infers something other than int64/float64.
numerical_data = data.select_dtypes(include='number')
print(numerical_data.columns)


Index(['Restaurant ID', 'Country Code', 'Longitude', 'Latitude',
       'Average Cost for two', 'Price range', 'Aggregate rating', 'Votes'],
      dtype='object')


# Calculate Mean, Median, and Standard Deviation

In [3]:
# Mean, median and standard deviation of every numerical column.
# agg() computes exactly these three statistics and labels the rows directly,
# so there is no need to slice describe() by its '50%' label and then rename
# that row in place.
# NOTE(review): 'Restaurant ID' and 'Country Code' look like identifiers rather
# than measurements; consider excluding them from these statistics.
numerical_stats = numerical_data.agg(['mean', 'median', 'std'])
print(numerical_stats)


        Restaurant ID  Country Code  Longitude   Latitude  \
mean     9.051128e+06     18.365616  64.126574  25.854381   
median   6.004089e+06      1.000000  77.191964  28.570469   
std      8.791521e+06     56.750546  41.467058  11.007935   

        Average Cost for two  Price range  Aggregate rating       Votes  
mean             1199.210763     1.804837          2.666370  156.909748  
median            400.000000     2.000000          3.200000   31.000000  
std             16121.183073     0.905609          1.516378  430.169145  


# Explore the distribution of categorical variables such as "Country Code", "City", and "Cuisines"

# Distribution of "Country Code"

In [4]:
# Number of restaurants per country code, most frequent code first
country_code_distribution = (
    data['Country Code']
    .value_counts()
)
print(country_code_distribution)


Country Code
1      8652
216     434
215      80
30       60
214      60
189      60
148      40
208      34
14       24
162      22
94       21
184      20
166      20
191      20
37        4
Name: count, dtype: int64


# Distribution of "City"

In [5]:
# Number of restaurants per city, most frequent city first
city_distribution = (
    data['City']
    .value_counts()
)
print(city_distribution)


City
New Delhi           5473
Gurgaon             1118
Noida               1080
Faridabad            251
Ghaziabad             25
                    ... 
Panchkula              1
Mc Millan              1
Mayfield               1
Macedon                1
Vineland Station       1
Name: count, Length: 141, dtype: int64


# Distribution of "Cuisines"

In [6]:
# Frequency of each distinct value in the "Cuisines" column.
# Values are counted exactly as stored, so a comma-separated combination
# (e.g. "North Indian, Chinese") is tallied as its own category.
cuisines_distribution = (
    data['Cuisines']
    .value_counts()
)
print(cuisines_distribution)


Cuisines
North Indian                                             936
North Indian, Chinese                                    511
Chinese                                                  354
Fast Food                                                354
North Indian, Mughlai                                    334
                                                        ... 
Bengali, Fast Food                                         1
North Indian, Rajasthani, Asian                            1
Chinese, Thai, Malaysian, Indonesian                       1
Bakery, Desserts, North Indian, Bengali, South Indian      1
Italian, World Cuisine                                     1
Name: count, Length: 1825, dtype: int64


# Distribution of "Restaurant Name"

In [9]:
# The ten restaurant names that occur most often in the dataset
restaurant_name_distribution = (
    data['Restaurant Name']
    .value_counts()
    .iloc[:10]
)
# print() inserts its default space separator after the "\n", which is why the
# first line of the Series is indented by one space in the output.
print("Restaurant Name Distribution:\n", restaurant_name_distribution)


Restaurant Name Distribution:
 Restaurant Name
Cafe Coffee Day     83
Domino's Pizza      79
Subway              63
Green Chick Chop    51
McDonald's          48
Keventers           34
Pizza Hut           30
Giani               29
Baskin Robbins      28
Barbeque Nation     26
Name: count, dtype: int64


# Distribution of "Locality"

In [10]:
# The ten localities with the most restaurant entries
locality_distribution = (
    data['Locality']
    .value_counts()
    .iloc[:10]
)
# print() inserts its default space separator after the "\n", which is why the
# first line of the Series is indented by one space in the output.
print("\nLocality Distribution:\n", locality_distribution)



Locality Distribution:
 Locality
Connaught Place        122
Rajouri Garden          99
Shahdara                87
Defence Colony          86
Malviya Nagar           85
Pitampura               85
Mayur Vihar Phase 1     84
Rajinder Nagar          81
Safdarjung              80
Satyaniketan            79
Name: count, dtype: int64


# Distribution of "Rating Text"

In [11]:
# Number of restaurants in each "Rating text" category
rating_text_distribution = (
    data['Rating text']
    .value_counts()
)
# print() inserts its default space separator after the "\n", which is why the
# first line of the Series is indented by one space in the output.
print("\nRating Text Distribution:\n", rating_text_distribution)



Rating Text Distribution:
 Rating text
Average      3737
Not rated    2148
Good         2100
Very Good    1079
Excellent     301
Poor          186
Name: count, dtype: int64


# Distribution of "Has Table Booking"

In [12]:
# Number of restaurants for each value of "Has Table booking"
has_table_booking_distribution = (
    data['Has Table booking']
    .value_counts()
)
# print() inserts its default space separator after the "\n", which is why the
# first line of the Series is indented by one space in the output.
print("\nHas Table Booking Distribution:\n", has_table_booking_distribution)



Has Table Booking Distribution:
 Has Table booking
No     8393
Yes    1158
Name: count, dtype: int64


# Distribution of "Has Online Delivery"


In [13]:
# Number of restaurants for each value of "Has Online delivery"
has_online_delivery_distribution = (
    data['Has Online delivery']
    .value_counts()
)
# print() inserts its default space separator after the "\n", which is why the
# first line of the Series is indented by one space in the output.
print("\nHas Online Delivery Distribution:\n", has_online_delivery_distribution)



Has Online Delivery Distribution:
 Has Online delivery
No     7100
Yes    2451
Name: count, dtype: int64


# Identify the top cuisines and cities with the highest number of restaurants.

# Top Cuisines

In [14]:
# Ten most frequent values of the "Cuisines" column.
# Each distinct string is counted as-is, so a multi-cuisine entry such as
# "North Indian, Chinese" ranks as its own category rather than adding to
# the totals of the individual cuisines it lists.
top_cuisines = (
    data['Cuisines']
    .value_counts()
    .iloc[:10]
)
print(top_cuisines)


Cuisines
North Indian                      936
North Indian, Chinese             511
Chinese                           354
Fast Food                         354
North Indian, Mughlai             334
Cafe                              299
Bakery                            218
North Indian, Mughlai, Chinese    197
Bakery, Desserts                  170
Street Food                       149
Name: count, dtype: int64


# Top Cities

In [15]:
# Ten cities with the highest number of restaurant entries
top_cities = (
    data['City']
    .value_counts()
    .iloc[:10]
)
print(top_cities)


City
New Delhi       5473
Gurgaon         1118
Noida           1080
Faridabad        251
Ghaziabad         25
Bhubaneshwar      21
Amritsar          21
Ahmedabad         21
Lucknow           21
Guwahati          21
Name: count, dtype: int64
