In [2]:
# Reading dataset and printing its shape, size and datatypes. 

import pandas as pd
df = pd.read_csv('indian_food.csv')

print("Shape of the dataset is: ")
print(df.shape)
print("\nSize of the dataset is: ")
print(df.size)
print("\nDatatypes of the dataset are: ")
print(df.dtypes)

Shape of the dataset is: 
(255, 9)

Size of the dataset is: 
2295

Datatypes of the dataset are: 
name              object
ingredients       object
diet              object
prep_time          int64
cook_time          int64
flavor_profile    object
course            object
state             object
region            object
dtype: object


In [3]:
# Printing the number of unique dishes in the dataset.

print("Number of unique dishes are:", df.name.nunique())

Number of unique dishes are: 255


In [4]:
# Using describe function to view statistics of all columns

df.describe()

Unnamed: 0,prep_time,cook_time
count,255.0,255.0
mean,31.105882,34.529412
std,72.554409,48.26565
min,-1.0,-1.0
25%,10.0,20.0
50%,10.0,30.0
75%,20.0,40.0
max,500.0,720.0


In [5]:
# Checking for missing values in the dataset. 
print("Missing values in each column: ")
print(df.isnull().sum())

# There is one null value in region column. 
df.fillna('NaN', inplace = True)

print("\nNumber of missing values after replacing them: ")
print(df.isnull().sum())

Missing values in each column: 
name              0
ingredients       0
diet              0
prep_time         0
cook_time         0
flavor_profile    0
course            0
state             0
region            1
dtype: int64

Number of missing values after replacing them: 
name              0
ingredients       0
diet              0
prep_time         0
cook_time         0
flavor_profile    0
course            0
state             0
region            0
dtype: int64


In [6]:
# Numeric and categorical features of the dataset. 

print("Number of numeric and categorical features of the dataset:")
obj = df.select_dtypes(include='object').columns
num = df.select_dtypes(include='number').columns

print("Categorical:", len(obj))
print("Numerical:", len(num))

Number of numeric and categorical features of the dataset:
Categorical: 7
Numerical: 2


In [7]:
# Number of unique values in each column 

print("Number of unique values in each column:")
print(df.nunique())

Number of unique values in each column:
name              255
ingredients       252
diet                2
prep_time          22
cook_time          19
flavor_profile      5
course              4
state              25
region              8
dtype: int64


In [8]:
# Adding new column to show total time taken to prepare a dish 

df['total_time'] = df['prep_time'] + df['cook_time']
df

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region,total_time
0,Balu shahi,"Maida flour, yogurt, oil, sugar",vegetarian,45,25,sweet,dessert,West Bengal,East,70
1,Boondi,"Gram flour, ghee, sugar",vegetarian,80,30,sweet,dessert,Rajasthan,West,110
2,Gajar ka halwa,"Carrots, milk, sugar, ghee, cashews, raisins",vegetarian,15,60,sweet,dessert,Punjab,North,75
3,Ghevar,"Flour, ghee, kewra, milk, clarified butter, su...",vegetarian,15,30,sweet,dessert,Rajasthan,West,45
4,Gulab jamun,"Milk powder, plain flour, baking powder, ghee,...",vegetarian,15,40,sweet,dessert,West Bengal,East,55
...,...,...,...,...,...,...,...,...,...,...
250,Til Pitha,"Glutinous rice, black sesame seeds, gur",vegetarian,5,30,sweet,dessert,Assam,North East,35
251,Bebinca,"Coconut milk, egg yolks, clarified butter, all...",vegetarian,20,60,sweet,dessert,Goa,West,80
252,Shufta,"Cottage cheese, dry dates, dried rose petals, ...",vegetarian,-1,-1,sweet,dessert,Jammu & Kashmir,North,-2
253,Mawa Bati,"Milk powder, dry fruits, arrowroot powder, all...",vegetarian,20,45,sweet,dessert,Madhya Pradesh,Central,65


In [10]:
for index, row in df.iterrows():
    ingredients = row['ingredients']
    ingredients_count = len(ingredients.split(','))
    df.at[index, 'ingredient_counts'] = ingredients_count
    
df

Unnamed: 0,name,ingredients,diet,prep_time,cook_time,flavor_profile,course,state,region,total_time,ingredient_counts
0,Balu shahi,"Maida flour, yogurt, oil, sugar",vegetarian,45,25,sweet,dessert,West Bengal,East,70,4.0
1,Boondi,"Gram flour, ghee, sugar",vegetarian,80,30,sweet,dessert,Rajasthan,West,110,3.0
2,Gajar ka halwa,"Carrots, milk, sugar, ghee, cashews, raisins",vegetarian,15,60,sweet,dessert,Punjab,North,75,6.0
3,Ghevar,"Flour, ghee, kewra, milk, clarified butter, su...",vegetarian,15,30,sweet,dessert,Rajasthan,West,45,10.0
4,Gulab jamun,"Milk powder, plain flour, baking powder, ghee,...",vegetarian,15,40,sweet,dessert,West Bengal,East,55,8.0
...,...,...,...,...,...,...,...,...,...,...,...
250,Til Pitha,"Glutinous rice, black sesame seeds, gur",vegetarian,5,30,sweet,dessert,Assam,North East,35,3.0
251,Bebinca,"Coconut milk, egg yolks, clarified butter, all...",vegetarian,20,60,sweet,dessert,Goa,West,80,4.0
252,Shufta,"Cottage cheese, dry dates, dried rose petals, ...",vegetarian,-1,-1,sweet,dessert,Jammu & Kashmir,North,-2,5.0
253,Mawa Bati,"Milk powder, dry fruits, arrowroot powder, all...",vegetarian,20,45,sweet,dessert,Madhya Pradesh,Central,65,4.0
