In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and echo the full path of every file in it,
# in the same order os.walk yields them.
input_file_paths = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Relative location of the Indian Food 101 CSV in the Kaggle input directory.
INDIAN_FOOD_CSV = '../input/indian-food-101/indian_food.csv'

# Raw dataset; the cleaned frames built further down are derived from it.
df = pd.read_csv(INDIAN_FOOD_CSV)

# Notes

### Potential Questions that can be answered 
- Proportions of Veg to Non veg dishes
- Number of dishes that come from each region 
- What sort of flavour profiles are there 
- What is the most popular flavour profile of each region 
- What is the most popular flavour profile in each state  
- Average Preparation Time 
- Average Cooking Time 
- What types of course of meal are there 
- What sort of ingredients are used in indian cooking 
- What foods have the shortest and longest cooking time 
- What foods have the shortest and longest preparation time
- What is the average cooking time for each region and state
- What is the average preparation time for each region and state 
- Average main course cooking time 
- Average snack cooking time 
- Average cooking time for main courses in each state and region 
- Average preparation time for snacks in each state and region 
- Longest and shortest preparation times for main courses
- Longest and shortest preparation times for snacks
- Infographics on each region

#### References
- https://www.kaggle.com/nehaprabhavalkar/indian-cuisine-analysis
- https://www.kaggle.com/mysarahmadbhat/eda-on-indian-food

# Light data analysis 

Before going into detailed analysis, we take a quick look at the data to get a general idea of its shape and contents. This makes it easier to answer the questions listed above using this dataset.

In [None]:
# Preview the first rows to see which columns exist and what their values look like.
df.head()

In [None]:
# Print column names, non-null counts and dtypes for the raw frame.
df.info()

In [None]:
# Summary statistics (count, mean, spread, quartiles) of the raw frame's columns.
df.describe()

In [None]:
# Count the missing (NaN) entries in each column of the raw frame.
df.isna().sum()

In [None]:
# Working copy of the raw data in which every NaN cell carries the label "Unknown".
v1 = df.fillna(value="Unknown")

In [None]:
# Re-check the per-column NaN counts after filling, to confirm none remain.
v1.isna().sum()

In [None]:
# Same data as v1 but indexed by dish name; the bare name on the last line
# makes the notebook display the resulting frame.
v2 = v1.set_index(keys='name')
v2

In [None]:
# The dataset marks missing values with the number -1 in numeric columns and
# the text '-1' in string columns. `q` is the frame used for the
# shortest/longest time listings, so it must have BOTH placeholders turned
# into NaN. Previously each placeholder was replaced on a separate copy of
# `df`, which left '-1' strings in `q`.
q = df.replace([-1, '-1'], np.nan)

# Kept with its original meaning (string placeholder only) so that any cell
# still referring to `w` keeps working.
w = df.replace('-1', np.nan)
q

## Flavour Profiles

All the different flavour profiles that exist within the Indian palate

In [None]:
# Distinct values that occur in the flavor_profile column.
v2['flavor_profile'].unique()

#### Number of dishes in each flavour profile category, broken down by the state the dish originates from

In [None]:
# Number of dishes in each (flavour profile, state) combination.
flavor_state_keys = ['flavor_profile', 'state']
state_flavor_profile = v2.groupby(by=flavor_state_keys).size()

In [None]:
# Display the dish counts per (flavour profile, state) pair.
state_flavor_profile

In [None]:
# Number of dishes in each (flavour profile, region) combination.
flavor_region_keys = ['flavor_profile', 'region']
region_flavor_profile = v2.groupby(by=flavor_region_keys).size()

In [None]:
# Display the dish counts per (flavour profile, region) pair.
region_flavor_profile

#### Most popular foods according to their flavour profile as well as their region and state

#### Flavour 

#### Dishes with highest prep and cook time of each flavour profile of each region

In [None]:
# For every (flavour profile, region) group, show the actual dish with the
# longest prep time and the actual dish with the longest cook time.
#
# A plain groupby(...).max() takes the maximum of every column independently,
# so the 'name' it reports is just the alphabetically last dish of the group
# and does not belong to the times shown next to it. idxmax() instead gives
# the row label of the maximum, which lets us pull the matching dish name.
region_keys = ['flavor_profile', 'region']
region_groups = v1.groupby(region_keys)
pd.concat(
    {
        # One sub-table per time column: the dish name and its time value.
        time_col: (
            v1.loc[region_groups[time_col].idxmax(), region_keys + ['name', time_col]]
            .set_index(region_keys)
        )
        for time_col in ('prep_time', 'cook_time')
    },
    axis=1,
)

#### Dishes with highest prep and cook time of each flavour profile of each state

In [None]:
# For every (flavour profile, state) group, show the actual dish with the
# longest prep time and the actual dish with the longest cook time.
#
# A plain groupby(...).max() takes the maximum of every column independently,
# so the 'name' it reports is just the alphabetically last dish of the group
# and does not belong to the times shown next to it. idxmax() instead gives
# the row label of the maximum, which lets us pull the matching dish name.
state_keys = ['flavor_profile', 'state']
state_groups = v1.groupby(state_keys)
pd.concat(
    {
        # One sub-table per time column: the dish name and its time value.
        time_col: (
            v1.loc[state_groups[time_col].idxmax(), state_keys + ['name', time_col]]
            .set_index(state_keys)
        )
        for time_col in ('prep_time', 'cook_time')
    },
    axis=1,
)

## How many different ingredients are used in Indian cooking

Indian cooking is often characterized by the variety of ingredients that are put together to make what we know as Indian food. In this part I look at the different ingredients used in the dishes by splitting the ingredients provided into a list and then converting it into a set of unique values.

In [None]:
# Build the list of distinct ingredients across all dishes.
# Each 'ingredients' cell is a comma-separated string. After splitting, every
# token is stripped of surrounding whitespace and lower-cased; otherwise
# " sugar", "sugar" and "Sugar" would be counted as three different
# ingredients and inflate the total.
ingredients_seperated = (
    v2['ingredients']
    .str.split(',', expand=True)
    .stack()          # flatten to one token per row
    .str.strip()
    .str.lower()
    .unique()
    .tolist()
)

# Unique, non-empty ingredient names (an empty token can appear when a cell
# has a trailing or doubled comma).
ing_set = {ingredient for ingredient in ingredients_seperated if ingredient}
print("Number of ingredients present in Indian cooking: ", len(ing_set), 'ingredients')


In [None]:
# Print the heading and the ingredient set on separate lines with one call.
print("The unique variety of ingredients: ", ing_set, sep="\n")

## Proportion of vegetarian dishes to non-vegetarian dishes

Vegetarianism is given a lot of importance and emphasis in the Indian subcontinent. From the foods presented in the dataset, let's see the proportion of vegetarian dishes compared to non-vegetarian dishes.

In [None]:
# Number of dishes for each value of the diet column.
diet_count = v2['diet'].value_counts()

In [None]:
# Pie chart of the diet split; each wedge is labelled with its share to one decimal place.
diet_count.plot(kind='pie', autopct="%.1f%%")

## Number of dishes that come from each region and state 

### By State

In [None]:
# Number of dishes recorded for each state.
state_val = v2.groupby(by='state').size()

# Relabel the '-1' entry of the index as 'Unknown' for display.
placeholder_labels = {'-1': 'Unknown'}
x = state_val.rename(index=placeholder_labels)
x

In [None]:
# Horizontal bar chart: one bar per state, bar length is the number of dishes.
plt.figure(figsize=(10, 10))
sns.barplot(x=x.values, y=x.index)

### By Region

In [None]:
# Number of dishes recorded for each region.
region_val = v2.groupby(by='region').size()

# Relabel the '-1' entry of the index as 'Unknown' for display.
placeholder_labels = {'-1': 'Unknown'}
b = region_val.rename(index=placeholder_labels)
b

In [None]:
# Horizontal bar chart: one bar per region, bar length is the number of dishes.
plt.figure(figsize=(5, 5))
sns.barplot(x=b.values, y=b.index)

##  Cooking and Preparation times 

### Average cooking and preparation times

In [None]:
# Overall average preparation and cooking time.
# - Only the two time columns are averaged: calling mean() on the whole frame
#   raises a TypeError on pandas >= 2.0 because the other columns are text.
# - -1 is the dataset's placeholder for "not recorded" (the notebook replaces
#   it with NaN elsewhere), so it is turned into NaN here as well; otherwise
#   it would drag the averages down.
v2[['prep_time', 'cook_time']].replace(-1, np.nan).mean()

### Average cooking and preparation time per state

In [None]:
# Average preparation and cooking time per state.
# - Only the time columns are aggregated: groupby(...).mean() over the text
#   columns raises a TypeError on pandas >= 2.0.
# - -1 is the dataset's placeholder for "not recorded", so it is converted to
#   NaN first and therefore ignored by mean() instead of skewing the average.
avg_c_and_p_time_state = (
    v1[['state', 'prep_time', 'cook_time']]
    .replace(-1, np.nan)
    .groupby('state')
    .mean()
    .reset_index()
)

In [None]:
# Combined average time per state: mean prep time plus mean cook time.
avg_c_and_p_time_state['total_avg_time'] = avg_c_and_p_time_state['prep_time'].add(
    avg_c_and_p_time_state['cook_time']
)
avg_c_and_p_time_state

### Average cooking and preparation  time per region

In [None]:
# Average preparation and cooking time per region.
# - Only the time columns are aggregated: groupby(...).mean() over the text
#   columns raises a TypeError on pandas >= 2.0.
# - -1 is the dataset's placeholder for "not recorded", so it is converted to
#   NaN first and therefore ignored by mean() instead of skewing the average.
avg_c_and_p_time_region = (
    v1[['region', 'prep_time', 'cook_time']]
    .replace(-1, np.nan)
    .groupby('region')
    .mean()
    .reset_index()
)

In [None]:
# Combined average time per region: mean prep time plus mean cook time.
avg_c_and_p_time_region['total_avg_time'] = avg_c_and_p_time_region['prep_time'].add(
    avg_c_and_p_time_region['cook_time']
)
avg_c_and_p_time_region

#### Shortest Cooking time and preparation time 

In [None]:
# Ten rows of q with the smallest cook_time (ascending sort, NaN rows go last).
shortest_cooking_times = q.sort_values(by='cook_time').head(10)
shortest_cooking_times

In [None]:
# Ten rows of q with the smallest prep_time (ascending sort, NaN rows go last).
shortest_prep_times = q.sort_values(by='prep_time').head(10)
shortest_prep_times

#### Longest Cooking and Preparation time 

In [None]:
# Ten rows of q with the largest cook_time (descending sort, NaN rows go last).
longest_cooking_times = q.sort_values(by='cook_time', ascending=False).head(10)
longest_cooking_times

In [None]:
# Ten rows of q with the largest prep_time (descending sort, NaN rows go last).
longest_prep_times = q.sort_values(by='prep_time', ascending=False).head(10)
longest_prep_times

## Types of meals that are there

#### Types of courses of meals

In [None]:
# Distinct values that occur in the course column.
v1.course.unique()

#### Number of dishes per course of meal

In [None]:
# Number of dishes for each course, most frequent first.
v1.course.value_counts()

#### Number of dishes per course, broken down by region and state

In [None]:
# Number of dishes in each (region, state, course) combination.
course_breakdown_keys = ['region', 'state', 'course']
v1.groupby(by=course_breakdown_keys).size()