In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Importing the DataSet

In [None]:
# Read the Indian food CSV into the working DataFrame and preview the first rows.
DATASET_CSV = "../input/indian-food-101/indian_food.csv"
df = pd.read_csv(DATASET_CSV)
df.head()

In [None]:
# Summarise column dtypes and non-null counts of the freshly loaded frame.
df.info()

# Pre-processing the DataFrame

## Changing minutes to hours

In [None]:
# Convert preparation and cooking times from minutes to hours.
# A negative duration cannot be a real measurement, so it is masked to NaN
# instead of being scaled into a small negative number of hours.
# NOTE(review): the categorical columns carry a "-1" category, so -1 is
# presumably this dataset's missing-value marker here too — confirm.
# This cell rescales in place: run it once per fresh load of `df`.
MINUTES_PER_HOUR = 60
for time_col in ("prep_time", "cook_time"):
    minutes = df[time_col]
    df[time_col] = minutes.where(minutes >= 0) / MINUTES_PER_HOUR

## Working with non-numerical columns

In [None]:
# One-hot encode `diet`, then keep only the last indicator column so that
# `diet` becomes a single 0/1-style flag.
diet_indicators = pd.get_dummies(df["diet"])
df["diet"] = diet_indicators.iloc[:, -1]

In [None]:
# Swap the categorical `course` column for one `course_<value>` indicator each.
course_dummies = pd.get_dummies(df["course"], prefix="course", prefix_sep="_")
df = df.drop(columns=["course"]).join(course_dummies)

In [None]:
# Swap `flavor_profile` for `flavour_<value>` indicator columns.
# The `flavour_-1` indicator is discarded.
# NOTE(review): "-1" looks like the dataset's missing-value marker — confirm.
flavour_dummies = pd.get_dummies(df["flavor_profile"], prefix="flavour", prefix_sep="_")
flavour_dummies = flavour_dummies.drop(columns=["flavour_-1"])
df = df.drop(columns=["flavor_profile"]).join(flavour_dummies)

In [None]:
# Swap `state` for `state_<name>` indicator columns.
# The `state_-1` indicator is discarded.
# NOTE(review): "-1" looks like the dataset's missing-value marker — confirm.
state_dummies = pd.get_dummies(df["state"], prefix="state", prefix_sep="_")
state_dummies = state_dummies.drop(columns=["state_-1"])
df = df.drop(columns=["state"]).join(state_dummies)

In [None]:
# Swap `region` for `region_<name>` indicator columns.
# The `region_-1` indicator is discarded.
# NOTE(review): "-1" looks like the dataset's missing-value marker — confirm.
region_dummies = pd.get_dummies(df["region"], prefix="region", prefix_sep="_")
region_dummies = region_dummies.drop(columns=["region_-1"])
df = df.drop(columns=["region"]).join(region_dummies)

In [None]:
# Split the comma-separated `ingredients` string into one column per position
# (ingredient_1, ingredient_2, ...). The number of columns follows the longest
# ingredient list found in the data rather than a hard-coded 10.
# NOTE(review): tokens are not stripped, so if the CSV separates items with
# ", " the same ingredient can occur with and without a leading space and be
# treated as two different values — confirm before trusting the counts.
temp = df["ingredients"].str.split(",", expand=True)
temp.columns = ["ingredient_{}".format(pos) for pos in range(1, temp.shape[1] + 1)]
# Dishes with fewer ingredients get "" in their unused positions.
temp = temp.fillna("")

In [None]:
# Encode every ingredient string as an integer label that is shared across
# all ingredient position columns, then attach those columns to `df`.
from sklearn.preprocessing import LabelEncoder

# Flatten every position into a single Series so the encoder sees each
# distinct value once; ravel() adapts to the frame's size instead of assuming
# exactly 2550 cells.
ingredients = pd.Series(temp.to_numpy().ravel(), name="ingredients")

le = LabelEncoder()
le.fit(ingredients)

# Replace the strings in each position column with their integer labels.
for position_col in temp.columns:
    temp[position_col] = le.transform(temp[position_col])

# The raw comma-separated column is no longer needed once the codes are joined.
df = df.join(temp)
df = df.drop(columns=["ingredients"])

# DataFrame after pre-processing.

In [None]:
# Re-inspect column dtypes and non-null counts after the encoding steps above.
df.info()

# Visualising the DataFrame

## States:

In [None]:
def top_ingredient(state, top_n=10):
    """Plot how often each ingredient code occurs in the dishes of one state.

    Counts are taken over every ``ingredient_*`` column of the rows whose
    ``state_<state>`` indicator is set, so an ingredient is counted in full
    regardless of the position it occupies in a recipe. The earlier version
    cut every position to its six most frequent codes before summing, which
    under-counted ingredients and depended on 0 always being the most
    frequent product.

    Parameters
    ----------
    state : str
        State name as it appears in the ``state_<name>`` columns of ``df``.
    top_n : int or None, default 10
        Number of most frequent ingredient codes to draw; ``None`` draws all.

    Returns
    -------
    None
        The bar chart is drawn as a side effect.

    Raises
    ------
    KeyError
        If ``df`` has no ``state_<state>`` column.
    """
    state_col = "state_{}".format(state)
    ingredient_cols = [col for col in df.columns if col.startswith("ingredient_")]

    in_state = df[state_col].astype(bool)
    codes = pd.Series(df.loc[in_state, ingredient_cols].to_numpy().ravel())

    # Code 0 is left out, as it effectively was before (a zero product was
    # discarded). NOTE(review): presumably 0 is the "" placeholder for unused
    # positions, since LabelEncoder sorts its classes — confirm.
    counts = codes[codes != 0].value_counts()
    if top_n is not None:
        counts = counts.head(top_n)

    if counts.empty:
        # Plotting an empty Series raises, so report and stop instead.
        print("No ingredients recorded for {}".format(state))
        return

    ax = counts.plot(kind="bar", title="Most used ingredients: {}".format(state))
    ax.set_xlabel("ingredient code (decode with le.inverse_transform)")
    ax.set_ylabel("occurrences")

In [None]:
def top_flavour(state):
    """Print the flavour tallies for the dishes of one state.

    For each of the four flavour indicator columns, the column is multiplied
    by the state's indicator column, zero products are turned into NaN, and
    the ``value_counts()`` of what remains is printed next to the column name.

    Parameters
    ----------
    state : str
        State name as it appears in the ``state_<name>`` columns of ``df``.

    Returns
    -------
    None
        Results are printed, not returned.

    Raises
    ------
    KeyError
        If ``df`` lacks the state column or one of the flavour columns.
    """
    in_state = df["state_{}".format(state)]
    for flavour_col in ("flavour_sweet", "flavour_bitter", "flavour_sour", "flavour_spicy"):
        # A zero product means "other state" or "other flavour"; mapping it to
        # NaN makes value_counts() skip it. np.nan is used because the np.NaN
        # alias no longer exists in NumPy 2.0.
        matches = (df[flavour_col] * in_state).replace(0, np.nan)
        print(flavour_col, matches.value_counts())

### Andhra Pradesh:

In [None]:
# Bar chart of ingredient-code frequencies for Andhra Pradesh dishes.
top_ingredient("Andhra Pradesh")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([402, 146, 122, 254])

In [None]:
# Print the per-flavour tallies for Andhra Pradesh dishes.
top_flavour("Andhra Pradesh")

### Assam:

In [None]:
# Bar chart of ingredient-code frequencies for Assam dishes.
top_ingredient("Assam")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([184, 21, 269, 271, 319, 402])

In [None]:
# Print the per-flavour tallies for Assam dishes.
top_flavour("Assam")

### Bihar:

In [None]:
# Bar chart of ingredient-code frequencies for Bihar dishes.
top_ingredient("Bihar")

In [None]:
# Print the per-flavour tallies for Bihar dishes.
top_flavour("Bihar")

### Chhattisgarh:

In [None]:
# Bar chart of ingredient-code frequencies for Chhattisgarh dishes.
top_ingredient("Chhattisgarh")

In [None]:
# Print the per-flavour tallies for Chhattisgarh dishes.
top_flavour("Chhattisgarh")

### Goa:

In [None]:
# Bar chart of ingredient-code frequencies for Goa dishes.
top_ingredient("Goa")

In [None]:
# Print the per-flavour tallies for Goa dishes.
top_flavour("Goa")

### Gujarat:

In [None]:
# Bar chart of ingredient-code frequencies for Gujarat dishes.
top_ingredient("Gujarat")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([26, 254, 422, 316])

In [None]:
# Print the per-flavour tallies for Gujarat dishes.
top_flavour("Gujarat")

### Haryana:

In [None]:
# Bar chart of ingredient-code frequencies for Haryana dishes.
top_ingredient("Haryana")

In [None]:
# Print the per-flavour tallies for Haryana dishes.
top_flavour("Haryana")

### Jammu & Kashmir:

In [None]:
# Bar chart of ingredient-code frequencies for Jammu & Kashmir dishes.
top_ingredient("Jammu & Kashmir")

In [None]:
# Print the per-flavour tallies for Jammu & Kashmir dishes.
top_flavour("Jammu & Kashmir")

### Karnataka:

In [None]:
# Bar chart of ingredient-code frequencies for Karnataka dishes.
top_ingredient("Karnataka")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([67, 79, 122, ])

In [None]:
# Print the per-flavour tallies for Karnataka dishes.
top_flavour("Karnataka")

### Kerala:

In [None]:
# Bar chart of ingredient-code frequencies for Kerala dishes.
top_ingredient("Kerala")

In [None]:
# Print the per-flavour tallies for Kerala dishes.
top_flavour("Kerala")

### Madhya Pradesh:

In [None]:
# Bar chart of ingredient-code frequencies for Madhya Pradesh dishes.
top_ingredient("Madhya Pradesh")

In [None]:
# Print the per-flavour tallies for Madhya Pradesh dishes.
top_flavour("Madhya Pradesh")

### Maharashtra:

In [None]:
# Bar chart of ingredient-code frequencies for Maharashtra dishes.
top_ingredient("Maharashtra")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([79, 402, 122, 218, 254, 423])

In [None]:
# Print the per-flavour tallies for Maharashtra dishes.
top_flavour("Maharashtra")

### Manipur:

In [None]:
# Bar chart of ingredient-code frequencies for Manipur dishes.
top_ingredient("Manipur")

In [None]:
# Print the per-flavour tallies for Manipur dishes.
top_flavour("Manipur")

### NCT of Delhi:

In [None]:
# Bar chart of ingredient-code frequencies for NCT of Delhi dishes.
top_ingredient("NCT of Delhi")

In [None]:
# Print the per-flavour tallies for NCT of Delhi dishes.
top_flavour("NCT of Delhi")

### Nagaland:

In [None]:
# Bar chart of ingredient-code frequencies for Nagaland dishes.
top_ingredient("Nagaland")

In [None]:
# Print the per-flavour tallies for Nagaland dishes.
top_flavour("Nagaland")

### Odisha:

In [None]:
# Bar chart of ingredient-code frequencies for Odisha dishes.
top_ingredient("Odisha")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([254, 326, 327])

In [None]:
# Print the per-flavour tallies for Odisha dishes.
top_flavour("Odisha")

### Punjab:

In [None]:
# Bar chart of ingredient-code frequencies for Punjab dishes.
top_ingredient("Punjab")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([118,123])

In [None]:
# Print the per-flavour tallies for Punjab dishes.
top_flavour("Punjab")

### Rajasthan:

In [None]:
# Bar chart of ingredient-code frequencies for Rajasthan dishes.
top_ingredient("Rajasthan")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([122, 140, 238, 254])

In [None]:
# Print the per-flavour tallies for Rajasthan dishes.
top_flavour("Rajasthan")

### Tamil Nadu:

In [None]:
# Bar chart of ingredient-code frequencies for Tamil Nadu dishes.
top_ingredient("Tamil Nadu")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([79, 272, 325])

In [None]:
# Print the per-flavour tallies for Tamil Nadu dishes.
top_flavour("Tamil Nadu")

### Telangana:

In [None]:
# Bar chart of ingredient-code frequencies for Telangana dishes.
top_ingredient("Telangana")

In [None]:
# Decode a hard-coded ingredient code back to its name (presumably the bar that
# stands out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([175])

In [None]:
# Print the per-flavour tallies for Telangana dishes.
top_flavour("Telangana")

### Tripura:

In [None]:
# Bar chart of ingredient-code frequencies for Tripura dishes.
top_ingredient("Tripura")

In [None]:
# Print the per-flavour tallies for Tripura dishes.
top_flavour("Tripura")

### Uttar Pradesh:

In [None]:
# Bar chart of ingredient-code frequencies for Uttar Pradesh dishes.
top_ingredient("Uttar Pradesh")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([118, 254])

In [None]:
# Print the per-flavour tallies for Uttar Pradesh dishes.
top_flavour("Uttar Pradesh")

### Uttarakhand:

In [None]:
# Bar chart of ingredient-code frequencies for Uttarakhand dishes.
top_ingredient("Uttarakhand")

In [None]:
# Print the per-flavour tallies for Uttarakhand dishes.
top_flavour("Uttarakhand")

### West Bengal:

In [None]:
# Bar chart of ingredient-code frequencies for West Bengal dishes.
top_ingredient("West Bengal")

In [None]:
# Decode hard-coded ingredient codes back to names (presumably the bars that
# stand out in the chart above — confirm). Codes depend on the current `le` fit.
le.inverse_transform([254, 122, 327, 377])

In [None]:
# Print the per-flavour tallies for West Bengal dishes.
top_flavour("West Bengal")