In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set()
from sklearn.cluster import KMeans

import os
# List every file under the Kaggle input directory and pick the dataset to load.
input_paths = []
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        path = os.path.join(dirname, filename)
        print(path)
        input_paths.append(path)

# Only a CSV can be read by pd.read_csv in the next cell, so ignore other file
# types instead of keeping whichever file happened to be listed last.
csv_paths = [p for p in input_paths if p.lower().endswith('.csv')]
if not csv_paths:
    # Fail here with a clear message rather than a NameError in the next cell.
    raise FileNotFoundError("No CSV file found under /kaggle/input")
# Keep the last match, as before, when several CSV files are present.
importFileName = csv_paths[-1]

In [None]:
# Load the dataset found in the previous cell and preview the first rows.
food = pd.read_csv(importFileName)
food.head()

In [None]:
# (number of rows, number of columns) of the raw data frame.
food.shape

In [None]:
# Data type pandas inferred for each column.
food.dtypes

# **Removed Timestamp and Participant_ID as they are irrelevant to the analysis.**

In [None]:
# Drop the two columns that are not used in the analysis; `food` itself is left untouched.
food_new = food.drop(columns=['Timestamp', 'Participant_ID'])
food_new

In [None]:
# Number of rows per distinct Nationality value.
food_new.Nationality.value_counts()

### **The counts above show that most participants who took the survey are Indian, so this analysis is restricted to Indian respondents and every other nationality is removed from the data frame.**

In [None]:
# Keep only the rows whose Nationality is exactly 'Indian'.
# .copy() makes the result an independent frame rather than a slice of food_new,
# so columns added to it later cannot trigger SettingWithCopyWarning or be
# mistaken for writes into food_new.
foodindia = food_new[food_new['Nationality'] == 'Indian'].copy()
foodindia

In [None]:
# Sanity check: after the filter, 'Indian' should be the only value left.
foodindia.Nationality.value_counts()

#### **Removed Nationality: the data now contains only Indian respondents, so the column is irrelevant to the analysis.**

In [None]:
# Nationality is constant after the filter, so remove the column.
foodindia = foodindia.drop(columns=['Nationality'])
foodindia

# Plotting the gender split, age range, and food, juice and dessert choices.

### **Plotting the Gender**

In [None]:
# Row count per Gender value, smallest count first.
f1 = foodindia['Gender'].value_counts(ascending = True)
f1

In [None]:
fig = plt.figure(figsize=(8,4))
ax1 = fig.add_subplot(1,2,1)
# Draw the bars in the same order as f1 so that bar i and label i always
# describe the same category (countplot otherwise uses order of appearance).
sns.countplot(x=foodindia['Gender'], order=list(f1.index), ax=ax1)
ax1.set_xlabel('Gender')
ax1.set_ylabel('Count')

# Iterate the counts directly instead of going through f1.reset_index():
# the column names reset_index() produces for a value_counts() result differ
# between pandas versions, which made `v.Gender + 0.3` fail on pandas >= 2.0.
for i, count in enumerate(f1):
    # Place the count just above the top of bar i.
    ax1.text(i - 0.08, count + 0.3, count, color='red')
plt.show()

### **Plotting the Age range** 

### Grouping the age into range

In [None]:
def age_buckets(x):
    """Return the ten-year age-range label for the age ``x``.

    The label is that of the first bucket whose exclusive upper bound is
    greater than ``x``: values below 10 give '00-09', below 20 give '10-19',
    and so on up to '80-89'. Any value that is not below 90 (including NaN,
    for which every comparison is false) gives 'other'.
    """
    # (exclusive upper bound, label), checked from the lowest bucket upwards.
    buckets = (
        (10, '00-09'),
        (20, '10-19'),
        (30, '20-29'),
        (40, '30-39'),
        (50, '40-49'),
        (60, '50-59'),
        (70, '60-69'),
        (80, '70-79'),
        (90, '80-89'),
    )
    for upper_bound, label in buckets:
        if x < upper_bound:
            return label
    return 'other'

In [None]:
# Label every row with its ten-year age bucket and order the rows by Age.
# assign() and sort_values() each return a new frame, so nothing is mutated in
# place: the cell gives the same result when re-run and cannot raise
# SettingWithCopyWarning on a frame that was derived from a filter.
foodindia = (
    foodindia
    .assign(Agerange=foodindia.Age.apply(age_buckets))
    .sort_values(by=['Age'], ascending=True)
)
foodindia

In [None]:
# Row count per Agerange label, ordered by the label itself (ascending).
f2 = foodindia['Agerange'].value_counts().sort_index(ascending = True)
f2

In [None]:
fig = plt.figure(figsize=(30,4))
ax2 = fig.add_subplot(1,2,1)
# Fix the bar order to f2's order explicitly, so the labels below line up with
# the bars without depending on how the rows of foodindia happen to be sorted.
sns.countplot(x=foodindia['Agerange'], order=list(f2.index), ax=ax2)
ax2.set_xlabel('Age Range')
ax2.set_ylabel('Count')

# Iterate the counts directly instead of going through f2.reset_index():
# the column names reset_index() produces for a value_counts() result differ
# between pandas versions, which made `v.Agerange + 0.3` fail on pandas >= 2.0.
for i, count in enumerate(f2):
    # Place the count just above the top of bar i.
    ax2.text(i - 0.08, count + 0.3, count, color='red')
plt.show()


### **Plotting Food choice**

In [None]:
# Row count per Food value, largest count first.
f3 = foodindia['Food'].value_counts(ascending = False)
f3

In [None]:
fig = plt.figure(figsize=(8,4))
ax3 = fig.add_subplot(1,2,1)
# Draw the bars in the same order as f3 so that bar i and label i always
# describe the same category (countplot otherwise uses order of appearance).
sns.countplot(x=foodindia['Food'], order=list(f3.index), ax=ax3)
ax3.set_xlabel('Food Choice')
ax3.set_ylabel('Count')

# Iterate the counts directly instead of going through f3.reset_index():
# the column names reset_index() produces for a value_counts() result differ
# between pandas versions, which made `v.Food + 0.3` fail on pandas >= 2.0.
for i, count in enumerate(f3):
    # Place the count just above the top of bar i.
    ax3.text(i - 0.08, count + 0.3, count, color='red')
plt.show()

### **Plotting Juice choice**

In [None]:
# Row count per Juice value, largest count first.
f4 = foodindia['Juice'].value_counts(ascending = False)
f4

In [None]:
fig = plt.figure(figsize=(8,4))
ax4 = fig.add_subplot(1,2,1)
# Draw the bars in the same order as f4 so that bar i and label i always
# describe the same category (countplot otherwise uses order of appearance).
sns.countplot(x=foodindia['Juice'], order=list(f4.index), ax=ax4)
ax4.set_xlabel('Juice Choice')
ax4.set_ylabel('Count')

# Iterate the counts directly instead of going through f4.reset_index():
# the column names reset_index() produces for a value_counts() result differ
# between pandas versions, which made `v.Juice + 0.3` fail on pandas >= 2.0.
for i, count in enumerate(f4):
    # Place the count just above the top of bar i.
    ax4.text(i - 0.08, count + 0.3, count, color='red')
plt.show()

### **Plotting Desserts**

In [None]:
# Row count per Dessert value, smallest count first.
f5 = foodindia['Dessert'].value_counts(ascending = True)
f5

In [None]:
fig = plt.figure(figsize=(8,4))
ax5 = fig.add_subplot(1,2,1)
# Draw the bars in the same order as f5 so that bar i and label i always
# describe the same category (countplot otherwise uses order of appearance).
sns.countplot(x=foodindia['Dessert'], order=list(f5.index), ax=ax5)
ax5.set_xlabel('Dessert Choice')
ax5.set_ylabel('Count')

# Iterate the counts directly instead of going through f5.reset_index():
# the column names reset_index() produces for a value_counts() result differ
# between pandas versions, which made `v.Dessert + 0.3` fail on pandas >= 2.0.
for i, count in enumerate(f5):
    # Place the count just above the top of bar i.
    ax5.text(i - 0.08, count + 0.3, count, color='red')
plt.show()

### Understanding the data and finding the correlation between the variables.

In [None]:
# One row of four count plots, each split by Gender.
dims = (30, 5)
fig, ax = plt.subplots(1,4,figsize=dims)

# (column to count, panel title) for each of the four panels, left to right.
panels = [
    ('Food', 'Gender-wise count of Food Choice'),
    ('Juice', 'Gender-wise count of Juice Choice'),
    ('Dessert', 'Gender-wise count of Dessert Choice'),
    ('Agerange', 'Gender-wise count of Age Range'),
]

#visualizing the above
for axis, (column, title) in zip(ax, panels):
    # x must be passed by keyword: in recent seaborn releases the first
    # positional parameter of countplot is `data`, so countplot('Food', data=...)
    # fails there.
    sns.countplot(x=column, hue='Gender', data=foodindia, ax=axis)
    axis.set(title=title)

# Label the last panel explicitly rather than relying on pyplot's current axes.
ax[3].set_xlabel('Age Range')

plt.show()

# To conclude: if you are Indian, whether female or male, you tend to stick with more traditional food and fresh drinks when given a choice.