In [1]:
import pandas as pd
import re

data = pd.read_csv('thanksgiving.csv',encoding = 'Latin-1')
data.columns.values

array(['RespondentID', 'Do you celebrate Thanksgiving?',
       'What is typically the main dish at your Thanksgiving dinner?',
       'What is typically the main dish at your Thanksgiving dinner? - Other (please specify)',
       'How is the main dish typically cooked?',
       'How is the main dish typically cooked? - Other (please specify)',
       'What kind of stuffing/dressing do you typically have?',
       'What kind of stuffing/dressing do you typically have? - Other (please specify)',
       'What type of cranberry saucedo you typically have?',
       'What type of cranberry saucedo you typically have? - Other (please specify)',
       'Do you typically have gravy?',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Brussel sprouts',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Carrots',
       'Which of these side dishes aretypically served

## Filtering Out Rows From A DataFrame

In [148]:
data_yes = data[data['Do you celebrate Thanksgiving?'] == 'Yes']
data_yes.head(2)

Unnamed: 0,RespondentID,Do you celebrate Thanksgiving?,What is typically the main dish at your Thanksgiving dinner?,What is typically the main dish at your Thanksgiving dinner? - Other (please specify),How is the main dish typically cooked?,How is the main dish typically cooked? - Other (please specify),What kind of stuffing/dressing do you typically have?,What kind of stuffing/dressing do you typically have? - Other (please specify),What type of cranberry saucedo you typically have?,What type of cranberry saucedo you typically have? - Other (please specify),...,Will you shop any Black Friday sales on Thanksgiving Day?,Do you work in retail?,Will you employer make you work on Black Friday?,How would you describe where you live?,Age,What is your gender?,How much total combined money did all members of your HOUSEHOLD earn last year?,US Region,int_age,int_income
0,4337954960,Yes,Turkey,,Baked,,Bread-based,,,,...,No,No,,Suburban,18 - 29,Male,"$75,000 to $99,999",Middle Atlantic,18.0,75000.0
1,4337951949,Yes,Turkey,,Baked,,Bread-based,,Other (please specify),Homemade cranberry gelatin ring,...,Yes,No,,Rural,18 - 29,Female,"$50,000 to $74,999",East South Central,18.0,50000.0


## Exploring Main Dishes

In [3]:
main_dishes = data['What is typically the main dish at your Thanksgiving dinner?'].value_counts()
main_dishes

Turkey                    859
Other (please specify)     35
Ham/Pork                   29
Tofurkey                   20
Chicken                    12
Roast beef                 11
I don't know                5
Turducken                   3
Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64

In [4]:
tofurkey = data[data['What is typically the main dish at your Thanksgiving dinner?']=='Tofurkey']
tofurkey['Do you typically have gravy?']

4      Yes
33     Yes
69      No
72      No
77     Yes
145    Yes
175    Yes
218     No
243    Yes
275     No
393    Yes
399    Yes
571    Yes
594    Yes
628     No
774     No
820     No
837    Yes
860     No
953    Yes
Name: Do you typically have gravy?, dtype: object

## Figuring Out What Pies People Eat

In [5]:
apple_isnull = data['Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Apple'].isnull()
pumpkin_isnull = data['Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin'].isnull()
pecan_isnull = data['Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pecan'].isnull()
ate_pies = apple_isnull & pumpkin_isnull & pecan_isnull

In [6]:
print('Out of',len(data),'people,',apple_isnull.value_counts()[1],'ate Apple pie\n')
print('Out of',len(data),'people,',pumpkin_isnull.value_counts()[1],'ate Pumplin pie\n')
print('Out of',len(data),'people,',pecan_isnull.value_counts()[1],'ate Pecan pie\n')
print('Out of',len(data),'people,',ate_pies.value_counts()[1],'ate all the three pies\n')

Out of 1058 people, 544 ate Apple pie

Out of 1058 people, 329 ate Pumplin pie

Out of 1058 people, 716 ate Pecan pie

Out of 1058 people, 182 ate all the three pies



In [7]:
def convert_age(i):
    if pd.isnull(i):
        return(None)
    elif '+' in i:
        return(int(i[:-1]))
    else:
        return(int(i.split(' ')[0]))
int_age = data['Age'].apply(lambda x:convert_age(x))
data['int_age'] = int_age
data.describe()

Unnamed: 0,RespondentID,int_age
count,1058.0,1025.0
mean,4336731000.0,39.383415
std,493783.4,15.398493
min,4335895000.0,18.0
25%,4336339000.0,30.0
50%,4336797000.0,45.0
75%,4337012000.0,60.0
max,4337955000.0,60.0


From the above result it is evident that that the mean age of the respondents is 39, but this cannot be taken into consideration as this is not mean of actual age of the respondents.

## Converting Income to Numeric

In [8]:
def convert_income(x):
    if pd.isnull(x):
        return(None)
    elif x=='Prefer not to answer':
        return(None)
    else:
        temp = x.split(' ')[0]
        return(int(re.sub('[$,]','',temp)))
int_income = data['How much total combined money did all members of your HOUSEHOLD earn last year?'].apply(lambda x:convert_income(x))
data['int_income'] = int_income
data.describe()

Unnamed: 0,RespondentID,int_age,int_income
count,1058.0,1025.0,889.0
mean,4336731000.0,39.383415,74077.615298
std,493783.4,15.398493,59360.742902
min,4335895000.0,18.0,0.0
25%,4336339000.0,30.0,25000.0
50%,4336797000.0,45.0,50000.0
75%,4337012000.0,60.0,100000.0
max,4337955000.0,60.0,200000.0


## Correlating Travel Distance and Income

In [54]:
income_under_150k = data[data['int_income']<150000]
values_under_150k = income_under_150k['How far will you travel for Thanksgiving?'].value_counts()
values_under_150k

Thanksgiving is happening at my home--I won't travel at all                         281
Thanksgiving is local--it will take place in the town I live in                     203
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    150
Thanksgiving is out of town and far away--I have to drive several hours or fly       55
Name: How far will you travel for Thanksgiving?, dtype: int64

In [55]:
percent = (1-values_under_150k[0]/sum(values_under_150k[:]))*100
print('%.2f' % percent, '% people having income less than $150k and will travel to thanksgiving dinner')

59.22 % people having income less than $150k and will travel to thanksgiving dinner


In [58]:
income_over_150k = data[data['int_income']>150000]
values_over_150k =income_over_150k['How far will you travel for Thanksgiving?'].value_counts()
values_over_150k

Thanksgiving is happening at my home--I won't travel at all                         49
Thanksgiving is local--it will take place in the town I live in                     25
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    16
Thanksgiving is out of town and far away--I have to drive several hours or fly      12
Name: How far will you travel for Thanksgiving?, dtype: int64

In [59]:
percent = (1-values_over_150k[0]/sum(values_over_150k[:]))*100
print('%.2f' % percent, '% people having income more then $150k and will travel to thanksgiving dinner')

51.96 % people having income more then $150k and will travel to thanksgiving dinner


In [62]:
print('Around 52% of respondents have income over $150k and will travel to thanksgiving dinner (be it locally, few hours drive, or several hours of drive), while 59% of people having income under $150k will travel to thanksgiving dinner.')

Around 52% of respondents have income over $150k and will travel to thanksgiving dinner (be it locally, few hours drive, or several hours of drive), while 59% of people having income under $150k will travel to thanksgiving dinner.


## Linking Friendship And Age

In [66]:
data.pivot_table(index = 'Have you ever tried to meet up with hometown friends on Thanksgiving night?', 
                 columns = 'Have you ever attended a "Friendsgiving?"', 
                 values = 'int_age')

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,42.283702,37.010526
Yes,41.47541,33.976744


In [67]:
data.pivot_table(index = 'Have you ever tried to meet up with hometown friends on Thanksgiving night?', 
                 columns = 'Have you ever attended a "Friendsgiving?"', 
                 values = 'int_income')

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,78914.549654,72894.736842
Yes,78750.0,66019.736842


It is evident from the above pivot tables, that the younger age group people with lower mean income are more inclined to meet/invite friends over thanksgiving dinner then the older age group respondants who have higher mean income.

## Most Common desert

In [124]:
apple_cobler = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Apple cobbler'].value_counts()
blondies = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Blondies'].value_counts()
brownies = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Brownies'].value_counts()
carrot_cake = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Carrot cake'].value_counts()
cheesecake = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Cheesecake'].value_counts()
cookies = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Cookies'].value_counts()
fudge=data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Fudge'].value_counts()
icecream = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Ice cream'].value_counts()
peach_cobler = data['Which of these desserts do you typically have at Thanksgiving dinner? Please select all that apply.   - Peach cobbler'].value_counts()

In [146]:
dessert_list = [apple_cobler, blondies, brownies, carrot_cake, cheesecake, cookies, fudge, icecream, peach_cobler]
m=0
for dessert in dessert_list:
    if dessert[0] > m:
        m = dessert[0]
        name = dessert
print('Most common dessert in thanksgiving dinner is:%s\n%d people out of %d people' % (name.index[0],m,len(data)))

Most common dessert in thanksgiving dinner is:Ice cream
266 people out of 1058 people


## Working people on Thanksgiving day

In [149]:
data['Will you employer make you work on Black Friday?'].value_counts()

Yes              43
No               20
Doesn't apply     7
Name: Will you employer make you work on Black Friday?, dtype: int64