# Analysis of Thanksgiving Responses to interpret user preferences basing on Gender/Age and Place of stay

In [2]:
import pandas as pd
import numpy as np

data = pd.read_csv("thanksgiving.csv",encoding="Latin-1")
#print(data.head(2))

In [3]:
#Print the Columns in the data
print(data.columns)
# print(data.shape)
data["Do you celebrate Thanksgiving?"].value_counts()

#filter data basing on response to 'Do you celebrate Thanksgiving'
data = data[data["Do you celebrate Thanksgiving?"] == 'Yes']

Index(['RespondentID', 'Do you celebrate Thanksgiving?',
       'What is typically the main dish at your Thanksgiving dinner?',
       'What is typically the main dish at your Thanksgiving dinner? - Other (please specify)',
       'How is the main dish typically cooked?',
       'How is the main dish typically cooked? - Other (please specify)',
       'What kind of stuffing/dressing do you typically have?',
       'What kind of stuffing/dressing do you typically have? - Other (please specify)',
       'What type of cranberry saucedo you typically have?',
       'What type of cranberry saucedo you typically have? - Other (please specify)',
       'Do you typically have gravy?',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Brussel sprouts',
       'Which of these side dishes aretypically served at your Thanksgiving dinner? Please select all that apply. - Carrots',
       'Which of these side dishes aretypically served

# Use Value_Counts to explore data

In [4]:
print(data["What is typically the main dish at your Thanksgiving dinner?"].value_counts())

Turkey                    859
Other (please specify)     35
Ham/Pork                   29
Tofurkey                   20
Chicken                    12
Roast beef                 11
I don't know                5
Turducken                   3
Name: What is typically the main dish at your Thanksgiving dinner?, dtype: int64


In [5]:
data[data["What is typically the main dish at your Thanksgiving dinner?"]=="Tofurkey"]["Do you typically have gravy?"].value_counts()

Yes    12
No      8
Name: Do you typically have gravy?, dtype: int64

# No of people not eating any Pies

In [6]:
apple_isnull = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Apple"].isnull()
pecan_isnull = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin"].isnull()
pumpkin_isnull = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin"].isnull()

ate_pies = apple_isnull & pecan_isnull & pumpkin_isnull
ate_pies.value_counts()

False    831
True     149
dtype: int64

# Converting age to Numeric

In [7]:
def convert2int(x):
    try:
        if pd.isnull(x):
            return None
        else:
            xlist = x.split(" ")
            xlist[0] = xlist[0].replace("+","")
            return int(xlist[0])
    except:
        return None

data["int_age"] = data["Age"].apply(convert2int)
data["int_age"].describe()
#data["Age"].value_counts()
#data["int_age"].value_counts()

count    947.000000
mean      40.089757
std       15.352014
min       18.000000
25%       30.000000
50%       45.000000
75%       60.000000
max       60.000000
Name: int_age, dtype: float64

# Converting income to Numeric

In [8]:
def convertincometoint(x):
    try:
        if pd.isnull(x):
            return None
        elif x == "Prefer":
            return None
        else:
            xlist = x.split(" ")
            xlist[0] = xlist[0].replace("$","")
            xlist[0] = xlist[0].replace(",","")
            return int(xlist[0])
    except:
        return None
    
data["int_income"] = data["How much total combined money did all members of your HOUSEHOLD earn last year?"].apply(convertincometoint)
data["int_income"].describe()

count       829.000000
mean      75965.018094
std       59068.636748
min           0.000000
25%       25000.000000
50%       75000.000000
75%      100000.000000
max      200000.000000
Name: int_income, dtype: float64

In [12]:
data[data["int_income"]<150000]["How far will you travel for Thanksgiving?"].value_counts()/len(data[data["int_income"]<150000])

Thanksgiving is happening at my home--I won't travel at all                         0.407837
Thanksgiving is local--it will take place in the town I live in                     0.294630
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    0.217707
Thanksgiving is out of town and far away--I have to drive several hours or fly      0.079826
Name: How far will you travel for Thanksgiving?, dtype: float64

In [13]:
data[data["int_income"]>=150000]["How far will you travel for Thanksgiving?"].value_counts()/len(data[data["int_income"]>=150000])

Thanksgiving is happening at my home--I won't travel at all                         0.471429
Thanksgiving is local--it will take place in the town I live in                     0.242857
Thanksgiving is out of town but not too far--it's a drive of a few hours or less    0.178571
Thanksgiving is out of town and far away--I have to drive several hours or fly      0.107143
Name: How far will you travel for Thanksgiving?, dtype: float64

# Linking Friendship and age

In [16]:
data.pivot_table(
    index="Have you ever tried to meet up with hometown friends on Thanksgiving night?", 
    columns='Have you ever attended a "Friendsgiving?"',
    values="int_age"
)

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,42.283702,37.010526
Yes,41.47541,33.976744


In [20]:
data.pivot_table(
    index="Have you ever tried to meet up with hometown friends on Thanksgiving night?", 
    columns='Have you ever attended a "Friendsgiving?"',
    values="int_income",
    aggfunc=np.mean
)

"Have you ever attended a ""Friendsgiving?""",No,Yes
Have you ever tried to meet up with hometown friends on Thanksgiving night?,Unnamed: 1_level_1,Unnamed: 2_level_1
No,78914.549654,72894.736842
Yes,78750.0,66019.736842
