In [1]:
import pandas as pd

# NOTE(review): this is an absolute, machine-specific path; consider a path
# relative to a configurable data directory so the notebook runs elsewhere.
file_path = '/Users/xinqiaoyang/Desktop/Data Literacy/project/flying-etiquette.csv'

# Load the survey export; fields are parsed as semicolon-separated.
# Every later cell needs `df`, so a failed read must stop the notebook here
# instead of surfacing later as an unrelated NameError: the error is still
# reported with the original message and then re-raised.
try:
    df = pd.read_csv(file_path, sep=';')
except Exception as e:
    print("Error reading file:", e)
    raise
else:
    print("Shape of the DataFrame:", df.shape)



Shape of the DataFrame: (1040, 27)


In [2]:
# Every column except the first one (RespondentID) is a survey question:
# 21 self-report questions plus 5 demographic variable questions.
question_columns = [name for name in df.columns if name != 'RespondentID']

# Number of questions = all columns minus the RespondentID column.
num_questions = df.shape[1] - 1

# Number of distinct (non-missing) answers given to each question.
answer_types = {name: df[name].nunique() for name in question_columns}
num_questions, answer_types


(26,
 {'How often do you travel by plane?': 6,
  'Do you ever recline your seat when you fly?': 5,
  'How tall are you?': 20,
  'Do you have any children under 18?': 2,
  'In a row of three seats, who should get to use the two arm rests?': 5,
  'In a row of two seats, who should get to use the middle arm rest?': 5,
  'Who should have control over the window shade?': 2,
  'Is itrude to move to an unsold seat on a plane?': 3,
  'Generally speaking, is it rude to say more than a few words tothe stranger sitting next to you on a plane?': 3,
  "On a 6 hour flight from NYC to LA, how many times is it acceptable to get up if you're not in an aisle seat?": 6,
  'Under normal circumstances, does a person who reclines their seat during a flight have any obligation to the person sitting behind them?': 2,
  'Is itrude to recline your seat on a plane?': 3,
  'Given the opportunity, would you eliminate the possibility of reclining seats on planes entirely?': 2,
  'Is it rude to ask someone to switch

In [3]:
# Count the rows (respondents) that contain at least one NaN value.
# TODO: decide how to deal with them -- this is a big problem, because dropping
# every incomplete row would lose about a third of the data.
rows_with_na = df.isna().any(axis='columns')
num_rows_with_na = rows_with_na.sum()
num_rows_with_na

353

In [4]:
# List every distinct answer (missing values included) for each question.
# RespondentID is skipped because it is an identifier, not a question.
question_columns = [name for name in df.columns if name != 'RespondentID']
answer_types_detailed = {name: df[name].unique() for name in question_columns}

# Print one "question: [answers]" entry per question, separated by blank lines.
for question, answers in answer_types_detailed.items():
    print(f"{question}: {answers}\n")


How often do you travel by plane?: ['Once a year or less' 'Once a month or less' 'Never'
 'A few times per week' 'Every day' 'A few times per month']

Do you ever recline your seat when you fly?: [nan 'About half the time' 'Usually' 'Always' 'Once in a while' 'Never']

How tall are you?: [nan '6\'3"' '5\'8"' '5\'11"' '5\'7"' '5\'9"' '6\'2"' '6\'0"' '5\'6"'
 '6\'1"' '5\'10"' '5\'5"' '6\'4"' '6\'6" and above' 'Under 5 ft.' '6\'5"'
 '5\'4"' '5\'0"' '5\'3"' '5\'1"' '5\'2"']

Do you have any children under 18?: [nan 'Yes' 'No']

In a row of three seats, who should get to use the two arm rests?: [nan 'The arm rests should be shared'
 'Whoever puts their arm on the arm rest first'
 'The person in the middle seat gets both arm rests'
 'Other (please specify)'
 'The people in the aisle and window seats get both arm rests']

In a row of two seats, who should get to use the middle arm rest?: [nan 'The arm rests should be shared' 'The person in aisle'
 'Whoever puts their arm on the arm rest first

In [5]:
# Coding scheme: "Yes" represents more polite and "No" represents less polite,
# so a higher code means more polite and a lower code means less polite.
# Exceptions: the "LOCATION", "TWO ARM RESTS" and "MIDDLE ARM REST" questions (their ordering is not so clear)!
# Reverse coding is used for:
#   "frequency of taking flight",
#   "Education",
#   "Do you ever recline your seat when you fly",
#   "On a 6 hour flight from NYC to LA, how many times is it acceptable to get up if you're not in an aisle seat".
# With this coding we can calculate a sum score for politeness.



In [6]:
# Coding the responses according to the new specified rules

# Function to apply the coding rules
def code_responses(row):
    """Recode one respondent's yes/no and demographic answers as integers.

    Intended to be used row-wise, e.g. ``df.apply(code_responses, axis=1)``.

    Parameters
    ----------
    row : pandas.Series
        One respondent, indexed by column name. It must contain the four
        yes/no questions listed in ``column_maps`` below as well as
        'Gender', 'Age', 'Household Income', 'Education' and
        'Location (Census Region)'.

    Returns
    -------
    pandas.Series
        A copy of ``row`` in which every answer that is a key of the
        column's mapping is replaced by its integer code. Answers that are
        not a key of the mapping (for example missing values) and all other
        columns are returned unchanged.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the columns listed in ``column_maps``.
    """
    yes_no_map = {'Yes': 1, 'No': 0}

    # One answer -> code table per column that this function recodes.
    column_maps = {
        # Binary coding for Yes/No questions
        'Do you have any children under 18?': yes_no_map,
        "Have you ever used personal electronics during take off or landing in violation of a flight attendant's direction?": yes_no_map,
        'Have you ever smoked a cigarette in an airplane bathroom when it was against the rules?': yes_no_map,
        'Given the opportunity, would you eliminate the possibility of reclining seats on planes entirely?': yes_no_map,
        # Demographics
        'Gender': {'Male': 0, 'Female': 1},
        # NOTE(review): confirm these labels match the data exactly
        # (including spaces); unmatched answers are left as text.
        'Age': {'18-29': 0, '30-44': 1, '45-60': 2, '>60': 3},
        'Household Income': {'$0 - $24,999': 0, '$25,000 - $49,999': 1, '$50,000 - $99,999': 2,
                             '$100,000 - $149,999': 3, '150000': 4},
        'Education': {'Less than high school degree': 0, 'High school degree': 1,
                      'Some college or Associate degree': 2, 'Bachelor degree': 3, 'Graduate degree': 4},
        # NOTE(review): region codes look like plain labels rather than an ordering.
        'Location (Census Region)': {'Pacific': 0, 'East North Central': 1, 'New England': 2, 'Mountain': 3,
                                     'South Atlantic': 4, 'East South Central': 5, 'Middle Atlantic': 6,
                                     'West North Central': 7, 'West South Central': 8},
    }

    # Work on a copy: pandas does not support functions that mutate the
    # object handed to them by DataFrame.apply.
    coded = row.copy()
    for column, mapping in column_maps.items():
        answer = coded[column]
        if answer in mapping:
            coded[column] = mapping[answer]
    return coded

# Run the coding function once per respondent (row-wise) to build the coded frame.
coded_df = df.apply(code_responses, axis='columns')
coded_df.head()  # Show the first rows of the coded dataframe for inspection


Unnamed: 0,RespondentID,How often do you travel by plane?,Do you ever recline your seat when you fly?,How tall are you?,Do you have any children under 18?,"In a row of three seats, who should get to use the two arm rests?","In a row of two seats, who should get to use the middle arm rest?",Who should have control over the window shade?,Is itrude to move to an unsold seat on a plane?,"Generally speaking, is it rude to say more than a few words tothe stranger sitting next to you on a plane?",...,Is itrude to wake a passenger up if you are trying to walk around?,"In general, is itrude to bring a baby on a plane?","In general, is it rude to knowingly bring unruly children on a plane?",Have you ever used personal electronics during take off or landing in violation of a flight attendant's direction?,Have you ever smoked a cigarette in an airplane bathroom when it was against the rules?,Gender,Age,Household Income,Education,Location (Census Region)
0,3436139758,Once a year or less,,,,,,,,,...,,,,,,,,,,
1,3434278696,Once a year or less,About half the time,"6'3""",1.0,The arm rests should be shared,The arm rests should be shared,Everyone in the row should have some say,"No, not rude at all","No, not at all rude",...,"No, not at all rude","No, not at all rude","No, not at all rude",0.0,0.0,0.0,1.0,,4.0,0.0
2,3434275578,Once a year or less,Usually,"5'8""",0.0,Whoever puts their arm on the arm rest first,The arm rests should be shared,The person in the window seat should have excl...,"No, not rude at all","No, not at all rude",...,"Yes, somewhat rude","Yes, somewhat rude","Yes, very rude",0.0,0.0,0.0,1.0,3.0,3.0,0.0
3,3434268208,Once a year or less,Always,"5'11""",0.0,The arm rests should be shared,The arm rests should be shared,Everyone in the row should have some say,"No, not rude at all","No, not at all rude",...,"Yes, somewhat rude","Yes, somewhat rude","Yes, very rude",0.0,0.0,0.0,1.0,0.0,3.0,0.0
4,3434250245,Once a month or less,About half the time,"5'7""",0.0,The person in the middle seat gets both arm rests,The person in aisle,Everyone in the row should have some say,"No, not rude at all","No, not at all rude",...,"Yes, somewhat rude","Yes, somewhat rude","Yes, very rude",1.0,0.0,0.0,1.0,2.0,3.0,0.0


In [7]:
# Additional coding rules (9 to 11), applied to coded_df, which already has the
# initial 7 rules applied.
# NOTE(review): Series.map turns every answer that is not a key of the mapping
# into NaN, so the answer strings below must match the data exactly -- verify
# them against the unique values of each column.

# Column names of the three questions recoded in this cell.
flight_question = "On a 6 hour flight from NYC to LA, how many times is it acceptable to get up if you're not in an aisle seat?"
shade_question = 'Who should have control over the window shade?'
recline_question = 'Under normal circumstances, does a person who reclines their seat during a flight have any obligation to the person sitting behind them?'

# "On a 6 hour flight from NYC to LA...": fewer acceptable trips -> higher code.
times_map = {
    'It is not okay to get up during a flight': 5,
    'Once': 4,
    'Twice': 3,
    'Three times': 2,
    'Four times': 1,
    'More than five times': 0,
}

# "Who should have control over the window shade?"
shade_control_map = {
    'Everyone in the row should have some say': 1,
    'The person in the window seat should have exclusive control': 0,
}

# Obligation of a reclining passenger to the person sitting behind them.
recline_obligation_map = {
    'No, the person on the flight has no obligation to the person behind them': 0,
    'Yes, they should not recline their chair if the person behind them asks them not to': 1,
}

# Recode the three columns in turn, in place on coded_df.
for _column, _codes in (
    (flight_question, times_map),
    (shade_question, shade_control_map),
    (recline_question, recline_obligation_map),
):
    coded_df[_column] = coded_df[_column].map(_codes)

coded_df.head()  # Show the first rows of the fully coded dataframe for inspection


Unnamed: 0,RespondentID,How often do you travel by plane?,Do you ever recline your seat when you fly?,How tall are you?,Do you have any children under 18?,"In a row of three seats, who should get to use the two arm rests?","In a row of two seats, who should get to use the middle arm rest?",Who should have control over the window shade?,Is itrude to move to an unsold seat on a plane?,"Generally speaking, is it rude to say more than a few words tothe stranger sitting next to you on a plane?",...,Is itrude to wake a passenger up if you are trying to walk around?,"In general, is itrude to bring a baby on a plane?","In general, is it rude to knowingly bring unruly children on a plane?",Have you ever used personal electronics during take off or landing in violation of a flight attendant's direction?,Have you ever smoked a cigarette in an airplane bathroom when it was against the rules?,Gender,Age,Household Income,Education,Location (Census Region)
0,3436139758,Once a year or less,,,,,,,,,...,,,,,,,,,,
1,3434278696,Once a year or less,About half the time,"6'3""",1.0,The arm rests should be shared,The arm rests should be shared,1.0,"No, not rude at all","No, not at all rude",...,"No, not at all rude","No, not at all rude","No, not at all rude",0.0,0.0,0.0,1.0,,4.0,0.0
2,3434275578,Once a year or less,Usually,"5'8""",0.0,Whoever puts their arm on the arm rest first,The arm rests should be shared,0.0,"No, not rude at all","No, not at all rude",...,"Yes, somewhat rude","Yes, somewhat rude","Yes, very rude",0.0,0.0,0.0,1.0,3.0,3.0,0.0
3,3434268208,Once a year or less,Always,"5'11""",0.0,The arm rests should be shared,The arm rests should be shared,1.0,"No, not rude at all","No, not at all rude",...,"Yes, somewhat rude","Yes, somewhat rude","Yes, very rude",0.0,0.0,0.0,1.0,0.0,3.0,0.0
4,3434250245,Once a month or less,About half the time,"5'7""",0.0,The person in the middle seat gets both arm rests,The person in aisle,1.0,"No, not rude at all","No, not at all rude",...,"Yes, somewhat rude","Yes, somewhat rude","Yes, very rude",1.0,0.0,0.0,1.0,2.0,3.0,0.0


In [8]:
# Code the answers to "How often do you travel by plane?" (reverse coded:
# the more often someone flies, the lower the code).
#
# The keys must match the survey answers exactly, including capitalisation:
# Series.map turns every answer that is not a key of the mapping into NaN.
# The data spells the answer 'A few times per month' with a capital "A".
travel_frequency_map = {
    'Every day': 0,
    'A few times per week': 1,
    'A few times per month': 2,
    'Once a month or less': 3,
    'Once a year or less': 4,
    'Never': 5
}

travel_question = 'How often do you travel by plane?'
coded_df[travel_question] = coded_df[travel_question].map(travel_frequency_map)



In [9]:
# Code "Do you ever recline your seat when you fly?" (reverse coded).
# The answers are listed from most to least frequent reclining; each answer's
# position in the list is its code (0 = 'Always' ... 4 = 'Never').
recline_answers = [
    'Always',
    'Usually',
    'About half the time',
    'Once in a while',
    'Never',
]
recline_seat_map = {answer: code for code, answer in enumerate(recline_answers)}

# Note: this rebinds recline_question to a different column name.
recline_question = 'Do you ever recline your seat when you fly?'
coded_df[recline_question] = coded_df[recline_question].map(recline_seat_map)



In [10]:
# Code the rudeness questions whose "no" answer is worded 'No, not at all rude'.
# Each answer's position in the tuple is its code (0, 1, 2).
rudeness_map = {
    answer: code
    for code, answer in enumerate(
        ('No, not at all rude', 'Yes, somewhat rude', 'Yes, very rude')
    )
}

# Column names exactly as used in this cell's mapping loop.
rudeness_questions = [
    'Generally speaking, is it rude to say more than a few words tothe stranger sitting next to you on a plane?',
    'Is it rude to ask someone to switch seats with you in order to be closer to friends?',
    'Is itrude to ask someone to switch seats with you in order to be closer to family?',
    'Is it rude to wake a passenger up if you are trying to go to the bathroom?',
    'Is itrude to wake a passenger up if you are trying to walk around?',
    'In general, is itrude to bring a baby on a plane?',
    'In general, is it rude to knowingly bring unruly children on a plane?',
]

# Recode each listed column in place on coded_df.
for question in rudeness_questions:
    recoded = coded_df[question].map(rudeness_map)
    coded_df[question] = recoded



In [11]:
# Code the two rudeness questions whose "no" answer is worded
# 'No, not rude at all' (the other rudeness questions use 'No, not at all rude').
rudeness_map2 = dict(zip(
    ('No, not rude at all', 'Yes, somewhat rude', 'Yes, very rude'),
    (0, 1, 2),
))

rudeness_questions2 = [
    'Is itrude to move to an unsold seat on a plane?',
    'Is itrude to recline your seat on a plane?',
]

# Recode each listed column in place on coded_df.
for question in rudeness_questions2:
    recoded = coded_df[question].map(rudeness_map2)
    coded_df[question] = recoded


In [12]:
# Save the coded dataframe as CSV, without writing the row index.
# NOTE(review): the output location is an absolute, machine-specific path.
output_path = '/Users/xinqiaoyang/Desktop/coded.csv'
coded_df.to_csv(output_path, index=False)