In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, roc_auc_score, recall_score, precision_score, average_precision_score, f1_score, classification_report, accuracy_score, plot_roc_curve, plot_precision_recall_curve, plot_confusion_matrix

In [2]:
df = pd.read_csv("Desktop/AlaskaProject/Data/seatsurveymodel.csv")

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [3]:
# Drop irrelevant columns
df.drop(['participant.code', 'participant.label','participant._is_bot','participant._max_page_index',
        'participant._current_page_name','participant.visited',
        'participant.mturk_worker_id','participant.mturk_assignment_id','participant.payoff',
        'session.label','session.mturk_HITId','session.mturk_HITGroupId','session.comment', 'session.is_demo',  'session.config.real_world_currency_per_point', 'session.config.participation_fee', 'randseq.1.player.role', 'randseq.1.player.payoff', 'randseq.1.group.id_in_subsession', 'randseq.1.subsession.round_number', 'survey.1.player.id_in_group', 'survey.1.player.role', 'survey.1.player.payoff'], inplace = True, axis = 1
       )

In [4]:
# Rename/shorten column names
df.rename(columns=
{
"session.code": "session_code",    
"participant.id_in_session": "id_in_session",
"participant._index_in_pages": "index_in_pages",
"participant._current_app_name": "current_app_name",
"participant.time_started": "time_started",
"randseq.1.player.id_in_group": "id_in_group",
"randseq.1.player.page_sequence": "page_sequence",

"randseq.1.player.choice1": "choice1",
"randseq.1.player.distance_1": "distance_1",
"randseq.1.player.time_1": "time_1",
"randseq.1.player.price_1": "price_1",
"randseq.1.player.row_1a": "row_1a",
"randseq.1.player.row_1b": "row_1b",
"randseq.1.player.col_1a": "col_1a",
"randseq.1.player.col_1b": "col_1b",
"randseq.1.player.price_1a": "price_1a",
"randseq.1.player.price_1b": "price_1b",
"randseq.1.player.legroom_1a": "legroom_1a",
"randseq.1.player.legroom_1b": "legroom_1b",

"randseq.1.player.choice2": "choice2",
"randseq.1.player.distance_2": "distance_2",
"randseq.1.player.time_2": "time_2",
"randseq.1.player.price_2": "price_2",
"randseq.1.player.row_2a": "row_2a",
"randseq.1.player.row_2b": "row_2b",
"randseq.1.player.col_2a": "col_2a",
"randseq.1.player.col_2b": "col_2b",
"randseq.1.player.price_2a": "price_2a",
"randseq.1.player.price_2b": "price_2b",
"randseq.1.player.legroom_2a": "legroom_2a",
"randseq.1.player.legroom_2b": "legroom_2b",

"randseq.1.player.choice3": "choice3",
"randseq.1.player.distance_3": "distance_3",
"randseq.1.player.time_3": "time_3",
"randseq.1.player.price_3": "price_3",
"randseq.1.player.row_3a": "row_3a",
"randseq.1.player.row_3b": "row_3b",
"randseq.1.player.col_3a": "col_3a",
"randseq.1.player.col_3b": "col_3b",
"randseq.1.player.price_3a": "price_3a",
"randseq.1.player.price_3b": "price_3b",
"randseq.1.player.legroom_3a": "legroom_3a",
"randseq.1.player.legroom_3b": "legroom_3b",

"randseq.1.player.choice4": "choice4",
"randseq.1.player.distance_4": "distance_4",
"randseq.1.player.time_4": "time_4",
"randseq.1.player.price_4": "price_4",
"randseq.1.player.row_4a": "row_4a",
"randseq.1.player.row_4b": "row_4b",
"randseq.1.player.col_4a": "col_4a",
"randseq.1.player.col_4b": "col_4b",
"randseq.1.player.price_4a": "price_4a",
"randseq.1.player.price_4b": "price_4b",
"randseq.1.player.legroom_4a": "legroom_4a",
"randseq.1.player.legroom_4b": "legroom_4b",

"randseq.1.player.choice5": "choice5",
"randseq.1.player.distance_5": "distance_5",
"randseq.1.player.time_5": "time_5",
"randseq.1.player.price_5": "price_5",
"randseq.1.player.row_5a": "row_5a",
"randseq.1.player.row_5b": "row_5b",
"randseq.1.player.col_5a": "col_5a",
"randseq.1.player.col_5b": "col_5b",
"randseq.1.player.price_5a": "price_5a",
"randseq.1.player.price_5b": "price_5b",
"randseq.1.player.legroom_5a": "legroom_5a",
"randseq.1.player.legroom_5b": "legroom_5b",

"randseq.1.player.choice6": "choice6",
"randseq.1.player.distance_6": "distance_6",
"randseq.1.player.time_6": "time_6",
"randseq.1.player.price_6": "price_6",
"randseq.1.player.row_6a": "row_6a",
"randseq.1.player.row_6b": "row_6b",
"randseq.1.player.col_6a": "col_6a",
"randseq.1.player.col_6b": "col_6b",
"randseq.1.player.price_6a": "price_6a",
"randseq.1.player.price_6b": "price_6b",
"randseq.1.player.legroom_6a": "legroom_6a",
"randseq.1.player.legroom_6b": "legroom_6b",

"randseq.1.player.choice7": "choice7",
"randseq.1.player.distance_7": "distance_7",
"randseq.1.player.time_7": "time_7",
"randseq.1.player.price_7": "price_7",
"randseq.1.player.row_7a": "row_7a",
"randseq.1.player.row_7b": "row_7b",
"randseq.1.player.col_7a": "col_7a",
"randseq.1.player.col_7b": "col_7b",
"randseq.1.player.price_7a": "price_7a",
"randseq.1.player.price_7b": "price_7b",
"randseq.1.player.legroom_7a": "legroom_7a",
"randseq.1.player.legroom_7b": "legroom_7b",

"randseq.1.player.choice8": "choice8",
"randseq.1.player.distance_8": "distance_8",
"randseq.1.player.time_8": "time_8",
"randseq.1.player.price_8": "price_8",
"randseq.1.player.row_8a": "row_8a",
"randseq.1.player.row_8b": "row_8b",
"randseq.1.player.col_8a": "col_8a",
"randseq.1.player.col_8b": "col_8b",
"randseq.1.player.price_8a": "price_8a",
"randseq.1.player.price_8b": "price_8b",
"randseq.1.player.legroom_8a": "legroom_8a",
"randseq.1.player.legroom_8b": "legroom_8b",

"randseq.1.player.choice9": "choice9",
"randseq.1.player.distance_9": "distance_9",
"randseq.1.player.time_9": "time_9",
"randseq.1.player.price_9": "price_9",
"randseq.1.player.row_9a": "row_9a",
"randseq.1.player.row_9b": "row_9b",
"randseq.1.player.col_9a": "col_9a",
"randseq.1.player.col_9b": "col_9b",
"randseq.1.player.price_9a": "price_9a",
"randseq.1.player.price_9b": "price_9b",
"randseq.1.player.legroom_9a": "legroom_9a",
"randseq.1.player.legroom_9b": "legroom_9b",

"randseq.1.player.choice10": "choice10",
"randseq.1.player.distance_10": "distance_10",
"randseq.1.player.time_10": "time_10",
"randseq.1.player.price_10": "price_10",
"randseq.1.player.row_10a": "row_10a",
"randseq.1.player.row_10b": "row_10b",
"randseq.1.player.col_10a": "col_10a",
"randseq.1.player.col_10b": "col_10b",
"randseq.1.player.price_10a": "price_10a",
"randseq.1.player.price_10b": "price_10b",
"randseq.1.player.legroom_10a": "legroom_10a",
"randseq.1.player.legroom_10b": "legroom_10b",

"randseq.1.player.choice11": "choice11",
"randseq.1.player.distance_11": "distance_11",
"randseq.1.player.time_11": "time_11",
"randseq.1.player.price_11": "price_11",
"randseq.1.player.row_11a": "row_11a",
"randseq.1.player.row_11b": "row_11b",
"randseq.1.player.col_11a": "col_11a",
"randseq.1.player.col_11b": "col_11b",
"randseq.1.player.price_11a": "price_11a",
"randseq.1.player.price_11b": "price_11b",
"randseq.1.player.legroom_11a": "legroom_11a",
"randseq.1.player.legroom_11b": "legroom_11b",

"randseq.1.player.choice12": "choice12",
"randseq.1.player.distance_12": "distance_12",
"randseq.1.player.time_12": "time_12",
"randseq.1.player.price_12": "price_12",
"randseq.1.player.row_12a": "row_12a",
"randseq.1.player.row_12b": "row_12b",
"randseq.1.player.col_12a": "col_12a",
"randseq.1.player.col_12b": "col_12b",
"randseq.1.player.price_12a": "price_12a",
"randseq.1.player.price_12b": "price_12b",
"randseq.1.player.legroom_12a": "legroom_12a",
"randseq.1.player.legroom_12b": "legroom_12b",

"randseq.1.player.choice13": "choice13",
"randseq.1.player.distance_13": "distance_13",
"randseq.1.player.time_13": "time_13",
"randseq.1.player.price_13": "price_13",
"randseq.1.player.row_13a": "row_13a",
"randseq.1.player.row_13b": "row_13b",
"randseq.1.player.col_13a": "col_13a",
"randseq.1.player.col_13b": "col_13b",
"randseq.1.player.price_13a": "price_13a",
"randseq.1.player.price_13b": "price_13b",
"randseq.1.player.legroom_13a": "legroom_13a",
"randseq.1.player.legroom_13b": "legroom_13b",

"randseq.1.player.choice14": "choice14",
"randseq.1.player.distance_14": "distance_14",
"randseq.1.player.time_14": "time_14",
"randseq.1.player.price_14": "price_14",
"randseq.1.player.row_14a": "row_14a",
"randseq.1.player.row_14b": "row_14b",
"randseq.1.player.col_14a": "col_14a",
"randseq.1.player.col_14b": "col_14b",
"randseq.1.player.price_14a": "price_14a",
"randseq.1.player.price_14b": "price_14b",
"randseq.1.player.legroom_14a": "legroom_14a",
"randseq.1.player.legroom_14b": "legroom_14b",

"randseq.1.player.choice15": "choice15",
"randseq.1.player.distance_15": "distance_15",
"randseq.1.player.time_15": "time_15",
"randseq.1.player.price_15": "price_15",
"randseq.1.player.row_15a": "row_15a",
"randseq.1.player.row_15b": "row_15b",
"randseq.1.player.col_15a": "col_15a",
"randseq.1.player.col_15b": "col_15b",
"randseq.1.player.price_15a": "price_15a",
"randseq.1.player.price_15b": "price_15b",
"randseq.1.player.legroom_15a": "legroom_15a",
"randseq.1.player.legroom_15b": "legroom_15b",

"randseq.1.player.choice16": "choice16",
"randseq.1.player.distance_16": "distance_16",
"randseq.1.player.time_16": "time_16",
"randseq.1.player.price_16": "price_16",
"randseq.1.player.row_16a": "row_16a",
"randseq.1.player.row_16b": "row_16b",
"randseq.1.player.col_16a": "col_16a",
"randseq.1.player.col_16b": "col_16b",
"randseq.1.player.price_16a": "price_16a",
"randseq.1.player.price_16b": "price_16b",
"randseq.1.player.legroom_16a": "legroom_16a",
"randseq.1.player.legroom_16b": "legroom_16b",

"randseq.1.player.choice17": "choice17",
"randseq.1.player.distance_17": "distance_17",
"randseq.1.player.time_17": "time_17",
"randseq.1.player.price_17": "price_17",
"randseq.1.player.row_17a": "row_17a",
"randseq.1.player.row_17b": "row_17b",
"randseq.1.player.col_17a": "col_17a",
"randseq.1.player.col_17b": "col_17b",
"randseq.1.player.price_17a": "price_17a",
"randseq.1.player.price_17b": "price_17b",
"randseq.1.player.legroom_17a": "legroom_17a",
"randseq.1.player.legroom_17b": "legroom_17b",

"randseq.1.player.choice18": "choice18",
"randseq.1.player.distance_18": "distance_18",
"randseq.1.player.time_18": "time_18",
"randseq.1.player.price_18": "price_18",
"randseq.1.player.row_18a": "row_18a",
"randseq.1.player.row_18b": "row_18b",
"randseq.1.player.col_18a": "col_18a",
"randseq.1.player.col_18b": "col_18b",
"randseq.1.player.price_18a": "price_18a",
"randseq.1.player.price_18b": "price_18b",
"randseq.1.player.legroom_18a": "legroom_18a",
"randseq.1.player.legroom_18b": "legroom_18b",

"randseq.1.player.choice19": "choice19",
"randseq.1.player.distance_19": "distance_19",
"randseq.1.player.time_19": "time_19",
"randseq.1.player.price_19": "price_19",
"randseq.1.player.row_19a": "row_19a",
"randseq.1.player.row_19b": "row_19b",
"randseq.1.player.col_19a": "col_19a",
"randseq.1.player.col_19b": "col_19b",
"randseq.1.player.price_19a": "price_19a",
"randseq.1.player.price_19b": "price_19b",
"randseq.1.player.legroom_19a": "legroom_19a",
"randseq.1.player.legroom_19b": "legroom_19b",

"randseq.1.player.choice20": "choice20",
"randseq.1.player.distance_20": "distance_20",
"randseq.1.player.time_20": "time_20",
"randseq.1.player.price_20": "price_20",
"randseq.1.player.row_20a": "row_20a",
"randseq.1.player.row_20b": "row_20b",
"randseq.1.player.col_20a": "col_20a",
"randseq.1.player.col_20b": "col_20b",
"randseq.1.player.price_20a": "price_20a",
"randseq.1.player.price_20b": "price_20b",
"randseq.1.player.legroom_20a": "legroom_20a",
"randseq.1.player.legroom_20b": "legroom_20b",

"randseq.1.player.choice21": "choice21",
"randseq.1.player.distance_21": "distance_21",
"randseq.1.player.time_21": "time_21",
"randseq.1.player.price_21": "price_21",
"randseq.1.player.row_21a": "row_21a",
"randseq.1.player.row_21b": "row_21b",
"randseq.1.player.col_21a": "col_21a",
"randseq.1.player.col_21b": "col_21b",
"randseq.1.player.price_21a": "price_21a",
"randseq.1.player.price_21b": "price_21b",
"randseq.1.player.legroom_21a": "legroom_21a",
"randseq.1.player.legroom_21b": "legroom_21b",

"survey.1.player.age": "age",
"survey.1.player.gender": "gender",
"survey.1.player.feet": "height_feet",
"survey.1.player.inches": "height_inches",
"survey.1.player.rt": "annual_trip_freq",
"survey.1.player.bizlei": "percentage_biz",
"survey.1.player.seattype": "seat_type_preference",
"survey.1.player.section": "section_preference",
"survey.1.player.comment": "comment"

}, inplace=True)

In [5]:
# Filter to keep only completed survey rows
filtered_df = df.query('current_app_name == "survey" and session_code == "s6qkz77s"')
filtered_df.info()
filtered_df.describe()
filtered_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 234 entries, 24 to 622
Columns: 270 entries, id_in_session to survey.1.subsession.round_number
dtypes: float64(26), int64(152), object(92)
memory usage: 495.4+ KB


Unnamed: 0,id_in_session,index_in_pages,current_app_name,time_started,session_code,id_in_group,page_sequence,choice1,choice2,distance_1,...,gender,height_feet,height_inches,annual_trip_freq,percentage_biz,seat_type_preference,section_preference,comment,survey.1.group.id_in_subsession,survey.1.subsession.round_number
24,1,23,survey,2021-04-15 23:36:35.096369+00:00,s6qkz77s,1,"[""Q21"", ""Q15"", ""Q4"", ""Q2"", ""Q19"", ""Q12"", ""Q10""...",Seat A,Seat B,3.4,...,Male,5.0,10.0,6.0,25.0,Window,Economy,,1,1
28,5,23,survey,2021-04-16 01:35:43.132533+00:00,s6qkz77s,5,"[""Q7"", ""Q14"", ""Q9"", ""Q3"", ""Q18"", ""Q15"", ""Q21"",...",Seat A,Seat B,1.6,...,Male,5.0,8.0,5.0,40.0,Aisle,First,,1,1
29,6,23,survey,2021-04-16 01:36:06.063812+00:00,s6qkz77s,6,"[""Q3"", ""Q12"", ""Q1"", ""Q21"", ""Q17"", ""Q18"", ""Q11""...",Seat A,Seat B,0.8,...,Female,5.0,8.0,6.0,50.0,Window,Economy,,1,1
38,15,23,survey,2021-04-16 02:40:31.624725+00:00,s6qkz77s,15,"[""Q14"", ""Q17"", ""Q3"", ""Q4"", ""Q9"", ""Q16"", ""Q8"", ...",Seat B,Seat B,2.1,...,Male,5.0,9.0,1.0,0.0,Window,Economy,How do you handle bias from people who have ne...,1,1
39,16,23,survey,2021-04-16 02:55:05.067225+00:00,s6qkz77s,16,"[""Q15"", ""Q3"", ""Q14"", ""Q1"", ""Q20"", ""Q5"", ""Q9"", ...",Seat A,Seat B,2.4,...,Female,5.0,11.0,12.0,9.0,Middle,Premier,,1,1


In [6]:
filtered_df['seat1_a'] = filtered_df['row_1a'].astype(str) + filtered_df['col_1a']
filtered_df['seat1_b'] = filtered_df['row_1b'].astype(str) + filtered_df['col_1b']
filtered_df['seat2_a'] = filtered_df['row_2a'].astype(str) + filtered_df['col_2a']
filtered_df['seat2_b'] = filtered_df['row_2b'].astype(str) + filtered_df['col_2b']
filtered_df['seat3_a'] = filtered_df['row_3a'].astype(str) + filtered_df['col_3a']
filtered_df['seat3_b'] = filtered_df['row_3b'].astype(str) + filtered_df['col_3b']
filtered_df['seat4_a'] = filtered_df['row_4a'].astype(str) + filtered_df['col_4a']
filtered_df['seat4_b'] = filtered_df['row_4b'].astype(str) + filtered_df['col_4b']
filtered_df['seat5_a'] = filtered_df['row_5a'].astype(str) + filtered_df['col_5a']
filtered_df['seat5_b'] = filtered_df['row_5b'].astype(str) + filtered_df['col_5b']
filtered_df['seat6_a'] = filtered_df['row_6a'].astype(str) + filtered_df['col_6a']
filtered_df['seat6_b'] = filtered_df['row_6b'].astype(str) + filtered_df['col_6b']
filtered_df['seat7_a'] = filtered_df['row_7a'].astype(str) + filtered_df['col_7a']
filtered_df['seat7_b'] = filtered_df['row_7b'].astype(str) + filtered_df['col_7b']
filtered_df['seat8_a'] = filtered_df['row_8a'].astype(str) + filtered_df['col_8a']
filtered_df['seat8_b'] = filtered_df['row_8b'].astype(str) + filtered_df['col_8b']
filtered_df['seat9_a'] = filtered_df['row_9a'].astype(str) + filtered_df['col_9a']
filtered_df['seat9_b'] = filtered_df['row_9b'].astype(str) + filtered_df['col_9b']
filtered_df['seat10_a'] = filtered_df['row_10a'].astype(str) + filtered_df['col_10a']
filtered_df['seat10_b'] = filtered_df['row_10b'].astype(str) + filtered_df['col_10b']
filtered_df['seat11_a'] = filtered_df['row_11a'].astype(str) + filtered_df['col_11a']
filtered_df['seat11_b'] = filtered_df['row_11b'].astype(str) + filtered_df['col_11b']
filtered_df['seat12_a'] = filtered_df['row_12a'].astype(str) + filtered_df['col_12a']
filtered_df['seat12_b'] = filtered_df['row_12b'].astype(str) + filtered_df['col_12b']
filtered_df['seat13_a'] = filtered_df['row_13a'].astype(str) + filtered_df['col_13a']
filtered_df['seat13_b'] = filtered_df['row_13b'].astype(str) + filtered_df['col_13b']
filtered_df['seat14_a'] = filtered_df['row_14a'].astype(str) + filtered_df['col_14a']
filtered_df['seat14_b'] = filtered_df['row_14b'].astype(str) + filtered_df['col_14b']
filtered_df['seat15_a'] = filtered_df['row_15a'].astype(str) + filtered_df['col_15a']
filtered_df['seat15_b'] = filtered_df['row_15b'].astype(str) + filtered_df['col_15b']
filtered_df['seat16_a'] = filtered_df['row_16a'].astype(str) + filtered_df['col_16a']
filtered_df['seat16_b'] = filtered_df['row_16b'].astype(str) + filtered_df['col_16b']
filtered_df['seat17_a'] = filtered_df['row_17a'].astype(str) + filtered_df['col_17a']
filtered_df['seat17_b'] = filtered_df['row_17b'].astype(str) + filtered_df['col_17b']
filtered_df['seat18_a'] = filtered_df['row_18a'].astype(str) + filtered_df['col_18a']
filtered_df['seat18_b'] = filtered_df['row_18b'].astype(str) + filtered_df['col_18b']
filtered_df['seat19_a'] = filtered_df['row_19a'].astype(str) + filtered_df['col_19a']
filtered_df['seat19_b'] = filtered_df['row_19b'].astype(str) + filtered_df['col_19b']
filtered_df['seat20_a'] = filtered_df['row_20a'].astype(str) + filtered_df['col_20a']
filtered_df['seat20_b'] = filtered_df['row_20b'].astype(str) + filtered_df['col_20b']
filtered_df['seat21_a'] = filtered_df['row_21a'].astype(str) + filtered_df['col_21a']
filtered_df['seat21_b'] = filtered_df['row_21b'].astype(str) + filtered_df['col_21b']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['seat1_a'] = filtered_df['row_1a'].astype(str) + filtered_df['col_1a']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['seat1_b'] = filtered_df['row_1b'].astype(str) + filtered_df['col_1b']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['seat2_a'] = filtered_df['r

In [7]:
dfbyseatid = filtered_df.melt(id_vars = ['id_in_session','age','gender','height_feet','height_inches','annual_trip_freq','percentage_biz','seat_type_preference','section_preference'], 
                                    value_vars = ['seat1_a', 'seat1_b','seat2_a', 'seat2_b', 'seat3_a', 'seat3_b', 'seat4_a', 'seat4_b',
                                                   'seat5_a', 'seat5_b', 'seat6_a', 'seat6_b', 'seat7_a', 'seat7_b',
                                                   'seat8_a', 'seat8_b', 'seat9_a', 'seat9_b', 'seat10_a', 'seat10_b',
                                                   'seat11_a', 'seat11_b', 'seat12_a', 'seat12_b', 'seat13_a', 'seat13_b',
                                                   'seat14_a', 'seat14_b', 'seat15_a', 'seat15_b', 'seat16_a', 'seat16_b',
                                                   'seat17_a', 'seat17_b', 'seat18_a', 'seat18_b', 'seat19_a', 'seat19_b',
                                                   'seat20_a', 'seat20_b', 'seat21_a', 'seat21_b'],
                                    value_name = 'seat')

In [10]:
dfbyseatid.head()

Unnamed: 0,id_in_session,age,gender,height_feet,height_inches,annual_trip_freq,percentage_biz,seat_type_preference,section_preference,variable,seat
0,1,32.0,Male,5.0,10.0,6.0,25.0,Window,Economy,seat1_a,13E
1,5,28.0,Male,5.0,8.0,5.0,40.0,Aisle,First,seat1_a,13E
2,6,33.0,Female,5.0,8.0,6.0,50.0,Window,Economy,seat1_a,13E
3,15,20.0,Male,5.0,9.0,1.0,0.0,Window,Economy,seat1_a,13E
4,16,53.0,Female,5.0,11.0,12.0,9.0,Middle,Premier,seat1_a,13E


In [11]:
dfbyseatid.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9828 entries, 0 to 9827
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id_in_session         9828 non-null   int64  
 1   age                   9828 non-null   float64
 2   gender                9828 non-null   object 
 3   height_feet           9828 non-null   float64
 4   height_inches         9828 non-null   float64
 5   annual_trip_freq      9828 non-null   float64
 6   percentage_biz        9828 non-null   float64
 7   seat_type_preference  9828 non-null   object 
 8   section_preference    9828 non-null   object 
 9   variable              9828 non-null   object 
 10  seat                  9828 non-null   object 
dtypes: float64(5), int64(1), object(5)
memory usage: 844.7+ KB
