In [1]:
# Import required libraries
from sklearn.decomposition import FactorAnalysis
from factor_analyzer import FactorAnalyzer
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np
import seaborn as sns

In [2]:
# Load the airline passenger satisfaction dataset
passengers = pd.read_csv('passengers.csv')

In [3]:
# Subset input and output features.
# The target is removed by name rather than by position, so 'Satisfied' cannot
# leak into the inputs (and no real feature is dropped) if the column order of
# the CSV ever changes.
X = passengers.drop(columns=['Satisfied'])
# Kept as a one-column DataFrame; it is flattened to 1-D where the model is fitted.
y = passengers[['Satisfied']]

In [4]:
# Display the names of the input feature columns
X.columns

Index(['Age', 'Flight distance', 'Inflight Wi-Fi service',
       'Convenient arrival/departure time', 'Ease of online booking',
       'Gate location', 'Food and drink', 'Online boarding', 'Seat comfort',
       'Inflight entertainment', 'On-board service', 'Leg room service',
       'Baggage handling', 'Check-in service', 'Inflight service',
       'Cleanliness', 'Departure delay in minutes', 'Gender', 'Customer type',
       'Type of travel', 'Eco', 'Eco plus'],
      dtype='object')

In [5]:
# Display the first five instances (rows) of the input features
X.head()

Unnamed: 0,Age,Flight distance,Inflight Wi-Fi service,Convenient arrival/departure time,Ease of online booking,Gate location,Food and drink,Online boarding,Seat comfort,Inflight entertainment,...,Baggage handling,Check-in service,Inflight service,Cleanliness,Departure delay in minutes,Gender,Customer type,Type of travel,Eco,Eco plus
0,52,160,5,4,3,4,3,4,3,5,...,5,2,5,5,50,0,0,0,1,0
1,36,2863,1,1,3,1,5,4,5,4,...,4,3,4,5,0,0,0,0,0,0
2,20,192,2,0,2,4,2,2,2,2,...,3,2,2,2,0,1,1,0,1,0
3,44,3377,0,0,0,2,3,4,4,1,...,1,3,1,4,0,1,0,0,0,0
4,49,1182,2,3,4,3,4,1,2,2,...,2,4,2,4,0,0,0,0,1,0


In [6]:
# Split the data into train and test sets.
# test_size=0.2 holds out 20% of the instances for testing; random_state is fixed
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [7]:
# Build the first pipeline: standardize the inputs, reduce them to 6 varimax-rotated
# factors with scikit-learn's FactorAnalysis, then classify with a support vector classifier.
scaler = StandardScaler()
factor_analysis = FactorAnalysis(n_components=6, rotation='varimax', random_state=123)
clf = SVC(random_state=123)
pipeline_factor_analysis = Pipeline(
    steps=[
        ('scaler', scaler),
        ('factor_analysis', factor_analysis),
        ('clf', clf),
    ]
)

In [8]:
# Fit the data pipeline to the training set.
# y_train is a one-column DataFrame, so np.ravel flattens it to the 1-D label array
# passed to fit.
pipeline_factor_analysis.fit(X_train, np.ravel(y_train))

In [9]:
# Display accuracy of the FactorAnalysis-based classifier on the held-out test set
pipeline_factor_analysis.score(X_test, y_test)

0.7565024903154399

In [10]:
# Create another data pipeline using the FactorAnalyzer() function.
# This pipeline gets its own scaler and classifier: Pipeline fits the step objects
# it is given in place (it does not copy them), so reusing `scaler` and `clf` here
# would refit the objects held by pipeline_factor_analysis and silently change
# that already-fitted pipeline.
factor_analyzer = FactorAnalyzer(rotation='varimax', n_factors=6)
pipeline_factor_analyzer = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('factor_analyzer', factor_analyzer),
    ('clf', SVC(random_state=123)),
])

In [11]:
# Fit the data pipeline that uses FactorAnalyzer to the training set.
# y_train is a one-column DataFrame, so np.ravel flattens it to the 1-D label array
# passed to fit.
pipeline_factor_analyzer.fit(X_train,np.ravel(y_train))

In [12]:
# Display accuracy of the FactorAnalyzer-based classifier on the held-out test set
pipeline_factor_analyzer.score(X_test, y_test)

0.7565024903154399

In [13]:
# Create a matrix of factor loadings (rows = input features, columns = factors).
# The column labels FA1..FAk are generated from the width of the fitted loadings,
# so they stay valid if n_factors is changed where FactorAnalyzer is constructed.
n_fitted_factors = factor_analyzer.loadings_.shape[1]
loadings = pd.DataFrame(factor_analyzer.loadings_,
                        columns=[f'FA{k}' for k in range(1, n_fitted_factors + 1)],
                        index=X.columns)

In [14]:
# Show the factor loadings rounded to four decimal places
loadings.round(4)

Unnamed: 0,FA1,FA2,FA3,FA4,FA5,FA6
Age,0.0516,-0.0117,0.0008,-0.0002,0.2264,-0.0145
Flight distance,0.0783,0.1032,-0.0106,0.0071,0.2268,-0.0067
Inflight Wi-Fi service,0.0908,0.113,0.7023,0.0373,0.269,0.0302
Convenient arrival/departure time,-0.0249,0.0493,0.5663,0.0189,-0.0472,-0.0052
Ease of online booking,-0.0481,0.0304,0.856,0.0397,0.2425,0.018
Gate location,-0.0148,-0.0391,0.6097,-0.0045,-0.1374,-0.0167
Food and drink,0.7798,0.0097,0.0453,-0.0408,0.0617,0.0061
Online boarding,0.2246,0.0996,0.237,0.0446,0.8548,0.0457
Seat comfort,0.7256,0.0631,-0.0036,-0.0118,0.3153,-0.0051
Inflight entertainment,0.7738,0.457,0.0521,-0.034,0.0653,-0.0064
