In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Sample dataset
# Written one passenger per row for readability, then transposed into the
# column -> list-of-values mapping that pd.DataFrame is built from.
column_names = ['Satisfaction', 'Customer Type', 'Age', 'Type of Travel', 'Class', 'Flight Distance']
passenger_rows = [
    ('satisfied', 'Loyal Customer', 65, 'PersonalTravel', 'Eco', 265),
    ('satisfied', 'Loyal Customer', 47, 'PersonalTravel', 'Business', 2438),
    ('satisfied', 'Loyal Customer', 23, 'PersonalTravel', 'Eco', 2138),
    ('satisfied', 'Loyal Customer', 18, 'PersonalTravel', 'Eco', 2541),
    ('dissatisfied', 'Loyal Customer', 18, 'PersonalTravel', 'Eco', 2411),
    ('dissatisfied', 'Loyal Customer', 28, 'PersonalTravel', 'Eco', 2541),
    ('satisfied', 'Loyal Customer', 28, 'PersonalTravel', 'Eco', 3179),
]
# zip(*rows) regroups the row tuples by column position.
data = {name: list(values) for name, values in zip(column_names, zip(*passenger_rows))}
df = pd.DataFrame(data)

# Encode categorical variables
# One LabelEncoder per categorical column. Each fitted encoder is kept in
# `label_encoders` (keyed by column name) so the same string <-> code mapping
# can be applied again later; the columns of `df` are overwritten with codes.
categorical_columns = ('Satisfaction', 'Customer Type', 'Type of Travel', 'Class')
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    df[name] = encoder.fit_transform(df[name])

# Correlation analysis
# Pearson correlation is undefined for a column that never varies, and
# df.corr() reports it as NaN. Flag such columns explicitly so a printout full
# of NaN is not mistaken for a computation error.
correlation_matrix = df.corr()
correlation_customer_type = correlation_matrix['Customer Type']
constant_columns = [name for name in df.columns if df[name].nunique() <= 1]
if constant_columns:
    print("Note: these columns are constant, so their correlations are undefined (NaN):", constant_columns)
print("Correlation of Customer Type with other features:\n", correlation_customer_type)

# Define features and target
# Every column except the target is used as a model input.
feature_columns = ['Customer Type', 'Age', 'Type of Travel', 'Class', 'Flight Distance']
X = df[feature_columns]
y = df['Satisfaction']

# Standardize features
# The fitted scaler is kept so new rows can be transformed with the same
# statistics that were learned from X.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Train logistic regression model
model = LogisticRegression().fit(X_scaled, y)

# New instance
# Described with raw, human-readable values; columns that have a fitted
# encoder are converted to integer codes, the rest are passed through as-is.
new_passenger = {
    'Customer Type': 'Loyal Customer',
    'Age': 30,
    'Type of Travel': 'PersonalTravel',
    'Class': 'Business',
    'Flight Distance': 600,
}
new_instance = pd.DataFrame({
    feature: [label_encoders[feature].transform([value])[0] if feature in label_encoders else value]
    for feature, value in new_passenger.items()
})
# Reuse the already-fitted scaler; it is not refitted on the new row.
new_instance_scaled = scaler.transform(new_instance)

# Predict satisfaction
# The model outputs an integer code, which the target encoder maps back to
# the original string label.
satisfaction_encoder = label_encoders['Satisfaction']
predicted_satisfaction = model.predict(new_instance_scaled)
predicted_satisfaction_label = satisfaction_encoder.inverse_transform(predicted_satisfaction)
print("Predicted satisfaction for the new instance:", predicted_satisfaction_label[0])

# Performance measures
# Predictions are made on X_scaled, the same rows the model was fitted on,
# so these are in-sample scores.
y_pred = model.predict(X_scaled)
accuracy, precision, recall, f1 = (
    scorer(y, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


Correlation of Customer Type with other features:
 Satisfaction      NaN
Customer Type     NaN
Age               NaN
Type of Travel    NaN
Class             NaN
Flight Distance   NaN
Name: Customer Type, dtype: float64
Predicted satisfaction for the new instance: satisfied
Accuracy: 0.71
Precision: 0.71
Recall: 1.00
F1 Score: 0.83
