In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Data
data = {
    'Satisfaction': ['satisfied', 'satisfied', 'satisfied', 'satisfied', 'dissatisfied', 'dissatisfied', 'satisfied'],
    'Customer Type': ['Loyal Customer', 'Loyal Customer', 'Loyal Customer', 'Loyal Customer', 'Loyal Customer', 'Loyal Customer', 'Loyal Customer'],
    'Age': [65, 47, 15, 60, 18, 14, 28],
    'Type of Travel': ['Personal Travel', 'Personal Travel', 'Personal Travel', 'Personal Travel', 'Personal Travel', 'Personal Travel', 'Personal Travel'],
    'Class': ['Eco', 'Business', 'Eco', 'Eco', 'Eco', 'Eco', 'Eco'],
    'Flight Distance': [265, 2464, 2138, 623, 2411, 2541, 3179]
}

df = pd.DataFrame(data)

# Encode categorical variables
le = LabelEncoder()
df['Satisfaction'] = le.fit_transform(df['Satisfaction'])
df['Customer Type'] = le.fit_transform(df['Customer Type'])
df['Type of Travel'] = le.fit_transform(df['Type of Travel'])
df['Class'] = le.fit_transform(df['Class'])

# Split the data into features and target
X = df[['Customer Type', 'Age', 'Type of Travel', 'Class', 'Flight Distance']]
y = df['Satisfaction']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Classification Report:\n{report}')

# Predict for new instance
new_instance = np.array([[0, 30, 0, 0, 600]])  # Encoded values for [Loyal Customer, 30, Personal Travel, Eco, 600]
new_pred = model.predict(new_instance)
new_pred_label = le.inverse_transform(new_pred)

print(f'Prediction for new instance: {new_pred_label[0]}')

# Correlation matrix
correlation_matrix = df.corr()
print('Correlation matrix:\n', correlation_matrix['Customer Type'])


Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         2

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Prediction for new instance: Eco
Correlation matrix:
 Satisfaction      NaN
Customer Type     NaN
Age               NaN
Type of Travel    NaN
Class             NaN
Flight Distance   NaN
Name: Customer Type, dtype: float64


