In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Customer-satisfaction pipeline: load the survey CSV, impute missing scores,
# derive a binary "satisfied" label, fit a logistic regression and report its
# performance on a held-out split.

# --- Configuration ----------------------------------------------------------
file_path = 'Customer-survey-data.csv'  # Update with your file path if needed

# The three score columns used for imputation, for the derived average and as
# model features. Defined once so the four usages cannot drift apart.
SATISFACTION_COLS = [
    'Overall_Delivery_Satisfaction',
    'Food_Quality_Satisfaction',
    'Delivery_Speed_Satisfaction',
]
SATISFACTION_THRESHOLD = 3  # average score >= this value is labelled 1
TEST_SIZE = 0.3             # fraction of rows held out for evaluation
RANDOM_STATE = 0            # fixed seed so the split is reproducible

# --- Load -------------------------------------------------------------------
data = pd.read_csv(file_path)

# Rename columns for easier access. Assigning to `.columns` requires the CSV
# to have exactly five columns; the names are applied positionally.
data.columns = ['Customer'] + SATISFACTION_COLS + ['Order_Accuracy']

# --- Clean ------------------------------------------------------------------
# Fill missing scores with each column's median. The median Series is indexed
# by the three score column names, so no other column is touched.
data = data.fillna(data[SATISFACTION_COLS].median())

# Order accuracy is not used below, so drop it.
data = data.drop(columns=['Order_Accuracy'])

# --- Derive target ----------------------------------------------------------
data['Satisfaction_Average'] = data[SATISFACTION_COLS].mean(axis=1)
data['Satisfaction_Binary'] = (
    data['Satisfaction_Average'] >= SATISFACTION_THRESHOLD
).astype(int)

# --- Features / target ------------------------------------------------------
# NOTE(review): the target is computed directly from the three feature columns
# (their mean compared with a fixed threshold), so the classifier only has to
# re-learn that rule. A near-perfect accuracy is therefore expected and says
# nothing about predictive power on independently collected labels.
X = data[SATISFACTION_COLS]
y = data['Satisfaction_Binary']

# --- Train / evaluate -------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

y_pred = log_reg.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print results. The per-class report was previously computed but never shown.
print("Accuracy:", accuracy)
print(report)


Accuracy: 1.0


In [16]:
# Predict the satisfaction class for one hypothetical customer. The scores are
# given in the same column order as the training features in `X`.
# The model was fitted on a DataFrame, so the query row is wrapped in a
# DataFrame with the same column names. This keeps the feature/column mapping
# explicit and avoids scikit-learn's "X does not have valid feature names"
# warning that a bare nested list triggers.
log_reg.predict(pd.DataFrame([[4, 3, 2]], columns=X.columns))



array([1])