# Logistic Regression

## Part 1 - Data Preprocessing

### Importing the dataset

In [2]:
import pandas as pd
dataset = pd.read_csv('Customer-survey-data.csv')

In [49]:
dataset.head()

Unnamed: 0,Customer,How satisfied were you with your overall delivery experience at Ali? 1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied,How satisfied were you with the quality of the food at Alis? 1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied,How satisfied were you with the speed of delivery at Alis? 1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied,Was your order accurate? Please respond yes or no.
0,1,5.0,3.0,4.0,Yes
1,2,3.0,4.0,3.0,Yes
2,3,4.0,5.0,2.0,Yes
3,4,5.0,3.0,4.0,Yes
4,5,2.0,5.0,1.0,Yes


In [50]:
# Check for NaN values in the dataset
print("NaN values in dataset:\n", dataset.isna().sum())

NaN values in dataset:
 Customer                                                                                                                                                      0
How satisfied were you with your overall delivery experience at Ali?                    1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied      0
How satisfied were you with the quality of the food at Alis?                             1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied     0
How satisfied were you with the speed of delivery at Alis?                                1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied    0
Was your order accurate? Please respond yes or no.                                                                                                            0
dtype: int64


In [51]:
# Fill missing values (if any)
dataset.fillna(method='ffill', inplace=True)  # Example: forward fill to handle NaN

  dataset.fillna(method='ffill', inplace=True)  # Example: forward fill to handle NaN


In [52]:
# Check for NaN values again after filling or dropping
print("NaN values in dataset after handling:\n", dataset.isna().sum())

NaN values in dataset after handling:
 Customer                                                                                                                                                      0
How satisfied were you with your overall delivery experience at Ali?                    1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied      0
How satisfied were you with the quality of the food at Alis?                             1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied     0
How satisfied were you with the speed of delivery at Alis?                                1-5 where 1 = extremely dissatisfied and 5 = extremely satisfied    0
Was your order accurate? Please respond yes or no.                                                                                                            0
dtype: int64


### Getting the inputs and output

In [53]:
X = dataset.iloc[:, 1:-1].values
y = dataset.iloc[:, -1].values

In [54]:
# Check original shapes
print("Original X shape:", X.shape)
print("Original y shape:", y.shape)


Original X shape: (10616, 3)
Original y shape: (10616,)


In [55]:
X

array([[5., 3., 4.],
       [3., 4., 3.],
       [4., 5., 2.],
       ...,
       [5., 3., 3.],
       [3., 3., 3.],
       [1., 3., 3.]])

In [56]:
y

array(['Yes', 'Yes', 'Yes', ..., 'Yes', 'Yes', 'Yes'], dtype=object)

In [57]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [58]:
X_train

array([[5., 5., 5.],
       [3., 3., 3.],
       [5., 5., 5.],
       ...,
       [5., 3., 3.],
       [3., 5., 3.],
       [1., 3., 5.]])

In [59]:
X_test

array([[3., 3., 3.],
       [2., 2., 5.],
       [4., 3., 3.],
       ...,
       [5., 5., 5.],
       [3., 3., 3.],
       [4., 4., 1.]])

In [60]:
y_train

array(['Yes', 'Yes', 'Yes', ..., 'Yes', 'No', 'Yes'], dtype=object)

In [61]:
y_test

array(['Yes', 'Yes', 'No', ..., 'No', 'Yes', 'Yes'], dtype=object)

In [62]:
# Check shapes after splitting
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

X_train shape: (8492, 3)
y_train shape: (8492,)


### Feature Scaling

In [63]:
from sklearn.preprocessing import StandardScaler 
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)  # Important to use transform, not fit_transform

In [64]:
X_train

array([[ 1.21762205,  1.20312471,  1.21596761],
       [-0.21999303, -0.22581725, -0.21660688],
       [ 1.21762205,  1.20312471,  1.21596761],
       ...,
       [ 1.21762205, -0.22581725, -0.21660688],
       [-0.21999303,  1.20312471, -0.21660688],
       [-1.6576081 , -0.22581725,  1.21596761]])

## Part 2 - Building and training the model

### Building the model

In [65]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(random_state = 0)

### Training the model

In [66]:
try:
    model.fit(X_train, y_train)
except ValueError as e:
    print("Error during model training:", e)

### Inference

Making the predictons of the data points in the test set

In [67]:
y_pred = model.predict(sc.transform(X_test))

In [70]:
y_pred

array(['Yes', 'Yes', 'Yes', ..., 'Yes', 'Yes', 'Yes'], dtype=object)

In [73]:
model.predict(sc.transform([[1,2,3]]))

array(['Yes'], dtype=object)

## Part 3: Evaluating the model 

### Confusion Matrix

In [74]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

array([[   0,  573],
       [   0, 1551]])

In [75]:
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

Confusion Matrix:
[[   0  573]
 [   0 1551]]


### Accuracy

In [82]:
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.730225988700565


In [84]:
sample_prediction = sc.transform([[1, 2, 3]])  # Ensure these values match the feature columns
print("Sample prediction:", model.predict(sample_prediction))

Sample prediction: ['Yes']
