In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline

In [None]:
df = pd.read_csv('/content/product_purchase_prediction.csv')

In [None]:
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Customer ID            10 non-null     int64  
 1   Age                    10 non-null     int64  
 2   Annual Income          10 non-null     int64  
 3   Loyalty Score          10 non-null     int64  
 4   Online Visits          10 non-null     int64  
 5   Purchase Frequency     10 non-null     float64
 6   Discount Preference    10 non-null     object 
 7   Previous Product Type  10 non-null     object 
 8   Time Spent on Website  10 non-null     int64  
 9   Region                 10 non-null     object 
 10  Next Purchase          10 non-null     object 
dtypes: float64(1), int64(6), object(4)
memory usage: 1012.0+ bytes
None


In [None]:
print(df.isnull().sum())

Customer ID              0
Age                      0
Annual Income            0
Loyalty Score            0
Online Visits            0
Purchase Frequency       0
Discount Preference      0
Previous Product Type    0
Time Spent on Website    0
Region                   0
Next Purchase            0
dtype: int64


In [None]:
print(df['Next Purchase'].value_counts)

<bound method IndexOpsMixin.value_counts of 0    Electronics
1     Home Goods
2    Electronics
3       Clothing
4     Home Goods
5       Clothing
6     Home Goods
7    Electronics
8    Electronics
9       Clothing
Name: Next Purchase, dtype: object>


In [None]:
df.head()

Unnamed: 0,Customer ID,Age,Annual Income,Loyalty Score,Online Visits,Purchase Frequency,Discount Preference,Previous Product Type,Time Spent on Website,Region,Next Purchase
0,1,25,50000,85,15,2.0,Always,Clothing,12,North,Electronics
1,2,40,75000,65,25,3.0,Sometimes,Electronics,18,South,Home Goods
2,3,30,65000,90,5,1.5,Never,Home Goods,7,East,Electronics
3,4,22,30000,45,12,2.5,Always,Clothing,14,West,Clothing
4,5,55,85000,75,20,3.5,Sometimes,Electronics,16,North,Home Goods


In [None]:
print(df['Next Purchase'].value_counts())

Next Purchase
Electronics    4
Home Goods     3
Clothing       3
Name: count, dtype: int64


In [None]:
df_encoded = pd.get_dummies(df, columns =['Discount Preference', 'Previous Product Type', 'Region'], drop_first=True)

In [None]:
scaler = StandardScaler()
numeric_features = ['Age', 'Annual Income', 'Loyalty Score', 'Online Visits', 'Purchase Frequency', 'Time Spent on Website']
df_encoded[numeric_features] = scaler.fit_transform(df_encoded[numeric_features])


In [None]:
X = df_encoded.drop(columns=['Customer ID', 'Next Purchase'])
y = df_encoded['Next Purchase']

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
logreg = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=1000)
logreg.fit(X_train, y_train)



In [None]:
y_pred = logreg.predict(X_test)

In [None]:
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.5


In [None]:
print("\nClassification Report:\n", classification_report(y_test, y_pred))



Classification Report:
               precision    recall  f1-score   support

    Clothing       0.00      0.00      0.00         0
 Electronics       0.00      0.00      0.00         1
  Home Goods       1.00      1.00      1.00         1

    accuracy                           0.50         2
   macro avg       0.33      0.33      0.33         2
weighted avg       0.50      0.50      0.50         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Confusion Matrix:
 [[0 0 0]
 [1 0 0]
 [0 0 1]]


In [None]:
logreg_reg = LogisticRegression(multi_class='multinomial', solver='lbfgs', C=0.1, max_iter=1000)
logreg_reg.fit(X_train, y_train)



In [None]:
y_pred_reg = logreg_reg.predict(X_test)

In [None]:
print("Accuracy with Regularization:", accuracy_score(y_test, y_pred_reg))

Accuracy with Regularization: 0.0


In [None]:
print("\nClassification Report with Regularization:\n", classification_report(y_test, y_pred_reg))



Classification Report with Regularization:
               precision    recall  f1-score   support

    Clothing       0.00      0.00      0.00       0.0
 Electronics       0.00      0.00      0.00       1.0
  Home Goods       0.00      0.00      0.00       1.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
coefficients = pd.DataFrame(logreg.coef_, columns=X.columns, index=logreg.classes_)
print("Feature Coefficients:\n", coefficients)

Feature Coefficients:
                   Age  Annual Income  Loyalty Score  Online Visits  \
Clothing    -0.205067      -0.301480      -0.663973      -0.012309   
Electronics -0.148232       0.141349       0.768780       0.141420   
Home Goods   0.353299       0.160131      -0.104807      -0.129111   

             Purchase Frequency  Time Spent on Website  \
Clothing              -0.213474              -0.019133   
Electronics           -0.190818               0.281124   
Home Goods             0.404291              -0.261991   

             Discount Preference_Never  Discount Preference_Sometimes  \
Clothing                      0.055927                       0.066922   
Electronics                   0.116401                      -0.565935   
Home Goods                   -0.172328                       0.499013   

             Previous Product Type_Electronics  \
Clothing                             -0.088773   
Electronics                           0.033162   
Home Goods          

In [None]:
from sklearn.model_selection import cross_val_score
cv_scores = cross_val_score(logreg, X, y, cv=3)
print("Cross-validation accuracy scores:", cv_scores)
print("Average cross-validation accuracy:", np.mean(cv_scores))

Cross-validation accuracy scores: [0.25       0.66666667 0.        ]
Average cross-validation accuracy: 0.3055555555555555


