Importing packages

**About Dataset**


**Description:**
This dataset provides insights into consumer electronics sales, featuring product categories, brands, prices, customer demographics, purchase behavior, and satisfaction metrics. It aims to analyze factors influencing purchase intent and customer satisfaction in the consumer electronics market.

**Features:**

- **ProductID:** Unique identifier for each product.
- **ProductCategory:** Category of the consumer electronics product (e.g., Smartphones, Laptops).
- **ProductBrand:** Brand of the product (e.g., Apple, Samsung).
- **ProductPrice:** Price of the product ($).
- **CustomerAge:** Age of the customer.
- **CustomerGender:** Gender of the customer (0 - Male, 1 - Female).
- **PurchaseFrequency:** Average number of purchases per year.
- **CustomerSatisfaction:** Customer satisfaction rating (1 - 5).
- **PurchaseIntent (Target Variable):** Intent to purchase.

In [None]:
# Models to try in this notebook:
# - Bagging classifier
# - LDA (Linear Discriminant Analysis)
# - QDA (Quadratic Discriminant Analysis)
#   NOTE(review): no QDA cell is visible further down -- confirm whether it is still planned.
# - NuSVC


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence all warnings for the rest of the session.
# NOTE(review): this also hides library deprecation warnings, so upcoming API removals go unnoticed.
warnings.filterwarnings('ignore')


Data loading, data frame and data understanding

In [None]:
# Load the sales CSV from the Kaggle input directory into a DataFrame.
df = pd.read_csv('/kaggle/input/consumer-electronics-sales-dataset/consumer_electronics_sales_data.csv')

In [None]:
# Preview the first 10 rows.
df.head(10)

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Missing values per column.
df.isna().sum()

In [None]:
# Count of rows that exactly duplicate an earlier row.
df.duplicated().sum()

Feature Engineering

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit one encoder per categorical column and keep every one of them, so the
# string <-> integer mapping of each column stays recoverable later (for
# example to encode new inputs before calling predict). Reusing a single
# encoder leaves it holding only the classes of the column fitted last.
encoders = {}
for column in ('ProductCategory', 'ProductBrand'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Kept for backward compatibility: `le` still ends up as the ProductBrand encoder.
le = encoders['ProductBrand']

In [None]:
# ProductID is a unique identifier (see the feature list), not a predictive feature.
# A non in-place drop with errors='ignore' keeps this cell safe to re-run;
# the in-place version raises KeyError on its second execution.
df = df.drop(columns='ProductID', errors='ignore')

In [None]:
# Box plot of every column to eyeball value ranges and outliers.
# `data=` is passed by keyword: older seaborn releases took `x` as the first
# positional parameter, so a positional DataFrame is version dependent.
sns.boxplot(data=df)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Correlation heatmap of all columns.
# The coefficients are plotted unscaled so the annotations and the colour bar
# read as real correlations in [-1, 1]; fmt='.2f' keeps small values legible
# without multiplying them, and vmin/vmax centre the diverging colour map on 0.
plt.figure(figsize=(12,10))
sns.heatmap(df.corr(), annot=True, fmt='.2f', vmin=-1, vmax=1, cmap='coolwarm', linecolor='yellow', linewidths=1)
plt.show()

Train Test Split

In [None]:
# Target: PurchaseIntent. Features: every other column.
y = df['PurchaseIntent']
X = df.drop(columns=['PurchaseIntent'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.25,
)

Bagging Classifier

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

# Ensemble of 500 decision trees, each trained on a random 25% subset of the
# training rows drawn without replacement (bootstrap=False).
base_clf = DecisionTreeClassifier()
# The base estimator is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` (deprecated in scikit-learn 1.2, removed
# in 1.4), while the first positional slot is the base estimator under both names.
bag_clf = BaggingClassifier(base_clf, n_estimators=500, max_samples=0.25, bootstrap=False, random_state=42, n_jobs=-1)
bag_clf.fit(X_train, y_train)

y_pred = bag_clf.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Share of test rows the bagging ensemble classified correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.4f}")

In [None]:
from sklearn.model_selection import GridSearchCV

# Only genuinely tunable hyper-parameters belong in the grid. random_state=42
# and n_jobs=-1 are already set on bag_clf and are carried over when the search
# clones it, so listing them as single-value "choices" only cluttered best_params_.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_samples': [0.25, 0.5, 0.75],
    'bootstrap': [False, True],
}

# 5-fold cross-validated search over the 18 combinations, scored by accuracy.
grid_search = GridSearchCV(bag_clf, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)


In [None]:
from sklearn.metrics import accuracy_score

# Score the tuned model on the held-out test set instead of dumping the raw
# prediction array, so it can be compared with the other models' accuracies.
y_pred_tuned = grid_search.predict(X_test)
print(f"Accuracy (tuned bagging): {accuracy_score(y_test, y_pred_tuned):.4f}")

In [None]:
# Hyper-parameter combination with the best mean cross-validated score.
grid_search.best_params_

In [None]:
# Mean cross-validated accuracy of the best parameter combination.
grid_search.best_score_

NuSVC

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import NuSVC

# Kernel SVMs are distance based and not scale invariant: without scaling,
# columns with large raw values (such as ProductPrice) dominate the kernel.
# The scaler lives inside the pipeline so it is fitted on the training rows
# only and re-applied automatically at predict time.
nsc = make_pipeline(StandardScaler(), NuSVC())
nsc.fit(X_train, y_train)
y_pred2 = nsc.predict(X_test)

In [None]:
# Test-set accuracy of the NuSVC model.
print(accuracy_score(y_true=y_test, y_pred=y_pred2))

Linear Discriminant Analysis

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Fit LDA with its default settings on the training split.
lda = LinearDiscriminantAnalysis()
lda.fit(X=X_train, y=y_train)

In [None]:
# LDA class predictions for the held-out rows.
y_pred3 = lda.predict(X=X_test)

In [None]:
# Test-set accuracy of the LDA model.
print(accuracy_score(y_true=y_test, y_pred=y_pred3))

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Rows are true classes, columns are the classes LDA predicted.
print(confusion_matrix(y_true=y_test, y_pred=y_pred3))

In [None]:
# Per-class precision, recall and F1 for the LDA predictions.
print(classification_report(y_true=y_test, y_pred=y_pred3))

Ridge Classifier

In [None]:
from sklearn.linear_model import RidgeClassifier

# Ridge classifier with default settings: fit on the training split,
# then score its predictions on the held-out rows.
ridge_clf = RidgeClassifier().fit(X_train, y_train)
y_pred_ridge = ridge_clf.predict(X_test)

accuracy_ridge = accuracy_score(y_true=y_test, y_pred=y_pred_ridge)
print(f"Accuracy (Ridge Classifier): {accuracy_ridge:.4f}")


In [None]:
# Peek at the first rows of the frame
# (presumably to pick example feature values for the manual prediction cell).
df.head()

Predictive Model Building

In [None]:
# One hand-written sample with already label-encoded categorical values.
# The values must follow the column order of X -- presumably ProductCategory,
# ProductBrand, ProductPrice, CustomerAge, CustomerGender, PurchaseFrequency,
# CustomerSatisfaction; confirm with X.columns.
input_data = (3,2,312.949668,18,0,2,1)
# Wrap the sample in a one-row DataFrame carrying X's column names: the model
# was fitted on a DataFrame, and a bare NumPy array makes scikit-learn warn
# about missing feature names. A wrong number of values now fails loudly here.
input_df = pd.DataFrame([input_data], columns=X.columns)
prediction = bag_clf.predict(input_df)
print(prediction)
if prediction[0] == 0:
    print('No Purchase')
else:
    print('Purchase')

Pickling the model

In [None]:
import pickle

# Persist the fitted bagging model. The context manager guarantees the file is
# flushed and closed even if pickling fails; a bare open() call leaks the handle.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(bag_clf, model_file)