<a href="https://www.kaggle.com/code/swapnilshivpuje/cars-purchase-decision-logistic-regression?scriptVersionId=143380304" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## Cars - Purchase Decision Prediction Using Logistic Regression 

#### A purchase decision data set, indicating whether or not a client bought a car
*This dataset contains details of 1000 customers who intend to buy a car, considering their annual salaries.*

#### Columns:
* User ID
* Gender
* Age
* Annual Salary (column `AnnualSalary`)
* Purchase Decision (column `Purchased`; No = 0, Yes = 1)

### 1. Importing Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### 2. Reading CSV File

In [None]:
# Load the car purchase dataset from the Kaggle input directory.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/cars-purchase-decision-dataset/car_data.csv'
)

### 3. Data Analysis 

In [None]:
# Preview the first five rows (the default row count, spelled out).
df.head(n=5)

In [None]:
# Descriptive statistics for the DataFrame (pandas summarises the numeric
# columns by default).
df.describe()

In [None]:
# Count missing values per column (isnull is pandas' alias of isna).
df.isnull().sum()

In [None]:
# Number of distinct values in each column.
df.nunique()

In [None]:
# Count how many rows fall into each 'Purchased' class.
df['Purchased'].value_counts()

In [None]:
# List the column labels of the loaded DataFrame.
df.columns

In [None]:
# Remove the 'User ID' column from the DataFrame.
df = df.drop(columns=['User ID'])

In [None]:
# Re-check the column labels after dropping 'User ID'.
df.columns

In [None]:
# Number of rows that repeat an earlier row (first occurrence is not counted).
df.duplicated(keep='first').sum()

In [None]:
# Keep the first occurrence of every repeated row and leave the index as-is,
# then confirm that no duplicates remain.
df = df.drop_duplicates(keep='first', ignore_index=False)
df.duplicated(keep='first').sum()

In [None]:
# Pie chart of the 'Purchased' class balance, labelled with percentages.
purchase_counts = df["Purchased"].value_counts()
purchase_counts.plot.pie(autopct="%1.1f%%")

In [None]:
sns.set(style="white")

# Histogram of AnnualSalary (5 bins) with a KDE curve overlaid.
# A single full-width axes is used: a 1x2 subplot grid with only its first
# slot filled would leave half of the figure blank.
plt.figure(figsize=(9, 4))
sns.histplot(data=df, x='AnnualSalary', bins=5, kde=True)
plt.title('AnnualSalary Distribution')
plt.xlabel('Salary')
plt.ylabel('Frequency')
# Render the figure explicitly instead of echoing the last Text object.
plt.show()

### 4. Data Preprocessing

In [None]:
# Features are every column except the last; the target is the last column.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

In [None]:
# Shape of the feature matrix.
X.shape

In [None]:
# Shape of the target vector.
y.shape

In [None]:
# Display the full feature matrix (a whole-array slice).
X[:]

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode feature column 0 and pass every other column through
# unchanged; the encoded columns come first in the result.
encode_step = ('encode', OneHotEncoder(), [0])
ct = ColumnTransformer(transformers=[encode_step], remainder='passthrough')
encoded = ct.fit_transform(X)
X = np.array(encoded)

In [None]:
# Inspect the feature matrix after one-hot encoding.
X

### 5. Splitting Dataset 

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Inspect the training features produced by the split.
X_train

### 6. Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the columns from index 2 onward; columns 0-1 are left as they
# are. The scaler is fitted on the training rows only and then applied to
# both splits, so no test-set statistics leak into the scaling.
sc = StandardScaler()
sc.fit(X_train[:, 2:])
X_train[:, 2:] = sc.transform(X_train[:, 2:])
X_test[:, 2:] = sc.transform(X_test[:, 2:])

In [None]:
# Show the training row at index 1 to check the scaled values.
X_train[1]

### 7. Implementing Logistic Regression Model

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression classifier (default hyper-parameters) on the
# training split; fit() returns the estimator itself.
reg = LogisticRegression().fit(X_train, y_train)
# Echo the fitted estimator as the cell output.
reg

### 8. Predictions and Evaluation

In [None]:
# Predict the class label for every row of the held-out test split.
y_pred = reg.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test rows predicted correctly, reported as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy_pct = accuracy * 100
print("The Accuracy of Model for Prediction is : ", accuracy_pct)

In [None]:
from sklearn.metrics import confusion_matrix

# Rows are the true classes, columns are the predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print("The Confusion Matrix of Model is : ", cm)

In [None]:
# Heatmap of the confusion matrix with the count written in each cell.
# fmt='d' prints the integer counts in full; seaborn's default '.2g' format
# would render counts of 100 or more in scientific notation (e.g. 1.5e+02).
sns.heatmap(cm, annot=True, fmt='d')

In [None]:
from sklearn.model_selection import cross_val_score

# Cross-validate on the training split only, so the held-out test split stays
# untouched for the final evaluation. Five folds give a steadier estimate
# than two; cross_val_score fits a fresh clone of `reg` on each fold.
n_folds = 5
scores = cross_val_score(reg, X_train, y_train, cv=n_folds)
print(f"The Cross Val Score using CV={n_folds} is : ", scores)