In [None]:
# Colab-only step: open the browser upload dialog so that car_data.csv is saved
# into the working directory, where the later pd.read_csv("car_data.csv") expects it.
# The value returned by files.upload() is kept in Car_data_file.
from google.colab import files
Car_data_file = files.upload()

Saving car_data.csv to car_data.csv


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

In [None]:
# Read the uploaded CSV into a DataFrame and print it as a quick sanity check
dataset = pd.read_csv("car_data.csv")
print(dataset)

     User ID  Gender  Age  AnnualSalary  Purchased
0        385    Male   35         20000          0
1        681    Male   40         43500          0
2        353    Male   49         74000          0
3        895    Male   40        107500          1
4        661    Male   25         79000          0
..       ...     ...  ...           ...        ...
995      863    Male   38         59000          0
996      800  Female   47         23500          0
997      407  Female   28        138500          1
998      299  Female   48        134000          1
999      687  Female   44         73500          0

[1000 rows x 5 columns]


In [None]:
# Convert the categorical Gender column into integers so it can be fed to the neural network.
# LabelEncoder numbers the classes in sorted order, so here Female -> 0 and Male -> 1
# (visible in the printed frame below).
le = LabelEncoder()
dataset["Gender"] = le.fit_transform(dataset["Gender"])

# Drop the User ID column because it is an identifier, not a useful feature.
# errors="ignore" keeps this cell re-runnable: on a second execution the column
# is already gone and a plain drop would raise KeyError.
dataset = dataset.drop(columns="User ID", errors="ignore")
print(dataset)

     Gender  Age  AnnualSalary  Purchased
0         1   35         20000          0
1         1   40         43500          0
2         1   49         74000          0
3         1   40        107500          1
4         1   25         79000          0
..      ...  ...           ...        ...
995       1   38         59000          0
996       0   47         23500          0
997       0   28        138500          1
998       0   48        134000          1
999       0   44         73500          0

[1000 rows x 4 columns]


In [None]:
# Prepare the data for training.
# random_state pins the shuffle performed by train_test_split, so the same rows
# end up in the train and test splits on every run.

X = dataset.loc[:, ['Gender', 'Age', 'AnnualSalary']]
Y = dataset.loc[:, 'Purchased']

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features for better performance: the scaling statistics are
# learned from the training split only and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# We now build the model.
# Sequential gives a plain stack of layers with one input tensor and one output
# tensor, which is what a feed-forward network needs.

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable across runs.
set_random_seed(42)

model = Sequential([
    # One input per feature column of the scaled training matrix.
    Input(shape=(X_train.shape[1],)),
    Dense(units=16, activation='relu'),
    Dense(units=8, activation='relu'),
    # Single sigmoid unit: outputs a value in (0, 1) for the binary Purchased label.
    Dense(units=1, activation='sigmoid'),
])

In [None]:
# Compile the model: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 50 epochs in mini-batches of 10 rows; the test split is passed as
# validation data so val_loss / val_accuracy are reported after every epoch.

model.fit(
    x=X_train,
    y=Y_train,
    batch_size=10,
    epochs=50,
    validation_data=(X_test, Y_test),
)

Epoch 1/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9063 - loss: 0.2514 - val_accuracy: 0.9150 - val_loss: 0.2692
Epoch 2/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9122 - loss: 0.2417 - val_accuracy: 0.9150 - val_loss: 0.2715
Epoch 3/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9079 - loss: 0.2399 - val_accuracy: 0.9150 - val_loss: 0.2686
Epoch 4/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9120 - loss: 0.2427 - val_accuracy: 0.9150 - val_loss: 0.2706
Epoch 5/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9083 - loss: 0.2493 - val_accuracy: 0.9150 - val_loss: 0.2691
Epoch 6/50
[1m80/80[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9094 - loss: 0.2276 - val_accuracy: 0.9150 - val_loss: 0.2722
Epoch 7/50
[1m80/80[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7eb7ae72dc60>

In [None]:
# Score the trained model on the held-out test split; evaluate() yields the loss
# followed by the accuracy metric configured at compile time.
test_loss, test_accuracy = model.evaluate(
    x=X_test,
    y=Y_test,
)
print("Test Accuracy:", test_accuracy)

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9131 - loss: 0.2797 
Test Accuracy: 0.9150000214576721


In [None]:
#This is an Interactive demo generated by ChatGPT that'll query the model to get inference based on the input from the user.


def predict_purchase(model, scaler):
    """Interactively ask for one person's details and print the purchase prediction.

    Prompts for gender, age and annual salary, encodes and scales them the same
    way as the training features, queries the model and prints the verdict.

    Args:
        model: Trained classifier exposing ``predict``; it is expected to return
            one probability for the single input row.
        scaler: Fitted scaler exposing ``transform`` (the one fitted on the
            training features).

    Returns:
        None. The prediction, or a validation message for bad input, is printed.
    """
    # Must match the encoding used for the training data (Female -> 0, Male -> 1).
    gender_codes = {"female": 0, "male": 1}

    # Step 1: Prompt the user for input, validating each answer as it arrives
    gender_text = input("Enter Gender (Male/Female): ").strip().lower()
    if gender_text not in gender_codes:
        print("Invalid input for gender.")
        return
    gender = gender_codes[gender_text]

    try:
        age = int(input("Enter Age: "))
        annual_salary = float(input("Enter Annual Salary: "))
    except ValueError:
        # Non-numeric answers are reported instead of surfacing a traceback.
        print("Invalid input for age or annual salary.")
        return

    # Step 2: Build a one-row DataFrame with the same column names as the training features
    new_data = pd.DataFrame([[gender, age, annual_salary]], columns=['Gender', 'Age', 'AnnualSalary'])

    # Scale the data (using the same scaler fitted on the training data)
    new_data_scaled = scaler.transform(new_data)

    # Step 3: Make a prediction
    prediction = model.predict(new_data_scaled)

    # predict() returns an array (one row, one output). Pull out the scalar
    # explicitly: calling int() on an array with ndim > 0 is deprecated in NumPy.
    probability = float(np.asarray(prediction).reshape(-1)[0])

    # Convert the probability to a binary class (0 or 1) based on a 0.5 threshold
    predicted_class = int(probability > 0.5)

    # Step 4: Display the result
    if predicted_class == 1:
        print("The model predicts that the person will purchase a car.")
    else:
        print("The model predicts that the person will NOT purchase a car.")

# Run the interactive demo once, using the trained model and the scaler fitted on the training features
predict_purchase(model, scaler)

Enter Gender (Male/Female): Male
Enter Age: 30
Enter Annual Salary: 50000
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
The model predicts that the person will NOT purchase a car.


  predicted_class = int(prediction > 0.5)
