# PROBLEM STATEMENT

You are working as a car salesman and you would like to develop a model to predict the total dollar amount that customers are willing to pay given the following attributes: 
- Customer Name
- Customer e-mail
- Country
- Gender
- Age
- Annual Salary 
- Credit Card Debt 
- Net Worth 

The model should predict: 
- Car Purchase Amount 

# STEP #0: LIBRARIES IMPORT


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# STEP #1: IMPORT DATASET

In [None]:
# Load the customer data set. The CSV is decoded as ISO-8859-1 (Latin-1);
# presumably it contains characters that are not valid UTF-8 -- confirm if the file changes.
car_df = pd.read_csv(
    'Car_Purchasing_Data.csv',
    encoding='ISO-8859-1',
)

In [None]:
# Display the loaded DataFrame (in a notebook, a bare expression is rendered as the cell output)
car_df

# STEP #2: VISUALIZE DATASET

In [None]:
# Draw pairwise scatter plots / histograms for the numeric columns of the data set
sns.pairplot(data=car_df)

# STEP #3: CREATE TESTING AND TRAINING DATASET/DATA CLEANING

In [None]:
# Feature matrix: every column except the identifying text fields and the target
X = car_df.drop(
    columns=['Customer Name', 'Customer e-mail', 'Country', 'Car Purchase Amount'],
)

In [None]:
# Target vector: the amount each customer paid for the car
y = car_df.loc[:, 'Car Purchase Amount']

#### Normalize the data

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn each feature's min/max from X, then rescale X with those statistics
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)


In [None]:
# Per-feature maximum of the original (unscaled) data the scaler was most recently fit on
scaler.data_max_

In [None]:
# Per-feature minimum of the original (unscaled) data the scaler was most recently fit on
scaler.data_min_

In [None]:
# Check the shape of the target before reshaping it
y.shape

In [None]:
# Turn the target into a 2-D column (one row per sample, one column),
# the layout sklearn transformers expect
y = y.to_numpy().reshape(-1, 1)

In [None]:
# Confirm the target is now 2-D with a single column
y.shape

In [None]:
# Normalize the target variable with its own scaler.
# Re-fitting `scaler` here would overwrite the min/max it learned from X, so it
# could no longer be used to transform new feature rows. A dedicated target
# scaler also lets predictions be mapped back to dollars via
# scaler_y.inverse_transform(...).
scaler_y = MinMaxScaler()
y_scaled = scaler_y.fit_transform(y)

In [None]:
# Inspect the normalized target values
y_scaled

# STEP #4: TRAINING THE MODEL

In [None]:
# Split the data into training and testing sets (33% held out for testing).
# A fixed random_state makes the shuffle -- and therefore the reported test
# metrics -- repeatable from run to run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.33, random_state=42
)

In [None]:
import tensorflow.keras
# Take every Keras symbol from the tensorflow namespace imported above, rather
# than mixing it with the standalone `keras` package
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import MinMaxScaler

# Build a fully connected multilayer perceptron model

model = Sequential()
# Size the input layer from the training data (one input per feature column),
# with 25 neurons in the first hidden layer and relu activation function
model.add(Dense(25, input_dim=X_train.shape[1], activation='relu'))
# Use 25 neurons in second hidden layer with relu activation function
model.add(Dense(25, activation='relu'))
# Get 1 output with linear activation function to predict value
model.add(Dense(1, activation='linear'))
# Get a summary of the model
model.summary()


In [None]:
# Configure training: minimize mean squared error with the Adam optimizer
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Fit the network for 20 epochs in mini-batches of 25 samples;
# validation_split=0.2 sets aside a fraction of the training data for validation
epochs_hist = model.fit(
    X_train,
    y_train,
    batch_size=25,
    epochs=20,
    validation_split=0.2,
    verbose=1,
)

# STEP #5: EVALUATING THE MODEL

In [None]:
# Plot training and validation loss, one point per epoch

loss_history = epochs_hist.history
for loss_key in ('loss', 'val_loss'):
    plt.plot(loss_history[loss_key])

plt.title('Model Loss Progression During Training/Validation')
plt.xlabel('Epoch Number')
plt.ylabel('Training and Validation Losses')
# Legend entries follow the order in which the curves were drawn above
plt.legend(['Training Loss', 'Validation Loss'])


In [None]:
# Run the trained model on the held-out test features
predictions = model.predict(x=X_test)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Report test-set performance. y_test and predictions are both on the
# normalized target scale, so the error values are in scaled units, not dollars.
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = mse ** 0.5
r_squared = r2_score(y_test, predictions)

print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R Squared: {r_squared}")