### **CCT College Dublin**
### **Module Title:** Machine Learning for AI 
### **Lecture Name:** David McQuaid 
### **Continuous Assessment 2**
### **Student Full Name:** Breno Silva Brito
### **Student Number:** 2023462
### **Date of Submission:** ?/05/2024
### **github:** https://github.com/sbritobreno/ML_CA2

### **Neural Networks**

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

import warnings
warnings.filterwarnings("ignore")

# Load the dataset
df = pd.read_csv('./BankRecords.csv')

# Convert binary categorical variables to numerical
df['Personal Loan'] = df['Personal Loan'].apply(lambda x: 1 if x == 'Yes' else 0)
df['Securities Account'] = df['Securities Account'].apply(lambda x: 1 if x == 'Yes' else 0)
df['CD Account'] = df['CD Account'].apply(lambda x: 1 if x == 'Yes' else 0)
df['Online Banking'] = df['Online Banking'].apply(lambda x: 1 if x == 'Yes' else 0)
df['CreditCard'] = df['CreditCard'].apply(lambda x: 1 if x == 'Yes' else 0)

# One-hot encode the 'Education' column
encoder = OneHotEncoder()
encoded_education = encoder.fit_transform(df[['Education']]).toarray()
encoded_education_df = pd.DataFrame(encoded_education, columns=encoder.get_feature_names_out(['Education']))

# Concatenate the one-hot encoded variables with the original DataFrame
df = pd.concat([df, encoded_education_df], axis=1)

# Drop the original 'Education' column and other non-predictive columns
df.drop(['ID', 'Sort Code', 'Education'], axis=1, inplace=True)

# Identify numerical features
numerical_features = ['Age', 'Experience(Years)', 'Credit Score', 'Mortgage(Thousands\'s)']

# Normalize numerical features
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# Define features (X) and target variable (y)
X = df.drop('Income(Thousands\'s)', axis=1)
y = df['Income(Thousands\'s)']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the Neural Network model
model = Sequential()
model.add(Dense(13, input_dim=X_train.shape[1], activation='relu'))  
model.add(Dense(1))  

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, validation_split=0.2, epochs=10, batch_size=10, verbose=0)

# Train a Random Forest Regressor
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Predict and evaluate the Random Forest Regressor
y_pred_rf = regressor.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)
r2_rf = r2_score(y_test, y_pred_rf)

print(f'Random Forest Regressor Mean Squared Error: {mse_rf}')
print(f'Random Forest Regressor R-squared: {r2_rf}')

# Predict and evaluate the Neural Network
y_pred_nn = model.predict(X_test)
mse_nn = mean_squared_error(y_test, y_pred_nn)
r2_nn = r2_score(y_test, y_pred_nn)

print(f'Neural Network Mean Squared Error: {mse_nn}')
print(f'Neural Network R-squared: {r2_nn}')

# Example new customer data 
new_customer_data = {
    'Age': 40,
    'Experience(Years)': 15,
    'Family': 2,
    'Credit Score': 1.8,
    'Mortgage(Thousands\'s)': 0,
    'Personal Loan': 0,
    'Securities Account': 1,
    'CD Account': 0,
    'Online Banking': 1,
    'CreditCard': 0,
    'Education_Degree': 0,
    'Education_Diploma': 1,   
    'Education_Masters': 0 
}

# Convert to DataFrame
new_customer_df = pd.DataFrame([new_customer_data])

# Normalize numerical features
new_customer_df[numerical_features] = scaler.transform(new_customer_df[numerical_features])

# Reshape input data to match the neural network model
new_customer_array = new_customer_df.to_numpy()
new_customer_array = new_customer_array.reshape((1, -1))  # Reshaping to have a batch size of 1

# Make predictions
income_prediction_nn = model.predict(new_customer_array)
income_prediction_rf = regressor.predict(new_customer_df)

print(f'Predicted Income (Neural Network): {income_prediction_nn[0][0]}')
print(f'Predicted Income (Random Forest Regressor): {income_prediction_rf[0]}')

Random Forest Regressor Mean Squared Error: 389.3815946716255
Random Forest Regressor R-squared: 0.8163745567116953
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
Neural Network Mean Squared Error: 1089.633411667839
Neural Network R-squared: 0.48614823818206787
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
Predicted Income (Neural Network): 74.59297180175781
Predicted Income (Random Forest Regressor): 67.57


### **Semantic Analysis**