# Artificial Neural Network

### Importing the libraries

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
tf.__version__

'2.17.1'

## Part 1 - Data Preprocessing

### Importing the dataset

In [None]:
dataset = pd.read_csv('Dev_data_to_be_shared.csv')
X = dataset.iloc[:, 2:].values
y = dataset.iloc[:, 1].values

In [None]:
print(X)

[[    nan     nan     nan ...     nan     nan     nan]
 [221000.      0.      0. ...      0.      0.      0.]
 [ 25000.      0.      0. ...     nan     nan     nan]
 ...
 [ 95000.      0.      0. ...     nan     nan     nan]
 [ 88000.      0.      0. ...     nan     nan     nan]
 [ 94000.      0.      0. ...     nan     nan     nan]]


In [None]:
from sklearn.impute import SimpleImputer

# Example for numerical data
imputer = SimpleImputer(strategy='mean')  # or 'median', 'most_frequent'
data_imputed = imputer.fit_transform(X)



In [None]:
from imblearn.over_sampling import SMOTE

# Create SMOTE object
smote = SMOTE(random_state=42)

# Perform oversampling
X_smote, y_smote = smote.fit_resample(data_imputed, y)

In [None]:
print(data_imputed)

[[1.54239120e+05 9.57076900e+00 2.20747468e-03 ... 1.43049096e+00
  1.21447028e-01 1.19207580e-01]
 [2.21000000e+05 0.00000000e+00 0.00000000e+00 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [2.50000000e+04 0.00000000e+00 0.00000000e+00 ... 1.43049096e+00
  1.21447028e-01 1.19207580e-01]
 ...
 [9.50000000e+04 0.00000000e+00 0.00000000e+00 ... 1.43049096e+00
  1.21447028e-01 1.19207580e-01]
 [8.80000000e+04 0.00000000e+00 0.00000000e+00 ... 1.43049096e+00
  1.21447028e-01 1.19207580e-01]
 [9.40000000e+04 0.00000000e+00 0.00000000e+00 ... 1.43049096e+00
  1.21447028e-01 1.19207580e-01]]


In [None]:
print(y)

[1 0 1 ... 1 1 0]


### Encoding categorical data

Label Encoding the "Gender" column

In [None]:
#from sklearn.preprocessing import LabelEncoder
#le = LabelEncoder()
#X[:, 2] = le.fit_transform(X[:, 2])

In [None]:
print(X)

[[619 'France' 0 ... 1 1 101348.88]
 [608 'Spain' 0 ... 0 1 112542.58]
 [502 'France' 0 ... 1 0 113931.57]
 ...
 [709 'France' 0 ... 0 1 42085.58]
 [772 'Germany' 1 ... 1 0 92888.52]
 [792 'France' 0 ... 1 0 38190.78]]


One Hot Encoding the "Geography" column

In [None]:
#from sklearn.compose import ColumnTransformer
#from sklearn.preprocessing import OneHotEncoder
#ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
#X = np.array(ct.fit_transform(X))

In [None]:
print(X)

[[1.0 0.0 0.0 ... 1 1 101348.88]
 [0.0 0.0 1.0 ... 0 1 112542.58]
 [1.0 0.0 0.0 ... 1 0 113931.57]
 ...
 [1.0 0.0 0.0 ... 0 1 42085.58]
 [0.0 1.0 0.0 ... 1 0 92888.52]
 [1.0 0.0 0.0 ... 1 0 38190.78]]


### Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size = 0.2, random_state = 0)

### Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## Part 2 - Building the ANN

### Initializing the ANN

In [None]:
ann=tf.keras.models.Sequential()

### Adding the input layer and the first hidden layer

In [None]:
from keras.layers import Dropout
ann.add(tf.keras.layers.Dense(units=6,activation='relu'))
ann.add(Dropout(0.2))

### Adding the second hidden layer

In [None]:
ann.add(tf.keras.layers.Dense(units=6,activation='relu'))
ann.add(Dropout(0.2))

### Adding the output layer

In [None]:
ann.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))

## Part 3 - Training the ANN

### Compiling the ANN

In [None]:
ann.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

### Training the ANN on the Training set

In [None]:
ann.fit(X_train,y_train,batch_size=32,epochs=20)

Epoch 1/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 3ms/step - accuracy: 0.7282 - loss: 0.5308
Epoch 2/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.7965 - loss: 0.4460
Epoch 3/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step - accuracy: 0.7986 - loss: 0.4449
Epoch 4/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - accuracy: 0.7966 - loss: 0.4460
Epoch 5/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 3ms/step - accuracy: 0.7993 - loss: 0.4445
Epoch 6/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.7964 - loss: 0.4469
Epoch 7/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - accuracy: 0.7984 - loss: 0.4455
Epoch 8/20
[1m4772/4772[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 3ms/step - accuracy: 0.7987 - loss: 0.4419
Epoch 9/20
[1m4

<keras.src.callbacks.history.History at 0x7d0bc5015210>

## Part 4 - Making the predictions and evaluating the model

**Solution**

### Predicting the Test set results

In [None]:
y_pred=ann.predict(X_test)
y_pred=(y_pred>0.5)
print(np.concatenate((y_pred.reshape(len(y_pred),1),y_test.reshape(len(y_test),1)),1))

[1m1193/1193[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step
[[1 1]
 [1 1]
 [1 1]
 ...
 [1 1]
 [1 1]
 [0 0]]


### Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm=confusion_matrix(y_test,y_pred)
print(cm)
accuracy_score(y_test,y_pred)

[[17948  1063]
 [  655 18508]]


0.9549955467071829