# American Express Data Analysis

## Importing Basic Libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import openpyxl

## Importing dataset

In [2]:
dataset  = pd.read_excel('dataset/AMEX_Credit.xlsx', engine='openpyxl')
X = dataset.iloc[:, 0:-1].values
y = dataset.iloc[:, -1].values

In [3]:
print("X :\n", X)
print("y :\n",y)

X :
 [[553 'Delhi' 'Female' ... 4 1 274150]
 [447 'Bengaluru' 'Male' ... 4 1 519360]
 [501 'Delhi' 'Female' ... 4 1 545501]
 ...
 [627 'Mumbai' 'Female' ... 4 0 494067]
 [600 'Bengaluru' 'Female' ... 2 1 109375]
 [553 'Delhi' 'Male' ... 4 1 180031]]
y :
 [0 0 0 ... 0 1 0]


## Importing LabelEncoder

### Encoding Gender

In [4]:
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
X[:, 2] = label_encoder.fit_transform(X[:, 2])

In [5]:
print("X :\n", X)

X :
 [[553 'Delhi' 0 ... 4 1 274150]
 [447 'Bengaluru' 1 ... 4 1 519360]
 [501 'Delhi' 0 ... 4 1 545501]
 ...
 [627 'Mumbai' 0 ... 4 0 494067]
 [600 'Bengaluru' 0 ... 2 1 109375]
 [553 'Delhi' 1 ... 4 1 180031]]


In [6]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [7]:
print("X :\n", X)

X :
 [[0.0 1.0 0.0 ... 4 1 274150]
 [1.0 0.0 0.0 ... 4 1 519360]
 [0.0 1.0 0.0 ... 4 1 545501]
 ...
 [0.0 0.0 1.0 ... 4 0 494067]
 [1.0 0.0 0.0 ... 2 1 109375]
 [0.0 1.0 0.0 ... 4 1 180031]]


## Spliting dataset into Test and Training Set

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [9]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

## ANN Build Up

In [10]:
ann = tf.keras.models.Sequential()

### Adding 1st layer

In [11]:
ann.add(tf.keras.layers.Dense(units=5, activation='relu'))

### Adding 2nd layer

In [12]:
ann.add(tf.keras.layers.Dense(units=5, activation='relu'))

### Adding output layer

In [13]:
ann.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

### Compiling Model

In [14]:
ann.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

### Training Model

In [15]:
ann.fit(X_train, y_train, batch_size=32, epochs=120)

Epoch 1/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.4164 - loss: 0.8582
Epoch 2/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.8029 - loss: 0.5165
Epoch 3/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7953 - loss: 0.4746
Epoch 4/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7959 - loss: 0.4536
Epoch 5/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7889 - loss: 0.4472
Epoch 6/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7966 - loss: 0.4361
Epoch 7/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7953 - loss: 0.4353
Epoch 8/120
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7986 - loss: 0.4311
Epoch 9/120
[1m249/249[0m [32

<keras.src.callbacks.history.History at 0x1dcd1a333d0>

### Predicting Sample Output

In [16]:
print(ann.predict(sc.transform([[0.0, 1.0, 0.0, 501, 0, 32, 2, 0.0, 4, 1, 545501]])) > 0.5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[[False]]


In [17]:
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [18]:
print(np.concatenate((y_pred.reshape(len(y_pred), 1), y_test.reshape(len(y_test), 1)), 1))

[[0 1]
 [0 0]
 [0 0]
 ...
 [0 1]
 [0 0]
 [0 1]]


## Generating Confusion Matrix and Accuracy Score

In [19]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
print("Confusion Matrix =\n", cm)
print("Accuracy Score = ", ac)

Confusion Matrix =
 [[1491   82]
 [ 240  173]]
Accuracy Score =  0.837865055387714
