<a href="https://colab.research.google.com/github/mickeykim70/UDEMY_DL_TF/blob/main/Part_1_Artificial_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Artficial Neural Network

## Part 0. Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
tf.__version__

'2.18.0'

## Part 1. Data Preprocessing

### Importing & preprocessing the dataset

In [3]:
import os

DATA_PATH = '/content/drive/MyDrive/TF_Ato_Z/Deep Learning A-Z/Part 1 - Artificial Neural Networks (ANN)'

In [4]:
dataset = pd.read_csv(os.path.join(DATA_PATH, 'Churn_Modelling.csv'))

X = dataset.iloc[ : , 3:-1].values
y = dataset.iloc[ : , -1].values

In [5]:
print(X)
print(y)

[[619 'France' 'Female' ... 1 1 101348.88]
 [608 'Spain' 'Female' ... 0 1 112542.58]
 [502 'France' 'Female' ... 1 0 113931.57]
 ...
 [709 'France' 'Female' ... 0 1 42085.58]
 [772 'Germany' 'Male' ... 1 0 92888.52]
 [792 'France' 'Female' ... 1 0 38190.78]]
[1 0 1 ... 1 1 0]


### Taking care of missing data

In [6]:
from sklearn.impute import SimpleImputer

# 결측치 확인
print("=== 수치형 데이터의 결측치 현황 ===")
print(pd.DataFrame(X[:, 3:]).isnull().sum())

# SimpleImputer 적용
imputer = SimpleImputer(missing_values=np.nan, strategy="mean")
imputer.fit(X[:, 3:])
X[:, 3:] = imputer.transform(X[:, 3:])  # 변환된 결과를 다시 X에 저장

=== 수치형 데이터의 결측치 현황 ===
0    0
1    0
2    0
3    0
4    0
5    0
6    0
dtype: int64


### Label Encoding the "Gender" column

In [7]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])

print(X)

[[619 'France' 0 ... 1.0 1.0 101348.88]
 [608 'Spain' 0 ... 0.0 1.0 112542.58]
 [502 'France' 0 ... 1.0 0.0 113931.57]
 ...
 [709 'France' 0 ... 0.0 1.0 42085.58]
 [772 'Germany' 1 ... 1.0 0.0 92888.52]
 [792 'France' 0 ... 1.0 0.0 38190.78]]


### One Hot Encodinf for "Geography" column

In [8]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

ct = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(sparse_output=False), [1])],
    remainder="passthrough"
)

# 변환 적용 -- 한번만 적용되도록 주의
X = ct.fit_transform(X)
print(X[10:12])

[[1.0 0.0 0.0 528 1 31.0 6.0 102016.72 2.0 0.0 0.0 80181.12]
 [0.0 0.0 1.0 497 1 24.0 3.0 0.0 2.0 1.0 0.0 76390.01]]


### 훈련데이터와 검증데이터로 나누기

In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [10]:
print(X_train.dtype)

object


### Feature Scaling

In [11]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

print(X_train[:2])

[[-1.01460667 -0.5698444   1.74309049  0.16958176 -1.09168714 -0.46460796
   0.00666099 -1.21571749  0.8095029   0.64259497 -1.03227043  1.10643166]
 [-1.01460667  1.75486502 -0.57369368 -2.30455945  0.91601335  0.30102557
  -1.37744033 -0.00631193 -0.92159124  0.64259497  0.9687384  -0.74866447]]


## Part 2. Building the ANN

### Initialize ANN

In [12]:
import keras
from keras import models, layers

In [13]:
# Building sequential model

ann = keras.Sequential(
    [
        layers.Dense(6, activation="relu", name="layer1"),
        layers.Dense(6, activation="relu", name="layer2"),
        layers.Dense(1, activation="sigmoid", name="output"),
    ]
)

## Part 3. Training ANN

### Compiling ANN

In [14]:
ann.compile(
    optimizer="rmsprop",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

### Training ANN

In [15]:
ann.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.6914 - loss: 0.6307
Epoch 2/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.7924 - loss: 0.5011
Epoch 3/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.8018 - loss: 0.4551
Epoch 4/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8154 - loss: 0.4271
Epoch 5/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.8143 - loss: 0.4126
Epoch 6/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8224 - loss: 0.4064
Epoch 7/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.8151 - loss: 0.4069
Epoch 8/100
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8103 - loss: 0.4046
Epoch 9/100
[1m250/250[0m [3

<keras.src.callbacks.history.History at 0x7b2a9093ef50>

## Part 4. Prediction

Geo: France, Credit ScoreL: 600, Gender: Male, Age: 40, Tenure: 3 years, Balance: $60,000, No Prods: 2, Credit card: yes, Salary: 50,000

In [22]:
preds = ann.predict(sc.transform([[1., 0., 0., 600, 1, 40, 3, 60000, 2, 1, 1, 50000 ]]))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step


In [24]:
print(preds > 0.5)

[[False]]
