<a href="https://colab.research.google.com/github/kihoon71/mixed_precision_numpy/blob/main/single_precision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mixed Precision

Mixed precision을 NumPy로 간단하게 재현하기 위한 노트북입니다. NumPy로 작성한 MLP 코드와 학습용 데이터셋을 사용해 간단하게 재현해 보겠습니다.

In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import seaborn as sns

In [11]:
# This code was produced with the help of the book "Deep Learning from Scratch" and GPT.
class MLP:
    """Fully connected network with sigmoid hidden layers and a softmax output.

    Weights and biases are drawn from a standard normal distribution using
    NumPy's global random state. Training is per-sample gradient descent on
    the cross-entropy loss (see ``train``).
    """

    def __init__(self, input_size, hidden_sizes, output_size):
        """Create the layer parameters.

        Args:
            input_size: Number of input features.
            hidden_sizes: Sequence with the width of each hidden layer.
            output_size: Number of output classes.
        """
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.output_size = output_size

        # Weight initialisation: one (n_in, n_out) matrix and one (1, n_out)
        # bias row per layer. All weights are drawn before all biases so the
        # order of draws from the global RNG is the same as before.
        layer_sizes = [input_size] + list(hidden_sizes) + [output_size]
        shapes = list(zip(layer_sizes[:-1], layer_sizes[1:]))
        self.weights = [np.random.randn(n_in, n_out) for n_in, n_out in shapes]
        self.biases = [np.random.randn(1, n_out) for _, n_out in shapes]

    def sigmoid(self, x):
        """Return the logistic function of ``x`` without overflowing.

        ``exp`` is only ever evaluated on non-positive values, so large
        negative inputs no longer overflow (the naive ``1 / (1 + exp(-x))``
        does, and much earlier in float16/float32 than in float64).
        """
        z = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + z), z / (1 + z))

    def sigmoid_derivative(self, x):
        """Return the sigmoid derivative given the sigmoid *output* ``x``."""
        return x * (1 - x)

    def softmax(self, x):
        """Return the softmax of ``x`` along the last axis.

        The row maximum is subtracted first so ``exp`` cannot overflow.
        """
        exp_values = np.exp(x - np.max(x, axis=-1, keepdims=True))
        return exp_values / np.sum(exp_values, axis=-1, keepdims=True)

    def softmax_derivative(self, x):
        """Return ``diag(s) - s s^T`` for ``x`` flattened into a column ``s``.

        Not called by the other methods of this class.
        """
        s = x.reshape(-1, 1)
        return np.diagflat(s) - np.dot(s, s.T)

    def forward(self, x):
        """Run the forward pass.

        Returns:
            A tuple ``(activations, weighted_inputs)``. ``activations`` starts
            with the input ``x`` followed by the output of every layer;
            ``weighted_inputs`` holds each layer's pre-activation values.
        """
        activations = [x]  # the input first, then each layer's output in order
        weighted_inputs = []

        last = len(self.weights) - 1
        for i, (w, b) in enumerate(zip(self.weights, self.biases)):
            z = np.dot(activations[-1], w) + b  # matmul + bias
            weighted_inputs.append(z)
            # Softmax on the output layer, sigmoid everywhere else.
            activations.append(self.softmax(z) if i == last else self.sigmoid(z))

        return activations, weighted_inputs

    def compute_loss(self, y_true, y_pred):
        """Return the summed cross-entropy between ``y_true`` and ``y_pred``.

        Predictions are clipped away from zero before the logarithm so that a
        saturated softmax output yields a large finite loss instead of
        ``inf``/``nan``.
        """
        y_pred = np.asarray(y_pred)
        if not np.issubdtype(y_pred.dtype, np.floating):
            y_pred = y_pred.astype(np.float64)
        # Use the dtype's own smallest normal number so the floor does not
        # round to zero in reduced precision.
        floor = np.finfo(y_pred.dtype).tiny
        return -np.sum(y_true * np.log(np.clip(y_pred, floor, None)))

    def compute_output_error(self, y_true, y_pred):
        """Return the output-layer error ``y_true - y_pred``.

        The sign is such that ``update_weights`` *adds* the resulting step.
        """
        return y_true - y_pred

    def compute_hidden_error(self, next_layer_error, next_layer_weights, current_layer_output):
        """Back-propagate an error through one layer.

        The next layer's error is projected back through its weights and
        scaled by the sigmoid derivative of this layer's output.
        """
        return np.dot(next_layer_error, next_layer_weights.T) * self.sigmoid_derivative(current_layer_output)

    def update_weights(self, activations, errors, learning_rate):
        """Apply one in-place update step to every weight matrix and bias row.

        Args:
            activations: List returned by ``forward`` (input first).
            errors: One error array per layer, ordered from first to last.
            learning_rate: Step size.
        """
        for w, b, layer_input, err in zip(self.weights, self.biases, activations, errors):
            w += learning_rate * layer_input.T.dot(err)
            b += learning_rate * np.sum(err, axis=0)

    def train(self, X, y, epochs, learning_rate):
        """Train with one update per sample and print the mean loss every 100 epochs.

        Args:
            X: Array of inputs, one sample per row.
            y: Array of one-hot targets aligned with ``X``.
            epochs: Number of passes over ``X``.
            learning_rate: Step size passed to ``update_weights``.
        """
        for epoch in range(epochs):
            total_loss = 0
            for i in range(len(X)):
                # Slicing keeps the leading batch axis (a batch of one row).
                x = X[i:i + 1]
                target = y[i:i + 1]

                # Forward pass.
                activations, weighted_inputs = self.forward(x)

                # Output error, then hidden-layer errors from last to first.
                errors = [self.compute_output_error(target, activations[-1])]
                for j in range(len(self.weights) - 1, 0, -1):
                    errors.append(
                        self.compute_hidden_error(errors[-1], self.weights[j], activations[j])
                    )
                errors.reverse()

                # Weight update.
                self.update_weights(activations, errors, learning_rate)

                # The loss uses the prediction made before this update.
                total_loss += self.compute_loss(target, activations[-1])

            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {total_loss / len(X)}")

    def predict(self, x):
        """Return the softmax output of the network for ``x``."""
        activations, _ = self.forward(x)
        return activations[-1]


In [9]:
# Load the Titanic dataset through seaborn.
df = sns.load_dataset('titanic')

print(df.columns)

# Keep only the feature columns and the target column.
selected_features = ['pclass', 'age', 'sibsp', 'parch', 'fare', 'sex', 'embarked', 'survived']
df = df[selected_features]

# Drop rows with missing values.
df = df.dropna()

# One-hot encode the categorical columns.
df = pd.get_dummies(df, columns=['sex', 'embarked'])

# Separate inputs (X) and targets (y). Cast to float explicitly: depending on
# the pandas version the dummy columns are bool, which would otherwise give an
# object-dtype feature matrix.
X = df.drop('survived', axis=1).to_numpy(dtype=float)
y = pd.get_dummies(df['survived']).to_numpy(dtype=float)  # one-hot encoded survival label

# Split BEFORE scaling so the test rows do not influence the scaler statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using statistics from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep X in the standardised space as well, in case later cells reuse it.
X = scaler.transform(X)

# Build the MLP; the output width equals the number of classes.
mlp = MLP(input_size=X.shape[1], hidden_sizes=[8], output_size=y.shape[1])

# Train.
mlp.train(X_train, y_train, epochs=1000, learning_rate=0.1)

# Predict on the held-out test rows.
predictions = mlp.predict(X_test)

# Compute the test accuracy.
accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
print(f"Accuracy on test set: {accuracy * 100:.2f}%")


Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')
Epoch 0, Loss: 0.5358807230140038
Epoch 100, Loss: 0.36140190917683623
Epoch 200, Loss: 0.3451140471529232
Epoch 300, Loss: 0.3363091962599866
Epoch 400, Loss: 0.33135929390979374
Epoch 500, Loss: 0.32818483634017204
Epoch 600, Loss: 0.3259232883381506
Epoch 700, Loss: 0.32400008998840457
Epoch 800, Loss: 0.3221611464043902
Epoch 900, Loss: 0.32053412163797135
Accuracy on test set: 75.52%
