In [1]:
import pandas as pd

data = pd.read_csv("../Data/gpascore.csv")
data.head()

Unnamed: 0,admit,gre,gpa,rank
0,0,380.0,3.21,3
1,1,660.0,3.67,3
2,1,800.0,4.0,1
3,1,640.0,3.19,4
4,0,520.0,2.33,4


- admit : 합격여부
- gre : 영어성적(Graduate Record Examination)
- gpa : 학점 
- rank : 지원한 대학원 수준 

#### Preprocessing(전처리)

In [2]:
# 결측치 처리
data.isnull().sum()

admit    0
gre      1
gpa      0
rank     0
dtype: int64

In [3]:
# 결측치 데이터 삭제
data.dropna(inplace=True)

#### 결측치 처리시 다른값으로 대체
data.fillna(100)

In [4]:
# 제거 확인
data.isnull().sum()

admit    0
gre      0
gpa      0
rank     0
dtype: int64

In [8]:
# rank의 종류
sorted(data['rank'].unique())

[1, 2, 3, 4]

In [10]:
# 영어점수의 최소값과 최대값
print('최대값:', data['gre'].max())
print('최소값:', data['gre'].min())

최대값: 800.0
최소값: 220.0


In [None]:
# admit 컬럼의 target의 가능 여부 확인
sorted(data['admit'].unique())

[0, 1]

In [16]:
data[data['admit'] == 1].count() / data[data['admit'] == 0].count()

admit    0.897321
gre      0.897321
gpa      0.897321
rank     0.897321
dtype: float64

> 합격, 불합격 비율이 약 90%이므로 Target Data로 충분하다.

#### Train과 Target 나누기

In [17]:
x = data[['gre', 'gpa', 'rank']]
x.head()

Unnamed: 0,gre,gpa,rank
0,380.0,3.21,3
1,660.0,3.67,3
2,800.0,4.0,1
3,640.0,3.19,4
4,520.0,2.33,4


In [18]:
y = data['admit']
y.head()

0    0
1    1
2    1
3    1
4    0
Name: admit, dtype: int64

In [19]:
# 확인
print(x.count())
print(y.count())

gre     425
gpa     425
rank    425
dtype: int64
425


#### Deep Learning Model 만들기

In [20]:
from tensorflow import keras
from tensorflow.keras.layers import Input

#### Model 만들기

In [39]:
model = keras.Sequential()

# 입력층
model.add(
    Input(shape=(3,))
)

# 은닉층 : 1번 Hidden Layer 만들기
model.add(
    keras.layers.Dense(
        # 임의로 넣기 : 2의 배수
        256,
        activation='tanh'
        # sigmoid, tanh, relu, softmax, leakyRelu
    )
)

# 은닉층 : 2번 Hidden Layer 만들기
model.add(
    keras.layers.Dense(
        # 임의로 넣기 : 2의 배수
        128,
        activation='tanh'
        # sigmoid, tanh, relu, softmax, leakyRelu
    )
)

# 출력층
model.add(
    keras.layers.Dense(
        1,
        activation='sigmoid'
    )
)

#### 손실함수

In [40]:
model.compile(
    loss='binary_crossentropy',
    metrics=['accuracy'],
    optimizer='adam'
)

# optimizer : learning rate를 알맞게 조정해줌 
# : adam, adagrad, adadelta, rmsprop, sgd

#### 학습 시키기

In [41]:
import numpy as np

model.fit(
    np.array(x),
    np.array(y),
    epochs=1000,
    verbose=1 # 0으로 하면 log가 않보임 
)

Epoch 1/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5107 - loss: 0.7620  
Epoch 2/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4814 - loss: 0.6985 
Epoch 3/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5425 - loss: 0.7003 
Epoch 4/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5418 - loss: 0.7371 
Epoch 5/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5119 - loss: 0.7179 
Epoch 6/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5616 - loss: 0.6894 
Epoch 7/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5399 - loss: 0.6864 
Epoch 8/1000
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4907 - loss: 0.6950 
Epoch 9/1000
[1m14/14[0m [32

<keras.src.callbacks.history.History at 0x1cc3fa285f0>

#### 예측하기

In [37]:
# gre : 700, 학점 : 3.7, Rank 가능학교

model.predict(
    np.array(
        [
            [700, 3.7, 1],
            [700, 3.7, 2],
            [700, 3.7, 3],
            [700, 3.7, 4]
        ]
    )
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step


array([[0.6857208 ],
       [0.66308177],
       [0.6372346 ],
       [0.6078747 ]], dtype=float32)

In [38]:
# gre : 400, 학점 : 2.2, Rank 가능학교

model.predict(
    np.array(
        [
            [400, 2.2, 1],
            [400, 2.2, 2],
            [400, 2.2, 3],
            [400, 2.2, 4]
        ]
    )
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step


array([[0.01716779],
       [0.01426585],
       [0.01191138],
       [0.01002006]], dtype=float32)