## Import Library

In [44]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
import numpy as np

In [45]:
dataset = pd.read_csv("Korean_demographics.csv")
dataset

Unnamed: 0,Date,Region,Birth,Birth_rate,Death,Death_rate,Divorce,Divorce_rate,Marriage,Marriage_rate,Natural_growth,Natural_growth_rate
0,1/1/2000,Busan,3752.0,11.61,1875.0,5.8,814.0,2.5,2435.0,7.5,1877.0,5.8
1,1/1/2000,Chungcheongbuk-do,1903.0,15.06,924.0,7.3,220.0,1.7,828.0,6.6,979.0,7.7
2,1/1/2000,Chungcheongnam-do,2398.0,14.75,1466.0,9.0,321.0,2.0,1055.0,6.5,932.0,5.7
3,1/1/2000,Daegu,3057.0,14.39,1117.0,5.3,422.0,2.0,1577.0,7.4,1940.0,9.1
4,1/1/2000,Daejeon,1859.0,16.08,565.0,4.9,280.0,2.4,868.0,7.5,1294.0,11.2
...,...,...,...,...,...,...,...,...,...,...,...,...
4855,6/1/2022,Jeollanam-do,565.0,3.80,1369.0,9.1,299.0,2.0,479.0,3.2,-805.0,-5.4
4856,6/1/2022,Sejong,248.0,7.90,106.0,3.4,43.0,1.4,123.0,3.9,141.0,4.5
4857,6/1/2022,Seoul,3137.0,4.10,3631.0,4.7,1088.0,1.4,2630.0,3.4,-494.0,-0.6
4858,6/1/2022,Ulsan,443.0,4.80,434.0,4.7,154.0,1.7,316.0,3.5,9.0,0.1


In [46]:
dataset.dtypes

Date                    object
Region                  object
Birth                  float64
Birth_rate             float64
Death                  float64
Death_rate             float64
Divorce                float64
Divorce_rate           float64
Marriage               float64
Marriage_rate          float64
Natural_growth         float64
Natural_growth_rate    float64
dtype: object

In [47]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4860 entries, 0 to 4859
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Date                 4860 non-null   object 
 1   Region               4860 non-null   object 
 2   Birth                4716 non-null   float64
 3   Birth_rate           4709 non-null   float64
 4   Death                4716 non-null   float64
 5   Death_rate           4709 non-null   float64
 6   Divorce              4716 non-null   float64
 7   Divorce_rate         4709 non-null   float64
 8   Marriage             4716 non-null   float64
 9   Marriage_rate        4709 non-null   float64
 10  Natural_growth       4716 non-null   float64
 11  Natural_growth_rate  4709 non-null   float64
dtypes: float64(10), object(2)
memory usage: 455.8+ KB


In [48]:
dataset.isnull().sum()

Date                     0
Region                   0
Birth                  144
Birth_rate             151
Death                  144
Death_rate             151
Divorce                144
Divorce_rate           151
Marriage               144
Marriage_rate          151
Natural_growth         144
Natural_growth_rate    151
dtype: int64

## Data Cleaning

In [49]:
from sklearn.impute import SimpleImputer 
dataset

Unnamed: 0,Date,Region,Birth,Birth_rate,Death,Death_rate,Divorce,Divorce_rate,Marriage,Marriage_rate,Natural_growth,Natural_growth_rate
0,1/1/2000,Busan,3752.0,11.61,1875.0,5.8,814.0,2.5,2435.0,7.5,1877.0,5.8
1,1/1/2000,Chungcheongbuk-do,1903.0,15.06,924.0,7.3,220.0,1.7,828.0,6.6,979.0,7.7
2,1/1/2000,Chungcheongnam-do,2398.0,14.75,1466.0,9.0,321.0,2.0,1055.0,6.5,932.0,5.7
3,1/1/2000,Daegu,3057.0,14.39,1117.0,5.3,422.0,2.0,1577.0,7.4,1940.0,9.1
4,1/1/2000,Daejeon,1859.0,16.08,565.0,4.9,280.0,2.4,868.0,7.5,1294.0,11.2
...,...,...,...,...,...,...,...,...,...,...,...,...
4855,6/1/2022,Jeollanam-do,565.0,3.80,1369.0,9.1,299.0,2.0,479.0,3.2,-805.0,-5.4
4856,6/1/2022,Sejong,248.0,7.90,106.0,3.4,43.0,1.4,123.0,3.9,141.0,4.5
4857,6/1/2022,Seoul,3137.0,4.10,3631.0,4.7,1088.0,1.4,2630.0,3.4,-494.0,-0.6
4858,6/1/2022,Ulsan,443.0,4.80,434.0,4.7,154.0,1.7,316.0,3.5,9.0,0.1


In [50]:
imputer = SimpleImputer(strategy='mean')

In [51]:
dataset['Birth_rate'] = imputer.fit_transform(dataset[['Birth_rate']])
dataset['Death_rate'] = imputer.fit_transform(dataset[['Death_rate']])
dataset['Divorce_rate'] = imputer.fit_transform(dataset[['Divorce_rate']])
dataset['Marriage_rate'] = imputer.fit_transform(dataset[['Marriage_rate']])
dataset['Natural_growth_rate'] = imputer.fit_transform(dataset[['Natural_growth_rate']])

In [52]:
dataset.isna().sum()

Date                     0
Region                   0
Birth                  144
Birth_rate               0
Death                  144
Death_rate               0
Divorce                144
Divorce_rate             0
Marriage               144
Marriage_rate            0
Natural_growth         144
Natural_growth_rate      0
dtype: int64

In [53]:
imputer = SimpleImputer(strategy='median')

In [54]:
dataset['Birth'] = imputer.fit_transform(dataset[['Birth']])
dataset['Death'] = imputer.fit_transform(dataset[['Death']])
dataset['Divorce'] = imputer.fit_transform(dataset[['Divorce']])
dataset['Marriage'] = imputer.fit_transform(dataset[['Marriage']])
dataset['Natural_growth'] = imputer.fit_transform(dataset[['Natural_growth']])

In [55]:
dataset.isna().sum()

Date                   0
Region                 0
Birth                  0
Birth_rate             0
Death                  0
Death_rate             0
Divorce                0
Divorce_rate           0
Marriage               0
Marriage_rate          0
Natural_growth         0
Natural_growth_rate    0
dtype: int64

## Data Splitting

In [56]:
from sklearn.model_selection import train_test_split
dataset.head(5)

Unnamed: 0,Date,Region,Birth,Birth_rate,Death,Death_rate,Divorce,Divorce_rate,Marriage,Marriage_rate,Natural_growth,Natural_growth_rate
0,1/1/2000,Busan,3752.0,11.61,1875.0,5.8,814.0,2.5,2435.0,7.5,1877.0,5.8
1,1/1/2000,Chungcheongbuk-do,1903.0,15.06,924.0,7.3,220.0,1.7,828.0,6.6,979.0,7.7
2,1/1/2000,Chungcheongnam-do,2398.0,14.75,1466.0,9.0,321.0,2.0,1055.0,6.5,932.0,5.7
3,1/1/2000,Daegu,3057.0,14.39,1117.0,5.3,422.0,2.0,1577.0,7.4,1940.0,9.1
4,1/1/2000,Daejeon,1859.0,16.08,565.0,4.9,280.0,2.4,868.0,7.5,1294.0,11.2


In [57]:
X = dataset[['Birth','Birth_rate','Death','Death_rate','Divorce','Divorce_rate','Marriage','Marriage_rate','Natural_growth','Natural_growth_rate']]
Y = dataset[['Birth','Birth_rate','Death','Death_rate','Divorce','Divorce_rate','Marriage','Marriage_rate','Natural_growth','Natural_growth_rate']]

In [58]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.1 , random_state = 0)


In [59]:
X_train.shape

(4374, 10)

In [60]:
X_test.shape

(486, 10)

In [61]:
Y_train.shape

(4374, 10)

In [62]:
Y_test.shape

(486, 10)

## Membuat Model (Sequential)

In [70]:
model = Sequential()

In [71]:
model.add(Dense(1, input_shape=[1]))

In [72]:
model.compile(optimizer='sgd', loss='mse')

In [74]:
model.fit(X, Y, batch_size=64, epochs=50, validation_split=0.3)

Epoch 1/50


ValueError: in user code:

    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\input_spec.py", line 277, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_3" "                 f"(type Sequential).
    
    Input 0 of layer "dense_12" is incompatible with the layer: expected axis -1 of input shape to have value 1, but received input with shape (None, 10)
    
    Call arguments received by layer "sequential_3" "                 f"(type Sequential):
      • inputs=tf.Tensor(shape=(None, 10), dtype=float64)
      • training=True
      • mask=None


In [67]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 512)               3584      
                                                                 
 dense_9 (Dense)             (None, 256)               131328    
                                                                 
 dense_10 (Dense)            (None, 128)               32896     
                                                                 
 dense_11 (Dense)            (None, 6)                 774       
                                                                 
Total params: 168,582
Trainable params: 168,582
Non-trainable params: 0
_________________________________________________________________


In [68]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [69]:
hasil_model = model.fit(X_train, Y_train, epochs=10, batch_size=256, validation_data=(X_test, Y_test))


Epoch 1/10


ValueError: in user code:

    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\LENOVO\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\input_spec.py", line 295, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_2" is incompatible with the layer: expected shape=(None, 6), found shape=(None, 10)
