In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense

In [2]:
# 데이터 로드 및 전처리
data = pd.read_csv('성별_전체_추계인구_유소년노년.csv', encoding='euc-kr')
data.set_index(['성별', '연령별'], inplace=True)
data = data.T
data.index = pd.to_numeric(data.index, errors='coerce')
data = data.dropna().sort_index()

In [3]:
# 유소년, 생산연령, 노년 인구 데이터를 추출
youth_population = data.loc[:, ('전체', '유소년')].values
working_population = data.loc[:, ('전체', '생산연령')].values
elderly_population = data.loc[:, ('전체', '노년')].values

# 데이터 정규화
scaler = MinMaxScaler(feature_range=(0, 1))
youth_population_scaled = scaler.fit_transform(youth_population.reshape(-1, 1))
working_population_scaled = scaler.fit_transform(working_population.reshape(-1, 1))
elderly_population_scaled = scaler.fit_transform(elderly_population.reshape(-1, 1))

In [4]:
# LSTM 모델을 위한 데이터셋 준비
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset) - time_step - 1):
        a = dataset[i:(i + time_step), 0]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

time_step = 3
youth_X, youth_y = create_dataset(youth_population_scaled, time_step)
working_X, working_y = create_dataset(working_population_scaled, time_step)
elderly_X, elderly_y = create_dataset(elderly_population_scaled, time_step)

# 데이터 모양 변환 [samples, time steps, features]
youth_X = youth_X.reshape(youth_X.shape[0], youth_X.shape[1], 1)
working_X = working_X.reshape(working_X.shape[0], working_X.shape[1], 1)
elderly_X = elderly_X.reshape(elderly_X.shape[0], elderly_X.shape[1], 1)

In [5]:
# LSTM 모델 구축 및 훈련 함수
def create_and_train_lstm_model(X_train, y_train, epochs=100, batch_size=1):
    model = Sequential()
    model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
    model.add(LSTM(50, return_sequences=False))
    model.add(Dense(1))
    
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=1)
    
    return model

In [6]:
# 유소년, 생산연령, 노년 인구 모델 생성 및 훈련
youth_model = create_and_train_lstm_model(youth_X, youth_y, epochs=50)
working_model = create_and_train_lstm_model(working_X, working_y, epochs=50)
elderly_model = create_and_train_lstm_model(elderly_X, elderly_y, epochs=50)

Epoch 1/50


  super().__init__(**kwargs)


[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 0.2618
Epoch 2/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0122  
Epoch 3/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 995us/step - loss: 0.0041
Epoch 4/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0035    
Epoch 5/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 980us/step - loss: 0.0035  
Epoch 6/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0027   
Epoch 7/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0030    
Epoch 8/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0052    
Epoch 9/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.0028    
Epoch 10/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 

[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.1456e-04
Epoch 31/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1.5939e-04
Epoch 32/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.0497e-04
Epoch 33/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.7506e-04
Epoch 34/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 2.6003e-04
Epoch 35/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4.5113e-04  
Epoch 36/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 4.4432e-04
Epoch 37/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 3.3242e-04
Epoch 38/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 3.1078e-04  
Epoch 39/50
[1m60/60[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [7]:
def predict_future(model, data, steps=48):
    predictions = []
    current_input = data[-3:].reshape(1, 3, 1)
    for _ in range(steps):
        next_pred = model.predict(current_input)
        predictions.append(next_pred[0, 0])
        current_input = np.append(current_input[:, 1:, :], np.array(next_pred).reshape(1, 1, 1), axis=1)
    return scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

In [8]:
# 2024년부터 2072년까지 예측
youth_predictions = predict_future(youth_model, youth_population_scaled)
working_predictions = predict_future(working_model, working_population_scaled)
elderly_predictions = predict_future(elderly_model, elderly_population_scaled)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12

In [9]:
# 예측된 데이터의 길이 확인
print(f"Length of youth_predictions: {len(youth_predictions)}")
print(f"Length of working_predictions: {len(working_predictions)}")
print(f"Length of elderly_predictions: {len(elderly_predictions)}")

Length of youth_predictions: 48
Length of working_predictions: 48
Length of elderly_predictions: 48


In [10]:
# 예측 결과 출력 및 하나의 CSV 파일로 저장
future_years = np.arange(2024, 2024 + len(youth_predictions))  # 길이를 예측된 데이터 길이로 맞추기
predictions_df = pd.DataFrame({
    'Year': future_years,
    '유소년': youth_predictions.flatten(),
    '생산연령': working_predictions.flatten(),
    '노년': elderly_predictions.flatten()
})

# CSV 파일로 저장
predictions_df.to_csv('LSTM결과.csv', index=False, encoding='euc-kr')


    Year  Youth_Population
0   2024      3.894387e+05
1   2025      1.410250e+05
2   2026     -1.242955e+05
3   2027     -3.884001e+05
4   2028     -6.446246e+05
5   2029     -9.041068e+05
6   2030     -1.159384e+06
7   2031     -1.410005e+06
8   2032     -1.657818e+06
9   2033     -1.900002e+06
10  2034     -2.136463e+06
11  2035     -2.367014e+06
12  2036     -2.590510e+06
13  2037     -2.806712e+06
14  2038     -3.015218e+06
15  2039     -3.215527e+06
16  2040     -3.407446e+06
17  2041     -3.590750e+06
18  2042     -3.765279e+06
19  2043     -3.931000e+06
20  2044     -4.087908e+06
21  2045     -4.236073e+06
22  2046     -4.375630e+06
23  2047     -4.506756e+06
24  2048     -4.629675e+06
25  2049     -4.744650e+06
26  2050     -4.851966e+06
27  2051     -4.951946e+06
28  2052     -5.044916e+06
29  2053     -5.131220e+06
30  2054     -5.211208e+06
31  2055     -5.285232e+06
32  2056     -5.353642e+06
33  2057     -5.416780e+06
34  2058     -5.474984e+06
35  2059     -5.528580e+06
3