In [10]:
import pandas as pd

In [11]:
co2 = pd.read_csv("탄소배출량_평균_전처리.csv", encoding="euc-kr")

In [12]:
co2.head()

Unnamed: 0,일시,이산화탄소(CO2)
0,1999-01,373.1
1,1999-02,374.0
2,1999-03,374.9
3,1999-04,375.1
4,1999-05,374.0


In [13]:
co2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284 entries, 0 to 283
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   일시          284 non-null    object 
 1   이산화탄소(CO2)  284 non-null    float64
dtypes: float64(1), object(1)
memory usage: 4.6+ KB


In [14]:
# '일시' 열을 datetime 형식으로 변경
co2['일시'] = pd.to_datetime(co2['일시'])

In [15]:
co2.head()

Unnamed: 0,일시,이산화탄소(CO2)
0,1999-01-01,373.1
1,1999-02-01,374.0
2,1999-03-01,374.9
3,1999-04-01,375.1
4,1999-05-01,374.0


In [16]:
co2

Unnamed: 0,일시,이산화탄소(CO2)
0,1999-01-01,373.100000
1,1999-02-01,374.000000
2,1999-03-01,374.900000
3,1999-04-01,375.100000
4,1999-05-01,374.000000
...,...,...
279,2022-04-01,429.083333
280,2022-05-01,426.130000
281,2022-06-01,422.036667
282,2022-07-01,419.610000


In [17]:
# Month 열을 인덱스로 설정
co2.set_index('일시', inplace=True)

In [18]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from math import sqrt



# 데이터 전처리
scaler = MinMaxScaler(feature_range=(0,1))
df_co2_scaled = scaler.fit_transform(co2)

# 트레이닝셋, 테스트셋 분리
train_size = int(len(df_co2_scaled) * 0.7)
train, test = df_co2_scaled[0:train_size, :], df_co2_scaled[train_size:len(df_co2_scaled), :]

# X, Y 데이터셋 생성
def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)

# LSTM 모델 생성
model = Sequential()
model.add(LSTM(4, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(trainX, trainY, epochs=100, batch_size=1, verbose=2)

# 예측
trainPredict = model.predict(trainX)
testPredict = model.predict(testX)

# 예측값을 원래의 스케일로 변환
trainPredict = scaler.inverse_transform(trainPredict)
trainY = scaler.inverse_transform([trainY])
testPredict = scaler.inverse_transform(testPredict)
testY = scaler.inverse_transform([testY])

# RMSE 계산
trainScore = sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = sqrt(mean_squared_error(testY[0], testPredict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

# MAE 계산
trainScore_mae = mean_absolute_error(trainY[0], trainPredict[:,0])
print('Train Score: %.2f MAE' % (trainScore_mae))
testScore_mae = mean_absolute_error(testY[0], testPredict[:,0])
print('Test Score: %.2f MAE' % (testScore_mae))

# MSE 계산
trainScore_mse = mean_squared_error(trainY[0], trainPredict[:,0])
print('Train Score: %.2f MSE' % (trainScore_mse))
testScore_mse = mean_squared_error(testY[0], testPredict[:,0])
print('Test Score: %.2f MSE' % (testScore_mse))

# 결정계수(R2 Score) 계산
trainScore_r2 = r2_score(trainY[0], trainPredict[:,0])
print('Train Score: %.2f R2' % (trainScore_r2))
testScore_r2 = r2_score(testY[0], testPredict[:,0])
print('Test Score: %.2f R2' % (testScore_r2))

# 2022년 예측
last_val = df_co2_scaled[-1]
last_val_scaled = last_val.reshape(-1, 1)
next_val = model.predict(last_val_scaled)
print("2022년의 CO2 배출량 예측값: ", scaler.inverse_transform(next_val))

  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if is_sparse(pd_dtype):
  if is_sparse(pd_dtype) or not is_extension_array_dtype(pd_dtype):


Epoch 1/100
196/196 - 2s - loss: 0.1001 - 2s/epoch - 11ms/step
Epoch 2/100
196/196 - 0s - loss: 0.0284 - 414ms/epoch - 2ms/step
Epoch 3/100
196/196 - 1s - loss: 0.0218 - 509ms/epoch - 3ms/step
Epoch 4/100
196/196 - 1s - loss: 0.0187 - 518ms/epoch - 3ms/step
Epoch 5/100
196/196 - 1s - loss: 0.0157 - 549ms/epoch - 3ms/step
Epoch 6/100
196/196 - 1s - loss: 0.0127 - 544ms/epoch - 3ms/step
Epoch 7/100
196/196 - 1s - loss: 0.0102 - 543ms/epoch - 3ms/step
Epoch 8/100
196/196 - 0s - loss: 0.0078 - 478ms/epoch - 2ms/step
Epoch 9/100
196/196 - 1s - loss: 0.0057 - 537ms/epoch - 3ms/step
Epoch 10/100
196/196 - 1s - loss: 0.0042 - 527ms/epoch - 3ms/step
Epoch 11/100
196/196 - 1s - loss: 0.0031 - 523ms/epoch - 3ms/step
Epoch 12/100
196/196 - 1s - loss: 0.0024 - 508ms/epoch - 3ms/step
Epoch 13/100
196/196 - 1s - loss: 0.0020 - 511ms/epoch - 3ms/step
Epoch 14/100
196/196 - 0s - loss: 0.0018 - 443ms/epoch - 2ms/step
Epoch 15/100
196/196 - 1s - loss: 0.0017 - 527ms/epoch - 3ms/step
Epoch 16/100
196/196 

In [19]:
df = pd.read_csv("에너지 생산 소비 병합_전처리7.csv", encoding="euc-kr")

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split


# 데이터 전처리
scaler = MinMaxScaler(feature_range=(0,1))

# 'CO2 emissions (MMtonnes CO2)' 열을 제거한 후 스케일링
df_combined2 = df.drop('CO2 emissions (MMtonnes CO2)', axis=1)
df_combined_scaled2 = scaler.fit_transform(df_combined2)

# 목표 변수에 대해서만 스케일링
y = df['CO2 emissions (MMtonnes CO2)']
y = scaler.fit_transform(y.values.reshape(-1, 1))

X = df_combined_scaled2

# train_test_split() 함수로 학습 데이터와 테스트 데이터 분리하기
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1, shuffle = False)

# RandomForestRegressor 모델 생성/학습
model_RF = RandomForestRegressor()
model_RF.fit(X_train, y_train.ravel())  # ravel()로 1차원 배열로 전환

# Predict를 수행하고 mean_absolute_error와 rmse 결과 출력하기
pred = model_RF.predict(X_test)
print(mean_absolute_error(y_test, pred))
print(sqrt(mean_squared_error(y_test, pred)))
r2 = r2_score(y_test, pred)
print(f"R-squared: {r2}")