In [1]:
import pandas as pd
from IPython.display import display

base_path = 'C:/Users/언종/Desktop/data'

# 각 데이터 파일 불러오기
weather_forecast_1 = pd.read_csv(f'{base_path}/기상예측데이터_1.csv')
weather_forecast_2 = pd.read_csv(f'{base_path}/기상예측데이터_2.csv')
weather_actual_1 = pd.read_csv(f'{base_path}/기상실측데이터_1.csv')
weather_actual_2 = pd.read_csv(f'{base_path}/기상실측데이터_2.csv')
price_real_time = pd.read_csv(f'{base_path}/제주전력시장_시장전기가격_실시간가격.csv')
price_day_ahead = pd.read_csv(f'{base_path}/제주전력시장_시장전기가격_하루전가격.csv')
market_status = pd.read_csv(f'{base_path}/제주전력시장_현황데이터.csv')

In [2]:
def convert_columns(df):
    for col in df.columns:
        if col == 'location':
            continue  # location은 그대로 두기
        elif col == 'ts' or col == 'base_ts':
            df[col] = pd.to_numeric(df[col], errors='coerce').astype('Int64')  # ts는 Int64형으로 변환
        else:
            df[col] = pd.to_numeric(df[col], errors='coerce').astype(float)  # 나머지는 float형으로 변환
    return df

# 각 데이터프레임에 변환 함수 적용
weather_forecast_1 = convert_columns(weather_forecast_1)
weather_forecast_2 = convert_columns(weather_forecast_2)
weather_actual_1 = convert_columns(weather_actual_1)
weather_actual_2 = convert_columns(weather_actual_2)
price_real_time = convert_columns(price_real_time)
price_day_ahead = convert_columns(price_day_ahead)
market_status = convert_columns(market_status)

In [3]:
for df_name, df in zip(['weather_forecast_1', 'weather_forecast_2', 'weather_actual_1', 'weather_actual_2', 
                        'price_real_time', 'price_day_ahead', 'market_status'], 
                       [weather_forecast_1, weather_forecast_2, weather_actual_1, weather_actual_2, 
                        price_real_time, price_day_ahead, market_status]):
    print(f"Object columns in {df_name}:")
    for col in df.select_dtypes(include='object').columns:
        unique_values = df[col].nunique()
        print(f"  {col} (unique values: {unique_values})")
    print("\n")
locations = pd.concat([
    weather_forecast_1[['location']],
    weather_forecast_2[['location']],
    weather_actual_1[['location']],
    weather_actual_2[['location']]
], axis=0)

Object columns in weather_forecast_1:
  location (unique values: 10)


Object columns in weather_forecast_2:
  location (unique values: 4)


Object columns in weather_actual_1:
  location (unique values: 10)


Object columns in weather_actual_2:
  location (unique values: 4)


Object columns in price_real_time:


Object columns in price_day_ahead:


Object columns in market_status:




In [4]:
# 모든 location 값을 모아 원핫 인코딩
locations_encoded = pd.get_dummies(locations)

# 원핫 인코딩 결과를 각 데이터프레임에 다시 병합
weather_forecast_1 = pd.concat([weather_forecast_1.drop(columns=['location']), locations_encoded[:len(weather_forecast_1)].reset_index(drop=True)], axis=1)
weather_forecast_2 = pd.concat([weather_forecast_2.drop(columns=['location']), locations_encoded[len(weather_forecast_1):len(weather_forecast_1) + len(weather_forecast_2)].reset_index(drop=True)], axis=1)
weather_actual_1 = pd.concat([weather_actual_1.drop(columns=['location']), locations_encoded[len(weather_forecast_1) + len(weather_forecast_2):len(weather_forecast_1) + len(weather_forecast_2) + len(weather_actual_1)].reset_index(drop=True)], axis=1)
weather_actual_2 = pd.concat([weather_actual_2.drop(columns=['location']), locations_encoded[len(weather_forecast_1) + len(weather_forecast_2) + len(weather_actual_1):].reset_index(drop=True)], axis=1)
datasets = {
    "weather_forecast_1": weather_forecast_1,
    "weather_forecast_2": weather_forecast_2,
    "weather_actual_1": weather_actual_1,
    "weather_actual_2": weather_actual_2,
    "price_real_time": price_real_time,
    "price_day_ahead": price_day_ahead,
    "market_status": market_status
}

In [5]:
# ts 열에 결측값이 있는지 확인하고 정렬
for name, df in datasets.items():
    null_count = df['ts'].isnull().sum()
    print(f"{name} ts column null values: {null_count}")
    
    # 결측값 제거 및 ts 열 기준으로 정렬
    datasets[name] = df.dropna(subset=['ts']).sort_values(by='ts').reset_index(drop=True)

# 병합 시작: price_real_time의 ts 열을 기준으로 병합
merged_data = datasets['price_real_time'].copy()  # price_real_time을 기준으로 시작
merged_data = pd.merge_asof(merged_data, datasets['weather_forecast_1'], on='ts', direction='nearest', suffixes=('', '_wf1'))
merged_data = pd.merge_asof(merged_data, datasets['weather_forecast_2'], on='ts', direction='nearest', suffixes=('', '_wf2'))
merged_data = pd.merge_asof(merged_data, datasets['weather_actual_1'], on='ts', direction='nearest', suffixes=('', '_wa1'))
merged_data = pd.merge_asof(merged_data, datasets['weather_actual_2'], on='ts', direction='nearest', suffixes=('', '_wa2'))
merged_data = pd.merge_asof(merged_data, datasets['price_day_ahead'], on='ts', direction='nearest', suffixes=('', '_pda'))
merged_data = pd.merge_asof(merged_data, datasets['market_status'], on='ts', direction='nearest', suffixes=('', '_ms'))


weather_forecast_1 ts column null values: 8
weather_forecast_2 ts column null values: 2
weather_actual_1 ts column null values: 8
weather_actual_2 ts column null values: 2
price_real_time ts column null values: 0
price_day_ahead ts column null values: 0
market_status ts column null values: 0


In [6]:
# 병합 결과 확인
print("Merged data shape:", merged_data.shape)
merged_data

Merged data shape: (5617, 122)


Unnamed: 0,ts,실시간 임시 가격(원/kWh),실시간 확정 가격(원/kWh),base_ts,temp,real_feel_temp,wet_bulb_temp,dew_point,wind_dir,wind_spd,...,location_Yongsu-ri_wa2,location_location_wa2,하루전가격(원/kWh),공급능력(kW),현재 수요(kW),태양광 발전량kW),풍력 발전량(kW),신재생 발전량 총합(kW),공급 예비력(kW),운영 예비력(kW)
0,1709218800,95.30,95.30,1709258400,3.33333,-3.88889,1.11111,-2.77778,343.0,27.8417,...,False,False,107.39,1388000.0,798000.0,0.0,234165.0,248660.0,590000.0,266000.0
1,1709222400,107.39,107.39,1709258400,3.33333,-3.88889,1.11111,-2.77778,343.0,27.8417,...,False,False,107.39,1367000.0,765000.0,0.0,210320.0,226351.0,602000.0,277000.0
2,1709226000,95.30,95.30,1709258400,3.33333,-3.88889,1.11111,-2.77778,343.0,27.8417,...,False,False,95.30,1403000.0,736000.0,0.0,245230.0,261327.0,666000.0,342000.0
3,1709229600,87.89,87.89,1709258400,3.33333,-3.88889,1.11111,-2.77778,343.0,27.8417,...,False,False,87.89,1376000.0,727000.0,0.0,225240.0,239324.0,649000.0,323000.0
4,1709233200,86.50,86.50,1709258400,3.33333,-3.88889,1.11111,-2.77778,343.0,27.8417,...,False,False,0.00,1362000.0,741000.0,0.0,209575.0,223575.0,621000.0,297000.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5612,1729508400,156.43,155.32,1729389600,22.22220,22.77780,21.66670,20.55560,60.0,18.5075,...,False,False,158.38,1060000.0,753000.0,0.0,98888.3,107037.0,302000.0,307000.0
5613,1729512000,156.35,155.32,1729389600,23.88890,25.00000,22.77780,21.66670,44.0,20.4387,...,False,False,157.18,1060000.0,718000.0,0.0,95198.2,103651.0,337000.0,258000.0
5614,1729515600,155.89,154.80,1729389600,23.33330,21.66670,22.22220,21.66670,24.0,18.5075,...,False,False,105.63,1043000.0,687000.0,0.0,79106.1,87497.3,352000.0,276000.0
5615,1729519200,155.23,154.89,1729389600,23.88890,22.77780,22.77780,22.22220,45.0,14.8060,...,False,False,157.56,1033000.0,663000.0,0.0,73191.7,81490.5,366000.0,290000.0


In [7]:
merged_data.describe()

Unnamed: 0,ts,실시간 임시 가격(원/kWh),실시간 확정 가격(원/kWh),base_ts,temp,real_feel_temp,wet_bulb_temp,dew_point,wind_dir,wind_spd,...,rain_wa2,snow_wa2,하루전가격(원/kWh),공급능력(kW),현재 수요(kW),태양광 발전량kW),풍력 발전량(kW),신재생 발전량 총합(kW),공급 예비력(kW),운영 예비력(kW)
count,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,...,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0,5617.0
mean,1719330922.485312,125.150995,125.330972,1719241453.16005,21.40051,22.748996,18.93409,17.179099,173.926473,15.015274,...,0.294654,0.0,128.229523,1301707.0,745679.0,63811.461472,56700.337397,131985.054029,554397.4,351122.307281
std,5843432.136291,50.283059,49.77218,5842648.862372,6.570335,9.3365,6.579506,7.379243,100.41892,8.395687,...,1.288743,0.0,43.680196,132252.7,150247.1,89951.457278,63019.196324,104029.994488,155048.1,91490.4415
min,1709218800.0,-79.32,-79.32,1709258400.0,5.68434e-14,-8.88889,-2.22222,-6.66667,1.0,0.0,...,0.0,0.0,-79.32,957000.0,412000.0,0.0,0.0,3661.89,74000.0,81000.0
25%,1714273200.0,106.85,106.85,1714183200.0,16.6667,16.1111,14.4444,12.2222,89.0,9.3342,...,0.0,0.0,107.42,1205000.0,633000.0,0.0,7020.39,40463.4,449000.0,289000.0
50%,1719327600.0,139.27,139.27,1719280800.0,22.2222,22.7778,20.0,18.3333,164.0,13.0357,...,0.0,0.0,139.06,1298000.0,721000.0,8236.71,30983.5,105396.0,558000.0,341000.0
75%,1724382000.0,154.8,154.78,1724292000.0,26.6667,30.5556,25.0,23.8889,254.0,20.4387,...,0.0,0.0,153.86,1394000.0,836000.0,108198.0,89470.6,206579.0,654000.0,403000.0
max,1729522800.0,330.56,314.82,1729389600.0,33.8889,43.3333,28.8889,27.7778,360.0,53.7521,...,30.0,0.0,329.89,1771000.0,1250000.0,371447.0,309853.0,506193.0,1064000.0,733000.0


In [8]:
merged_data.to_excel('merged_data.xlsx', index=False)  # index=False로 인덱스 저장 생략

print("Data saved to 'merged_data.xlsx'")

Data saved to 'merged_data.xlsx'


In [9]:
import pandas as pd

# IQR 기반으로 특정 칼럼에서 이상치 행만 필터링하는 함수
def detect_outliers_in_column(df, column_name):
    Q1 = df[column_name].quantile(0.25)
    Q3 = df[column_name].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    # 지정한 칼럼에서 이상치가 있는 행만 필터링
    outliers_df = df[(df[column_name] < lower_bound) | (df[column_name] > upper_bound)]
    return outliers_df

In [10]:
temp_outliers_only = detect_outliers_in_column(merged_data, column_name='temp')

# 결과 확인
temp_outliers_only

Unnamed: 0,ts,실시간 임시 가격(원/kWh),실시간 확정 가격(원/kWh),base_ts,temp,real_feel_temp,wet_bulb_temp,dew_point,wind_dir,wind_spd,...,location_Yongsu-ri_wa2,location_location_wa2,하루전가격(원/kWh),공급능력(kW),현재 수요(kW),태양광 발전량kW),풍력 발전량(kW),신재생 발전량 총합(kW),공급 예비력(kW),운영 예비력(kW)
26,1709312400,167.66,136.89,1709258400,0.555556,-8.88889,-1.11111,-3.88889,324.0,25.9104,...,False,False,99.48,1328000.0,814000.0,0.0,170545.0,185359.0,514000.0,287000.0
27,1709316000,165.95,136.72,1709258400,0.555556,-8.33333,-1.11111,-4.44444,324.0,25.9104,...,False,False,95.38,1301000.0,800000.0,0.0,141930.0,158201.0,501000.0,274000.0
28,1709319600,91.24,91.24,1709258400,1.66667,-4.44444,-0.555556,-4.44444,325.0,20.4387,...,False,False,91.24,1289000.0,784000.0,0.0,130682.0,147329.0,505000.0,283000.0
29,1709323200,91.24,91.24,1709258400,5.68434e-14,-7.22222,-2.22222,-6.11111,340.0,22.2089,...,False,False,91.24,1281000.0,788000.0,0.0,122458.0,136992.0,493000.0,268000.0
31,1709330400,99.48,99.48,1709258400,1.66667,-5.0,-0.555556,-4.44444,347.0,24.1402,...,False,False,99.48,1243000.0,820000.0,0.0,86074.3,101136.0,424000.0,310000.0
32,1709334000,102.02,102.02,1709258400,0.555556,-6.11111,-1.66667,-6.66667,353.0,25.9104,...,False,False,135.15,1211000.0,813000.0,5601.7,49169.1,69477.0,397000.0,294000.0
195,1709920800,110.46,110.46,1709863200,1.66667,5.68434e-14,5.68434e-14,-3.88889,303.0,9.3342,...,False,False,110.46,1354000.0,751000.0,0.0,173317.0,188582.0,604000.0,394000.0


In [11]:
import numpy as np
start_timestamp = 1729612800  # 2024-10-23 00:00에 해당하는 Unix timestamp
timestamps = [start_timestamp + 3600 * i for i in range(24)]  # 1시간 간격으로 24개의 타임스탬프 생성

# 데이터프레임 생성
df_23day = pd.DataFrame({'ts': timestamps, 'smp_da': np.nan})  # 'smp_da'는 빈 값으로 초기화

# 결과 확인
print(df_23day)

            ts  smp_da
0   1729612800     NaN
1   1729616400     NaN
2   1729620000     NaN
3   1729623600     NaN
4   1729627200     NaN
5   1729630800     NaN
6   1729634400     NaN
7   1729638000     NaN
8   1729641600     NaN
9   1729645200     NaN
10  1729648800     NaN
11  1729652400     NaN
12  1729656000     NaN
13  1729659600     NaN
14  1729663200     NaN
15  1729666800     NaN
16  1729670400     NaN
17  1729674000     NaN
18  1729677600     NaN
19  1729681200     NaN
20  1729684800     NaN
21  1729688400     NaN
22  1729692000     NaN
23  1729695600     NaN


In [13]:
import requests

date = '2024-10-23'
token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJqaUtiN01nazVKZThnd3JTbmFmMk1HIiwiaWF0IjoxNzI5NTgzOTkyLCJleHAiOjE3MzE1OTY0MDAsInR5cGUiOiJhcGlfa2V5In0.2Y10TiejY2YYCZvaMP1k2F4Xl4m9aLQIHBd5yTwhnuA"
smp_rt_rc = requests.get(f'https://research-api.solarkim.com/data/cmpt-2024/smp-rt-rc/{date}', headers={
                            'Authorization': f'Bearer {token}'
                        }).json()
print(smp_rt_rc)
smp_rt_rc_df = pd.DataFrame(smp_rt_rc)
smp_rt_rc_df

[{'ts': 1729612800, 'smp_rt': 100.23, 'smp_rc': 100.23}, {'ts': 1729616400, 'smp_rt': 92.35, 'smp_rc': 92.35}, {'ts': 1729620000, 'smp_rt': 92.27, 'smp_rc': 92.27}, {'ts': 1729623600, 'smp_rt': 0.0, 'smp_rc': 0.0}, {'ts': 1729627200, 'smp_rt': 0.0, 'smp_rc': 0.0}, {'ts': 1729630800, 'smp_rt': 100.25, 'smp_rc': 100.25}, {'ts': 1729634400, 'smp_rt': 100.25, 'smp_rc': 100.25}, {'ts': 1729638000, 'smp_rt': 0.0, 'smp_rc': 0.0}, {'ts': 1729641600, 'smp_rt': -48.0, 'smp_rc': -48.0}, {'ts': 1729645200, 'smp_rt': -78.74, 'smp_rc': -78.74}, {'ts': 1729648800, 'smp_rt': -78.74, 'smp_rc': -78.74}, {'ts': 1729652400, 'smp_rt': -78.74, 'smp_rc': -78.74}, {'ts': 1729656000, 'smp_rt': -78.74, 'smp_rc': -78.74}, {'ts': 1729659600, 'smp_rt': -78.74, 'smp_rc': -78.74}, {'ts': 1729663200, 'smp_rt': 216.99, 'smp_rc': 216.99}, {'ts': 1729666800, 'smp_rt': 134.74, 'smp_rc': 134.74}, {'ts': 1729670400, 'smp_rt': 134.74, 'smp_rc': 134.74}, {'ts': 1729674000, 'smp_rt': 139.6, 'smp_rc': 139.6}, {'ts': 1729677600

Unnamed: 0,ts,smp_rt,smp_rc
0,1729612800,100.23,100.23
1,1729616400,92.35,92.35
2,1729620000,92.27,92.27
3,1729623600,0.0,0.0
4,1729627200,0.0,0.0
5,1729630800,100.25,100.25
6,1729634400,100.25,100.25
7,1729638000,0.0,0.0
8,1729641600,-48.0,-48.0
9,1729645200,-78.74,-78.74


In [14]:
smp_rt_rc_df = smp_rt_rc_df.drop(columns=['smp_rt'])
smp_rt_rc_df = smp_rt_rc_df.rename(columns={'smp_rc': '실시간 확정 가격(원/kWh)'})
smp_rt_rc_df

Unnamed: 0,ts,실시간 확정 가격(원/kWh)
0,1729612800,100.23
1,1729616400,92.35
2,1729620000,92.27
3,1729623600,0.0
4,1729627200,0.0
5,1729630800,100.25
6,1729634400,100.25
7,1729638000,0.0
8,1729641600,-48.0
9,1729645200,-78.74


In [27]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# 실제 데이터와 예측 데이터에 대한 e_F를 계산하는 함수
def calculate_measure(actual, forecast):
    actual = np.array(actual)
    forecast = np.array(forecast)

    positive_index = actual > 0
    negative_index = actual <= 0

    # actual의 값이 0과 -1 사이에 있는 경우 -1로 처리
    actual[(actual <= 0) & (actual > -1)] = -1
    
    # 양수 및 음수 값의 개수
    n1 = np.sum(positive_index) + 1e-7
    n2 = np.sum(negative_index) + 1e-7

    # e1: 양수 가격 예측 오차율
    e1 = (
        np.sum(
            np.abs(actual[positive_index] - forecast[positive_index])
            / np.abs(actual[positive_index])
        )
        / n1
    )

    # e2: 음수 가격 예측 오차율
    e2 = (
        np.sum(
            np.abs(actual[negative_index] - forecast[negative_index])
            / np.abs(actual[negative_index])
        )
        / n2
    )

    TP = np.sum((forecast > 0) & (actual > 0))
    TN = np.sum((forecast <= 0) & (actual <= 0))
    FP = np.sum((forecast > 0) & (actual <= 0))
    FN = np.sum((forecast <= 0) & (actual > 0))

    # 정확도 계산
    Accuracy = (TP + TN) / (TP + TN + FP + FN)
    e_F = 0.2 * e1 + 0.8 * e2 - (Accuracy - 0.95)

    return e_F

# 데이터 준비
target_column = '실시간 확정 가격(원/kWh)'
X = merged_data.drop(columns=[target_column]).values
y = merged_data[target_column].values

# MinMaxScaler로 스케일링 적용
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# 시퀀스 데이터 생성
def create_sequences(X, y, sequence_length=5):
    X_seq, y_seq = [], []
    for i in range(sequence_length, len(X)):
        X_seq.append(X[i-sequence_length:i].flatten())
        y_seq.append(y[i])
    return np.array(X_seq), np.array(y_seq)

X_seq, y_seq = create_sequences(X_scaled, y_scaled, sequence_length=15)

# 훈련, 검증, 테스트 데이터 분할
X_train, X_temp, y_train, y_temp = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [28]:
xgb_model = xgb.XGBRegressor(objective='reg:squarederror')

# 모델 학습 및 검증 데이터에 대한 예측
xgb_model.fit(X_train, y_train)
y_val_pred = xgb_model.predict(X_val)

# 스케일링 복원 후 e_F 계산
y_val_pred_original = scaler_y.inverse_transform(y_val_pred.reshape(-1, 1)).flatten()
y_val_original = scaler_y.inverse_transform(y_val.reshape(-1, 1)).flatten()
e_F = calculate_measure(y_val_original, y_val_pred_original)
print("e_F value:", e_F)

e_F value: 25.40377263532932


In [29]:
last_sequence = X_seq[-1].reshape(1, -1)  # 마지막 시퀀스 데이터
predictions = []

for _ in range(24):
    prediction = xgb_model.predict(last_sequence)
    predictions.append(prediction[0])
    # 예측값을 시퀀스에 추가하여 다음 예측을 위해 시퀀스 업데이트
    last_sequence = np.concatenate([last_sequence[:, 1:], prediction.reshape(1, 1)], axis=1)

# 스케일링 복원
predictions = scaler_y.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()
print("Predictions for the next 24 hours:", predictions)


actual_24h = smp_rt_rc_df[target_column].values[-24:]
e_F = calculate_measure(actual_24h, predictions)
print("e_F for 24-hour prediction:", e_F)

Predictions for the next 24 hours: [151.65565    82.62597    45.537594   81.935455   -9.266094   81.378975
  22.858099   10.224556   59.717266   76.73408    28.529148   59.1982
  64.54856   -25.74324    -3.5809433  51.62632   156.67894    22.36801
  83.22146    54.870598  149.13573    74.370316   77.27889    35.647858 ]
e_F for 24-hour prediction: 10.35421506016025


In [30]:
start_timestamp = 1729612800  # 2024-10-23 00:00에 해당하는 Unix timestamp
timestamps = [start_timestamp + 3600 * i for i in range(24)]

# 예측 결과를 DataFrame으로 저장
df_23day = pd.DataFrame({'ts': timestamps, 'smp_da': predictions})

In [31]:
df_23day

Unnamed: 0,ts,smp_da
0,1729612800,151.655655
1,1729616400,82.625969
2,1729620000,45.537594
3,1729623600,81.935455
4,1729627200,-9.266094
5,1729630800,81.378975
6,1729634400,22.858099
7,1729638000,10.224556
8,1729641600,59.717266
9,1729645200,76.734077


In [32]:
actual = smp_rt_rc_df['실시간 확정 가격(원/kWh)'].values  # 실제 값
forecast = df_23day['smp_da'].values  # 예측 값

# 평가 함수 정의
def calculate_measure(actual, forecast):
    actual = np.array(actual)
    forecast = np.array(forecast)

    positive_index = actual > 0
    negative_index = actual <= 0

    # actual의 값이 0과 -1 사이에 있는 경우 -1로 처리
    actual[(actual <= 0) & (actual > -1)] = -1
    
    # 양수 및 음수 값의 개수
    n1 = np.sum(positive_index) + 1e-3  # 작은 값으로 설정
    n2 = np.sum(negative_index) + 1e-3

    # e1: 양수 가격 예측 오차율
    e1 = (
        np.sum(
            np.abs(actual[positive_index] - forecast[positive_index])
            / np.maximum(np.abs(actual[positive_index]), 1e-3)  # 작은 값으로 분모 안정화
        )
        / n1
    )

    # e2: 음수 가격 예측 오차율
    e2 = (
        np.sum(
            np.abs(actual[negative_index] - forecast[negative_index])
            / np.maximum(np.abs(actual[negative_index]), 1e-3)
        )
        / n2
    )

    TP = np.sum((forecast > 0) & (actual > 0))
    TN = np.sum((forecast <= 0) & (actual <= 0))
    FP = np.sum((forecast > 0) & (actual <= 0))
    FN = np.sum((forecast <= 0) & (actual > 0))

    # 정확도 계산
    Accuracy = (TP + TN) / (TP + TN + FP + FN)
    e_F = 0.2 * e1 + 0.8 * e2 - (Accuracy - 0.95)

    return e_F

# e_F 값 계산
e_F_value = calculate_measure(actual, forecast)
print("e_F value:", e_F_value)

e_F value: 10.353100439568998


In [22]:
import json
import requests

result = {
    'submit_result': predictions.tolist() 
}

success = requests.post(
    'https://research-api.solarkim.com/submissions/cmpt-2024',
    data=json.dumps(result),
    headers={
        'Authorization': f'Bearer {"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJqaUtiN01nazVKZThnd3JTbmFmMk1HIiwiaWF0IjoxNzI5NTgzOTkyLCJleHAiOjE3MzE1OTY0MDAsInR5cGUiOiJhcGlfa2V5In0.2Y10TiejY2YYCZvaMP1k2F4Xl4m9aLQIHBd5yTwhnuA"}'
    }
).json()

print(success)

{'detail': 'NOT_AVAILABLE_TIME'}
