# 신예보 생성
2021년 7월 1일~2022년 6월 30일

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
import warnings
warnings.filterwarnings('ignore')

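### Forecast time : time at which the Korea Meteorological Administration (KMA) issued the forecast
### forecast_time : forecast issue time with the 9-hour shift applied (Korean time is UTC + 9)
### datetime      : time the forecast is valid for = forecast issue time + `forecast` hours (final)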

# <풍력: 화순 동면 예보>

## 신예보 데이터 결합

In [None]:
# Folder with the raw KMA forecast CSV files for the wind site.
wind_data_path = '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/동면_신예보'
# sorted() fixes the file order; preprocessing() selects files by their position in this list.
path_list = sorted(glob(wind_data_path+'/*.csv'))
path_list

['/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/동면_신예보/동면_1시간강수량_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/동면_신예보/동면_1시간기온_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/동면_신예보/동면_1시간적설_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/동면_신예보/동면_강수형태_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/동면_신예보/동면_강수확률_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/동면_신예보/동면_습도_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인


<h3>1시간 간격 신예보 데이터 전처리</h3>

*온도의 신예보 데이터는 주최측 예보데이터에서 가져올 것이라 제외

In [None]:
def preprocessing(path_list):
    """Combine the per-variable forecast CSV files into one table with timestamps.

    Parameters
    ----------
    path_list : sequence of str
        Paths of the raw CSV files. Files are selected by position:
        0 -> precipitation, 2 -> snow, 3 -> precipitationform,
        4 -> precipitationprob, 5 -> humidity, 6 -> windspeed,
        7 -> winddirection, 8 -> cloud. Position 1 is never read.
        Every file must contain the columns ' format: day', 'hour' and
        'forecast'; the value is taken from the last column of each file.

    Returns
    -------
    pandas.DataFrame
        One row per data row of the first file (separator rows removed,
        original row labels kept) with the columns ``forecast``, the eight
        weather variables, ``date`` (text 'YYYY-MM-DD H:00'),
        ``forecast_time`` (``date`` parsed and shifted by +9 hours) and
        ``datetime`` (``forecast_time`` + ``forecast`` hours).

    Notes
    -----
    Rows whose 'hour' is missing are treated as block separators. The date of
    a block is read from the text after "Start : " (fourth column header for
    the first block, first cell of the separator row for later blocks).
    The ' format: day' values of ordinary rows are not consulted, so this
    assumes every block covers exactly one calendar day -- TODO confirm
    against the raw files.
    """
    # (output column name, position of its file in path_list), in output order.
    sources = [
        ('precipitation', 0),      # 1-hour precipitation
        ('snow', 2),               # 1-hour snowfall
        ('precipitationform', 3),  # precipitation form
        ('precipitationprob', 4),  # precipitation probability
        ('humidity', 5),           # humidity
        ('windspeed', 6),          # wind speed
        ('winddirection', 7),      # wind direction
        ('cloud', 8),              # sky condition
    ]

    # The first file supplies the key columns; copy so that adding columns
    # below does not write into a slice of the loaded frame.
    base = pd.read_csv(path_list[0])
    data_year = base[[' format: day', 'hour', 'forecast']].copy()

    # Combine the 1-hour data: values are aligned by row label across files.
    for column, position in sources:
        frame = base if position == 0 else pd.read_csv(path_list[position])
        data_year[column] = frame[frame.columns[-1]]

    # Separator rows have no 'hour'; each one opens a new block.
    is_separator = data_year['hour'].isna()

    # Date of every block: header of the first file, then one per separator row.
    first_day = base.columns.to_list()[3].split("Start : ")[1]
    block_days = [first_day]
    block_days.extend(
        text.split(" Start : ")[1]
        for text in data_year.loc[is_separator, ' format: day']
    )

    # Number of separators seen so far == index of the block a row belongs to.
    block_no = is_separator.cumsum()

    df = data_year.loc[~is_separator].copy()
    day = block_no.loc[df.index].map(pd.Series(block_days, dtype=object))

    # 'hour' is stored as HHMM (e.g. 500); keep only the hour part.
    hour_text = (df['hour'].astype(int)//100).astype(str)
    df['date'] = day.str[:4] + '-' + day.str[4:6] + '-' + day.str[6:8] + " " + hour_text + ':00'

    # Shift by 9 hours (Korean time is UTC + 9).
    df['forecast_time'] = pd.to_datetime(df['date']) + pd.Timedelta(hours=9)
    df = df.drop([' format: day', 'hour'], axis=1)

    # Valid time = issue time + forecast lead time in hours (vectorised).
    df['datetime'] = df['forecast_time'] + pd.to_timedelta(df['forecast'], unit='h')
    return df

In [None]:
# 예보시간 14시만 뺌 
def make_14(df):
    """Keep only the forecasts issued at 14:00 with a lead time of 10-33 hours.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime column ``forecast_time``, a numeric column
        ``forecast`` and a ``date`` column (as returned by ``preprocessing``).

    Returns
    -------
    pandas.DataFrame
        The selected rows without the ``date``, ``forecast_time`` and
        ``forecast`` columns, renumbered from 0.
    """
    issued_at_14 = df['forecast_time'].dt.hour == 14
    # between() is inclusive on both ends, i.e. 10 <= forecast <= 33.
    lead_in_window = df['forecast'].between(10, 33)
    selected = df.loc[issued_at_14 & lead_in_window]
    trimmed = selected.drop(columns=['date', 'forecast_time', 'forecast'])
    return trimmed.reset_index(drop=True)

In [None]:
# Build the combined forecast table for the wind site from the files listed in path_list.
wind_df = preprocessing(path_list)

In [None]:
wind_df.isnull().sum()

forecast             0
precipitation        0
snow                 0
precipitationform    0
precipitationprob    0
humidity             0
windspeed            0
winddirection        0
cloud                0
date                 0
forecast_time        0
datetime             0
dtype: int64

In [None]:
# Keep only the 14:00 forecasts (lead time 10-33 h), then put 'datetime' first.
wind_fcst_14 = make_14(wind_df)
wind_fcst_14 = wind_fcst_14[['datetime','precipitationform','precipitationprob','humidity','windspeed','winddirection','cloud','precipitation','snow']]
wind_fcst_14

Unnamed: 0,datetime,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2021-07-01 00:00:00,0.0,0.0,90.0,1.1,90.0,1.0,0.0,0.0
1,2021-07-01 01:00:00,0.0,0.0,90.0,1.2,90.0,1.0,0.0,0.0
2,2021-07-01 02:00:00,0.0,0.0,90.0,1.1,85.0,1.0,0.0,0.0
3,2021-07-01 03:00:00,0.0,0.0,90.0,1.1,80.0,1.0,0.0,0.0
4,2021-07-01 04:00:00,0.0,0.0,90.0,1.1,80.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
8779,2022-07-01 19:00:00,0.0,0.0,70.0,1.8,142.0,1.0,0.0,0.0
8780,2022-07-01 20:00:00,0.0,0.0,80.0,1.5,140.0,1.0,0.0,0.0
8781,2022-07-01 21:00:00,0.0,0.0,85.0,1.5,133.0,1.0,0.0,0.0
8782,2022-07-01 22:00:00,0.0,0.0,90.0,1.4,126.0,1.0,0.0,0.0


<h3>온도 데이터 결합</h3>

*기상청 온도 예보 데이터는 결측치가 있어서, AIfactory 제공 데이터에서 온도 데이터를 가져와서 추가

In [None]:
# Load the forecast data provided by the competition organizer.
AIfactory_wind = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/AIfactory/wind_forecast_weather.csv')
# 'Forecast time' is not parsed on load (it is converted to datetime later, inside
# make_14_temp), so this presumably compares text; that matches chronological order
# only for zero-padded 'YYYY-MM-DD HH:MM:SS' values.
# .copy() makes the filtered rows an independent frame.
AIfactory_wind = AIfactory_wind[(AIfactory_wind['Forecast time']>='2021-06-30 00:00:00')&(AIfactory_wind['Forecast time']<='2022-07-01 23:00:00')].copy()
AIfactory_wind

Unnamed: 0,Forecast time,forecast,temperature,humidity,windspeed,winddirection
44638,2021-06-30 02:00:00,4.0,20.0,95.0,1.1,95.0
44639,2021-06-30 02:00:00,7.0,23.0,85.0,1.9,115.0
44640,2021-06-30 02:00:00,10.0,27.0,65.0,1.7,123.0
44641,2021-06-30 02:00:00,13.0,29.0,60.0,2.1,129.0
44642,2021-06-30 02:00:00,16.0,28.0,60.0,2.3,149.0
...,...,...,...,...,...,...
230259,2022-07-01 08:00:00,60.0,26.0,80.0,2.5,78.0
230260,2022-07-01 08:00:00,61.0,26.0,85.0,2.1,76.0
230261,2022-07-01 08:00:00,62.0,25.0,85.0,1.7,65.0
230262,2022-07-01 08:00:00,63.0,25.0,85.0,1.6,43.0


In [None]:
#14시 예보 기준
def make_14_temp(df, start='2021-07-01 00:00:00', end='2022-07-01 23:00:00'):
    """Build an hourly temperature series from the forecasts issued at 14:00.

    Parameters
    ----------
    df : pandas.DataFrame
        Forecast table with the columns 'Forecast time' (issue time, text or
        datetime), 'forecast' (lead time in hours) and 'temperature'.
        Side effect kept from the original implementation: the
        'Forecast time' column of ``df`` is converted to datetime in place.
    start, end : str or datetime-like, optional
        First and last timestamp of the hourly grid. The defaults reproduce
        the previously hard-coded range.

    Returns
    -------
    pandas.DataFrame
        Columns ``datetime`` and ``temperature``. Hours without a forecast
        value are filled by linear interpolation between neighbouring rows.

    Notes
    -----
    The grid and the forecasts are combined with an outer merge, so a
    forecast timestamp outside ``start``..``end`` also appears in the result.
    """
    df['Forecast time'] = pd.to_datetime(df['Forecast time'])

    # Forecasts issued at 14:00 with a lead time of 10-33 hours (inclusive).
    issued_at_14 = df['Forecast time'].dt.hour == 14
    lead_in_window = df['forecast'].between(10, 33)
    # Copy so the new column is not written into a view of the caller's frame.
    fcst_14 = df.loc[issued_at_14 & lead_in_window].copy()

    # Valid time = issue time + lead time in hours (vectorised).
    fcst_14['datetime'] = fcst_14['Forecast time'] + pd.to_timedelta(fcst_14['forecast'], unit='h')

    # Hourly grid; a Timedelta step avoids the version-dependent 'H'/'h' alias.
    hourly_grid = pd.DataFrame(
        {'datetime': pd.date_range(start=start, end=end, freq=pd.Timedelta(hours=1))}
    )

    merged = pd.merge(hourly_grid, fcst_14, on='datetime', how='outer')
    fcst_14_df = merged[['datetime', 'temperature']].copy()
    fcst_14_df['temperature'] = fcst_14_df['temperature'].astype('float').interpolate()
    return fcst_14_df

In [None]:
# Hourly temperature series for the wind site, taken from the 14:00 organizer forecasts.
AIfactory_wind_14 = make_14_temp(AIfactory_wind)
AIfactory_wind_14

Unnamed: 0,datetime,temperature
0,2021-07-01 00:00:00,20.000000
1,2021-07-01 01:00:00,19.666667
2,2021-07-01 02:00:00,19.333333
3,2021-07-01 03:00:00,19.000000
4,2021-07-01 04:00:00,18.666667
...,...,...
8779,2022-07-01 19:00:00,27.000000
8780,2022-07-01 20:00:00,26.000000
8781,2022-07-01 21:00:00,25.000000
8782,2022-07-01 22:00:00,24.000000


In [None]:
# Attach the temperature column by matching on 'datetime'; how='outer' keeps
# timestamps that appear in only one of the two frames.
wind_fcst_14_f= pd.merge(wind_fcst_14, AIfactory_wind_14, on='datetime', how='outer')
# Reorder the columns so that 'temperature' follows 'datetime'.
wind_new_forecast = wind_fcst_14_f[['datetime','temperature','precipitationform','precipitationprob','humidity','windspeed','winddirection','cloud','precipitation','snow']]
wind_new_forecast

Unnamed: 0,datetime,temperature,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2021-07-01 00:00:00,20.000000,0.0,0.0,90.0,1.1,90.0,1.0,0.0,0.0
1,2021-07-01 01:00:00,19.666667,0.0,0.0,90.0,1.2,90.0,1.0,0.0,0.0
2,2021-07-01 02:00:00,19.333333,0.0,0.0,90.0,1.1,85.0,1.0,0.0,0.0
3,2021-07-01 03:00:00,19.000000,0.0,0.0,90.0,1.1,80.0,1.0,0.0,0.0
4,2021-07-01 04:00:00,18.666667,0.0,0.0,90.0,1.1,80.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
8779,2022-07-01 19:00:00,27.000000,0.0,0.0,70.0,1.8,142.0,1.0,0.0,0.0
8780,2022-07-01 20:00:00,26.000000,0.0,0.0,80.0,1.5,140.0,1.0,0.0,0.0
8781,2022-07-01 21:00:00,25.000000,0.0,0.0,85.0,1.5,133.0,1.0,0.0,0.0
8782,2022-07-01 22:00:00,24.000000,0.0,0.0,90.0,1.4,126.0,1.0,0.0,0.0


In [None]:
wind_new_forecast.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8784 entries, 0 to 8783
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   datetime           8784 non-null   datetime64[ns]
 1   temperature        8784 non-null   float64       
 2   precipitationform  8784 non-null   float64       
 3   precipitationprob  8784 non-null   float64       
 4   humidity           8784 non-null   float64       
 5   windspeed          8784 non-null   float64       
 6   winddirection      8784 non-null   float64       
 7   cloud              8784 non-null   float64       
 8   precipitation      8784 non-null   float64       
 9   snow               8784 non-null   float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 754.9 KB


## 구예보+신예보 결합

In [None]:
# Load the old-format forecast table.
wind_old_forecast = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/new_data/wind_old_forecast14.csv')
# Parse 'datetime', which read_csv leaves unparsed by default.
wind_old_forecast['datetime'] = pd.to_datetime(wind_old_forecast['datetime'])
wind_old_forecast.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7344 entries, 0 to 7343
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   datetime           7344 non-null   datetime64[ns]
 1   temperature        7344 non-null   float64       
 2   precipitationform  7344 non-null   float64       
 3   precipitationprob  7344 non-null   float64       
 4   humidity           7344 non-null   float64       
 5   windspeed          7344 non-null   float64       
 6   winddirection      7344 non-null   float64       
 7   cloud              7344 non-null   float64       
 8   precipitation      7344 non-null   float64       
 9   snow               7344 non-null   float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 573.9 KB


In [None]:
wind_old_forecast

Unnamed: 0,datetime,temperature,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2020-08-29 00:00:00,26.000000,0.0,30.000000,95.000000,1.400000,126.000000,4.000000,0.0,0.0
1,2020-08-29 01:00:00,26.000000,0.0,30.000000,95.000000,1.333333,125.666667,4.000000,0.0,0.0
2,2020-08-29 02:00:00,26.000000,0.0,30.000000,95.000000,1.266667,125.333333,4.000000,0.0,0.0
3,2020-08-29 03:00:00,26.000000,0.0,30.000000,95.000000,1.200000,125.000000,4.000000,0.0,0.0
4,2020-08-29 04:00:00,26.000000,0.0,30.000000,95.000000,1.266667,123.333333,4.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7339,2021-06-30 19:00:00,26.666667,0.0,10.000000,66.666667,1.733333,147.000000,2.333333,0.0,0.0
7340,2021-06-30 20:00:00,25.333333,0.0,10.000000,73.333333,1.366667,149.000000,1.666667,0.0,0.0
7341,2021-06-30 21:00:00,24.000000,0.0,10.000000,80.000000,1.000000,151.000000,1.000000,0.0,0.0
7342,2021-06-30 22:00:00,22.666667,0.0,6.666667,83.333333,1.033333,130.666667,1.000000,0.0,0.0


In [None]:
# Combined coverage: 2020-08-29 00:00:00 ~ 2022-07-01 23:00:00
# Stack old and new forecasts row-wise. Row labels of both frames are kept as-is
# (no ignore_index), so labels repeat in the result.
wind_forecast_f= pd.concat([wind_old_forecast, wind_new_forecast],axis=0)
wind_forecast_f

Unnamed: 0,datetime,temperature,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2020-08-29 00:00:00,26.0,0.0,30.0,95.0,1.400000,126.000000,4.0,0.0,0.0
1,2020-08-29 01:00:00,26.0,0.0,30.0,95.0,1.333333,125.666667,4.0,0.0,0.0
2,2020-08-29 02:00:00,26.0,0.0,30.0,95.0,1.266667,125.333333,4.0,0.0,0.0
3,2020-08-29 03:00:00,26.0,0.0,30.0,95.0,1.200000,125.000000,4.0,0.0,0.0
4,2020-08-29 04:00:00,26.0,0.0,30.0,95.0,1.266667,123.333333,4.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
8779,2022-07-01 19:00:00,27.0,0.0,0.0,70.0,1.800000,142.000000,1.0,0.0,0.0
8780,2022-07-01 20:00:00,26.0,0.0,0.0,80.0,1.500000,140.000000,1.0,0.0,0.0
8781,2022-07-01 21:00:00,25.0,0.0,0.0,85.0,1.500000,133.000000,1.0,0.0,0.0
8782,2022-07-01 22:00:00,24.0,0.0,0.0,90.0,1.400000,126.000000,1.0,0.0,0.0


In [None]:
wind_forecast_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16128 entries, 0 to 8783
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   datetime           16128 non-null  datetime64[ns]
 1   temperature        16128 non-null  float64       
 2   precipitationform  16128 non-null  float64       
 3   precipitationprob  16128 non-null  float64       
 4   humidity           16128 non-null  float64       
 5   windspeed          16128 non-null  float64       
 6   winddirection      16128 non-null  float64       
 7   cloud              16128 non-null  float64       
 8   precipitation      16128 non-null  float64       
 9   snow               16128 non-null  float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 1.4 MB


In [None]:
# Save the combined wind-site forecast; index=False leaves the row labels out of the file.
wind_forecast_f.to_csv('/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/new_data/wind_forecast_f.csv', index=False) 

# <태양광: 인천 청라3동 예보>

In [None]:
# Folder with the raw KMA forecast CSV files for the solar site.
solar_data_path = '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/청라3동_신예보'
# sorted() fixes the file order; preprocessing() selects files by their position in this list.
path_list2 = sorted(glob(solar_data_path+'/*.csv'))
path_list2

['/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/청라3동_신예보/청라3동_1시간강수량_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/청라3동_신예보/청라3동_1시간기온_20210630_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/청라3동_신예보/청라3동_1시간적설_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/청라3동_신예보/청라3동_강수형태_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/청라3동_신예보/청라3동_강수확률_20210629_20220630.csv',
 '/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/기상청/청라3동_신예보/청라3동_습도_20210629_20220630.csv',
 '/content/drive/MyDrive/


<h3>1시간 간격 신예보 데이터 전처리</h3>

*온도의 신예보 데이터는 주최측 예보데이터에서 가져올 것이라 제외

In [None]:
# Build the combined forecast table for the solar site from the files listed in path_list2.
solar_df = preprocessing(path_list2)

In [None]:
solar_df.isnull().sum()

forecast             0
precipitation        0
snow                 0
precipitationform    0
precipitationprob    0
humidity             0
windspeed            0
winddirection        0
cloud                0
date                 0
forecast_time        0
datetime             0
dtype: int64

In [None]:
# Keep only the 14:00 forecasts (lead time 10-33 h), then put 'datetime' first.
solar_fcst_14 = make_14(solar_df)
solar_fcst_14 = solar_fcst_14[['datetime','precipitationform','precipitationprob','humidity','windspeed','winddirection','cloud','precipitation','snow']]
solar_fcst_14

Unnamed: 0,datetime,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2021-07-01 00:00:00,0.0,20.0,85.0,0.2,153.0,3.0,0.0,0.0
1,2021-07-01 01:00:00,0.0,20.0,85.0,0.7,172.0,3.0,0.0,0.0
2,2021-07-01 02:00:00,0.0,0.0,85.0,0.5,158.0,1.0,0.0,0.0
3,2021-07-01 03:00:00,0.0,0.0,90.0,0.4,315.0,1.0,0.0,0.0
4,2021-07-01 04:00:00,0.0,20.0,85.0,0.7,8.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
8779,2022-07-01 19:00:00,0.0,0.0,85.0,2.4,262.0,1.0,0.0,0.0
8780,2022-07-01 20:00:00,0.0,0.0,90.0,2.1,267.0,1.0,0.0,0.0
8781,2022-07-01 21:00:00,0.0,0.0,90.0,1.5,257.0,1.0,0.0,0.0
8782,2022-07-01 22:00:00,0.0,20.0,90.0,0.9,241.0,3.0,0.0,0.0


<h3>온도 데이터 결합</h3>

*기상청 온도 예보 데이터는 결측치가 있어서, AIfactory 제공 데이터에서 온도 데이터를 가져와서 추가

In [None]:
# Load the forecast data provided by the competition organizer.
AIfactory_solar = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/raw_data/AIfactory/solar_forecast_weather.csv')
# 'Forecast time' is not parsed on load (it is converted to datetime later, inside
# make_14_temp), so this presumably compares text; that matches chronological order
# only for zero-padded 'YYYY-MM-DD HH:MM:SS' values.
# .copy() makes the filtered rows an independent frame.
AIfactory_solar = AIfactory_solar[(AIfactory_solar['Forecast time']>='2021-06-30 00:00:00')&(AIfactory_solar['Forecast time']<='2022-07-01 23:00:00')].copy()
AIfactory_solar

Unnamed: 0,Forecast time,forecast,temperature,humidity,windspeed,winddirection
44638,2021-06-30 02:00:00,4.0,21.0,55.0,1.8,135.0
44639,2021-06-30 02:00:00,7.0,24.0,55.0,2.2,249.0
44640,2021-06-30 02:00:00,10.0,26.0,70.0,2.9,44.0
44641,2021-06-30 02:00:00,13.0,27.0,85.0,3.4,47.0
44642,2021-06-30 02:00:00,16.0,26.0,85.0,2.7,63.0
...,...,...,...,...,...,...
229243,2022-07-01 08:00:00,60.0,26.0,90.0,3.6,193.0
229244,2022-07-01 08:00:00,61.0,25.0,90.0,4.0,189.0
229245,2022-07-01 08:00:00,62.0,25.0,95.0,3.8,192.0
229246,2022-07-01 08:00:00,63.0,24.0,95.0,3.4,197.0


In [None]:
# Hourly temperature series for the solar site, taken from the 14:00 organizer forecasts.
AIfactory_solar_14 = make_14_temp(AIfactory_solar)
AIfactory_solar_14

Unnamed: 0,datetime,temperature
0,2021-07-01 00:00:00,22.000000
1,2021-07-01 01:00:00,22.000000
2,2021-07-01 02:00:00,22.000000
3,2021-07-01 03:00:00,22.000000
4,2021-07-01 04:00:00,21.666667
...,...,...
8779,2022-07-01 19:00:00,26.000000
8780,2022-07-01 20:00:00,25.000000
8781,2022-07-01 21:00:00,24.000000
8782,2022-07-01 22:00:00,24.000000


In [None]:
# Attach the temperature column by matching on 'datetime'; how='outer' keeps
# timestamps that appear in only one of the two frames.
solar_fcst_14_f= pd.merge(solar_fcst_14, AIfactory_solar_14, on='datetime', how='outer')
# Reorder the columns so that 'temperature' follows 'datetime'.
solar_new_forecast = solar_fcst_14_f[['datetime','temperature','precipitationform','precipitationprob','humidity','windspeed','winddirection','cloud','precipitation','snow']]
solar_new_forecast

Unnamed: 0,datetime,temperature,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2021-07-01 00:00:00,22.000000,0.0,20.0,85.0,0.2,153.0,3.0,0.0,0.0
1,2021-07-01 01:00:00,22.000000,0.0,20.0,85.0,0.7,172.0,3.0,0.0,0.0
2,2021-07-01 02:00:00,22.000000,0.0,0.0,85.0,0.5,158.0,1.0,0.0,0.0
3,2021-07-01 03:00:00,22.000000,0.0,0.0,90.0,0.4,315.0,1.0,0.0,0.0
4,2021-07-01 04:00:00,21.666667,0.0,20.0,85.0,0.7,8.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
8779,2022-07-01 19:00:00,26.000000,0.0,0.0,85.0,2.4,262.0,1.0,0.0,0.0
8780,2022-07-01 20:00:00,25.000000,0.0,0.0,90.0,2.1,267.0,1.0,0.0,0.0
8781,2022-07-01 21:00:00,24.000000,0.0,0.0,90.0,1.5,257.0,1.0,0.0,0.0
8782,2022-07-01 22:00:00,24.000000,0.0,20.0,90.0,0.9,241.0,3.0,0.0,0.0


In [None]:
solar_new_forecast.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8784 entries, 0 to 8783
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   datetime           8784 non-null   datetime64[ns]
 1   temperature        8784 non-null   float64       
 2   precipitationform  8784 non-null   float64       
 3   precipitationprob  8784 non-null   float64       
 4   humidity           8784 non-null   float64       
 5   windspeed          8784 non-null   float64       
 6   winddirection      8784 non-null   float64       
 7   cloud              8784 non-null   float64       
 8   precipitation      8784 non-null   float64       
 9   snow               8784 non-null   float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 754.9 KB


## 구예보+신예보 결합

In [None]:
# Load the old-format forecast table.
solar_old_forecast = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/new_data/solar_old_forecast14.csv')
# Parse 'datetime', which read_csv leaves unparsed by default.
solar_old_forecast['datetime'] = pd.to_datetime(solar_old_forecast['datetime'])
solar_old_forecast.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7344 entries, 0 to 7343
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   datetime           7344 non-null   datetime64[ns]
 1   temperature        7344 non-null   float64       
 2   precipitationform  7344 non-null   float64       
 3   precipitationprob  7344 non-null   float64       
 4   humidity           7344 non-null   float64       
 5   windspeed          7344 non-null   float64       
 6   winddirection      7344 non-null   float64       
 7   cloud              7344 non-null   float64       
 8   precipitation      7344 non-null   float64       
 9   snow               7344 non-null   float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 573.9 KB


In [None]:
solar_old_forecast

Unnamed: 0,datetime,temperature,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2020-08-29 00:00:00,26.000000,0.0,20.000000,90.000000,0.900000,174.000000,3.000000,0.0,0.0
1,2020-08-29 01:00:00,26.000000,0.0,23.333333,91.666667,0.900000,176.000000,3.333333,0.0,0.0
2,2020-08-29 02:00:00,26.000000,0.0,26.666667,93.333333,0.900000,178.000000,3.666667,0.0,0.0
3,2020-08-29 03:00:00,26.000000,0.0,30.000000,95.000000,0.900000,180.000000,4.000000,0.0,0.0
4,2020-08-29 04:00:00,26.000000,0.0,30.000000,95.000000,0.900000,138.000000,4.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
7339,2021-06-30 19:00:00,25.000000,0.0,0.000000,73.333333,2.766667,284.666667,1.000000,0.0,0.0
7340,2021-06-30 20:00:00,24.000000,0.0,0.000000,76.666667,1.933333,296.333333,1.000000,0.0,0.0
7341,2021-06-30 21:00:00,23.000000,0.0,0.000000,80.000000,1.100000,308.000000,1.000000,0.0,0.0
7342,2021-06-30 22:00:00,22.666667,0.0,10.000000,81.666667,0.800000,256.333333,2.000000,0.0,0.0


In [None]:
# Combined coverage: 2020-08-29 00:00:00 ~ 2022-07-01 23:00:00
# Stack old and new forecasts row-wise. Row labels of both frames are kept as-is
# (no ignore_index), so labels repeat in the result.
solar_forecast_f= pd.concat([solar_old_forecast, solar_new_forecast],axis=0)
solar_forecast_f

Unnamed: 0,datetime,temperature,precipitationform,precipitationprob,humidity,windspeed,winddirection,cloud,precipitation,snow
0,2020-08-29 00:00:00,26.0,0.0,20.000000,90.000000,0.9,174.0,3.000000,0.0,0.0
1,2020-08-29 01:00:00,26.0,0.0,23.333333,91.666667,0.9,176.0,3.333333,0.0,0.0
2,2020-08-29 02:00:00,26.0,0.0,26.666667,93.333333,0.9,178.0,3.666667,0.0,0.0
3,2020-08-29 03:00:00,26.0,0.0,30.000000,95.000000,0.9,180.0,4.000000,0.0,0.0
4,2020-08-29 04:00:00,26.0,0.0,30.000000,95.000000,0.9,138.0,4.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
8779,2022-07-01 19:00:00,26.0,0.0,0.000000,85.000000,2.4,262.0,1.000000,0.0,0.0
8780,2022-07-01 20:00:00,25.0,0.0,0.000000,90.000000,2.1,267.0,1.000000,0.0,0.0
8781,2022-07-01 21:00:00,24.0,0.0,0.000000,90.000000,1.5,257.0,1.000000,0.0,0.0
8782,2022-07-01 22:00:00,24.0,0.0,20.000000,90.000000,0.9,241.0,3.000000,0.0,0.0


In [None]:
solar_forecast_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16128 entries, 0 to 8783
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   datetime           16128 non-null  datetime64[ns]
 1   temperature        16128 non-null  float64       
 2   precipitationform  16128 non-null  float64       
 3   precipitationprob  16128 non-null  float64       
 4   humidity           16128 non-null  float64       
 5   windspeed          16128 non-null  float64       
 6   winddirection      16128 non-null  float64       
 7   cloud              16128 non-null  float64       
 8   precipitation      16128 non-null  float64       
 9   snow               16128 non-null  float64       
dtypes: datetime64[ns](1), float64(9)
memory usage: 1.4 MB


In [None]:
# Save the combined solar-site forecast; index=False leaves the row labels out of the file.
solar_forecast_f.to_csv('/content/drive/MyDrive/Colab Notebooks/AIfactory_에너지 인공지능 경진대회/new_data/solar_forecast_f.csv', index=False) 