In [1]:
import pandas as pd
import numpy as np
import requests
from pandas.io.json import json_normalize
import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
from matplotlib import rc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from scipy.stats import randint
import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
_API_URL = 'https://research-api.dershare.xyz'
# _API_KEY 직접 입력
_API_KEY = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJoSDROSE5yNXFiTjV4RmpBRzdHNFo4IiwiaWF0IjoxNjY4MjY5MTIzLCJleHAiOjE2Njg3ODM2MDAsInR5cGUiOiJhcGlfa2V5In0.AdL4blM81YO_Qmvk3XRL8-L7G9C8YTDprsyC_TY-i30'
_AUTH_PARAM = {'headers': {'Authorization': f'Bearer {_API_KEY}'}}

In [3]:
class make_dataset:
    def __init__(self, weather_info_csv, num):
        self.weather_info_csv = weather_info_csv
        self.num = num
        
    def make_table(self):
        weather_info_csv = self.weather_info_csv
        num = self.num
        df_electronic = pd.read_csv('gens.csv')
        df_electronic['time'] = df_electronic['time'].str.split('+').str[0]
        df_electronic['time'] = pd.to_datetime(df_electronic['time'])
        df_weather = pd.read_csv(weather_info_csv)
        df_weather = df_weather.rename(columns = {'id' : 'weather_id'})
        df_weather['time'] = df_weather['time'].str.split('+').str[0]
        df_weather['time'] = pd.to_datetime(df_weather['time'])
        df_num = df_electronic[df_electronic['id'] == num]
        df_weather['time'] = df_weather['time'].dt.round(freq = 'H')  
        df_weather.drop_duplicates(['time'], inplace = True)
        df_info = pd.merge(df_weather,df_num, how='outer')
        df_info = df_info.dropna()
        df_info = df_info.drop(['id'], axis = 1)
        return df_info
    
    def concat_table(self):
        df_info = self.make_table()
        df_11 = make_dataset('발전소11_기상정보.csv', 11)
        df_11 = df_11.make_table()
        df_12 = make_dataset('발전소12_기상정보.csv', 12)
        df_12 = df_12.make_table()
        df_13 = make_dataset('발전소13_기상정보.csv', 13)
        df_13 = df_13.make_table()
        df_14 = make_dataset('발전소14_기상정보.csv', 14)
        df_14 = df_14.make_table()    
        df_train_table = pd.concat([df_11, df_12, df_13, df_14])
        df_train_table = df_train_table.dropna()
        return df_train_table
    
    def scaling_train_data(self):
        df_train_table = self.concat_table()
        x = df_train_table.loc[:, 'temperature' : 'precip_1h']
        y = df_train_table.iloc[:, -1]
        scaler = StandardScaler()
        x = scaler.fit_transform(x)
        y = np.array(y).reshape(-1, 1)
        X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state = 1) 
        return X_train, X_test, y_train, y_test
    
    def weather_prediction_table(self):
        id = 1
        date = '2022-11-12' # 매일매일 수정해야한다.
        hour = 4
        forecasts_1 = requests.get(f'https://research-api.dershare.xyz/open-proc/cmpt-2022/weathers/1/{id}/forecasts/{date}/{hour}', headers={'Authorization': f'Bearer {_API_KEY}'}).json()
        table = pd.DataFrame(forecasts_1)
        table = table.drop('fcst_time', axis = 1)
        table['time'] = table['time'].str.split('+').str[0]
        table['time'] = pd.to_datetime(table['time'])
        return table
    
    def final_scaling(self):
        table = self.weather_prediction_table()
        final_x = table.loc[:, 'temperature' : 'precip_1h']
        return final_x
        
    def running_model(self):
        X_train, X_test, y_train, y_test = self.scaling_train_data()
        base_models = [
        ('XGB', xgb.XGBRegressor(subsample=0.6,n_estimators=120,
                                 min_child_weight=38,max_depth=131,
                                 learning_rate=0.02,gamma=0.05)),
        ('SVR',SVR()),
        ('Random Forest',RandomForestRegressor(random_state =1, max_features = 6, 
                                               n_estimators = 130, max_depth = 158)),
        ('MLP Regression',MLPRegressor(random_state=1, max_iter=500)),
        ('Gradient Boostiong', GradientBoostingRegressor(learning_rate = 0.01, max_depth = 157, 
                                                         min_samples_split = 125, n_estimators = 166))]
    
        for name, model in base_models:
            start_time = time.time()
            model.fit(X_train, y_train)
            prediction = model.predict(X_test)
            end_time = time.time()
            r2 = model.score(X_test, y_test)
            rmse = mean_squared_error(y_test, prediction, squared = False)
            print("-------{}-------".format(name))
            print("Coefficient of determination: {}".format(r2))
            print("Root Mean Squared Error: {}".format(rmse))
            print("Computation Time: {}".format(end_time - start_time))
            print("----------------------------------\n")
    
        start_time = time.time()
        stacked.fit(X_train, y_train)    
        stacked_prediction = stacked.predict(X_test)
        end_time = time.time()
        stacked_r2 = stacked.score(X_test, y_test)
        stacked_rmse = mean_squared_error(y_test, stacked_prediction, squared = False)
        print("-------Stacked Ensemble-------")
        print("Coefficient of determination: {}".format(stacked_r2))
        print("Root Mean Squared Error: {}".format(stacked_rmse))
        print("Computation Time: {}".format(end_time - start_time))
        print("----------------------------------")
    
    def final_prediction(self):
        X_train, X_test, y_train, y_test = self.scaling_train_data()
        final_x = self.final_scaling()
        scaler = StandardScaler()
        final_x = scaler.fit_transform(final_x)
        base_models = [
         ('XGB', xgb.XGBRegressor(subsample=0.6,n_estimators=1000,
                                  min_child_weight=2,max_depth=5,
                                  learning_rate=0.02,gamma=0.05)),
         ('SVR',SVR()),
         ('Random Forest',RandomForestRegressor(random_state =1, max_features = 5, 
                                                n_estimators = 103, max_depth = 189)),
         ('MLP Regression',MLPRegressor(random_state=1, max_iter=500)),
         ('Gradient Boostiong', GradientBoostingRegressor(learning_rate = 0.01, max_depth = 157, 
                                                          min_samples_split = 125, n_estimators = 166))]
        stacked = StackingRegressor(
        estimators = base_models,
        final_estimator = LinearRegression(), cv = 5)
        stacked.fit(X_train, y_train)    
        prediction = stacked.predict(final_x)
        return prediction        
#        model = xgb.XGBRegressor(subsample=0.6,n_estimators=1000,
#                                 min_child_weight=2,max_depth=5,
#                                 learning_rate=0.02,gamma=0.05)
#        model.fit(X_train, y_train)
#        prediction = model.predict(final_x)
#         return prediction
    
    def final_table(self):
        prediction = self.final_prediction()
        table = self.weather_prediction_table()
        table['predict'] = prediction
        return table

In [4]:
data = make_dataset('발전소11_기상정보.csv', 11)

In [5]:
df= data.final_table()



In [6]:
df.to_csv("postech예측_1113_15PM.csv")

In [7]:
df

Unnamed: 0,time,temperature,humidity,dew_point,wind_dir,wind_spd,uv_idx,visibility,cloudiness,ceiling,precip_prob,precip_1h,predict
0,2022-11-11 20:00:00,10.00000,91.0,8.33333,138.0,5.63270,0.0,16.0934,93.0,9144.0,0.0,0.0,-0.243931
1,2022-11-11 21:00:00,10.55560,90.0,8.88889,134.0,5.63270,0.0,16.0934,95.0,9144.0,2.0,0.0,-0.135845
2,2022-11-11 22:00:00,11.11110,90.0,9.44444,126.0,5.63270,0.0,16.0934,96.0,9144.0,7.0,0.0,-0.187157
3,2022-11-11 23:00:00,12.22220,87.0,10.00000,124.0,5.63270,0.0,16.0934,98.0,9144.0,7.0,0.0,-0.204084
4,2022-11-12 00:00:00,13.88890,77.0,10.00000,129.0,7.40298,1.0,16.0934,99.0,9144.0,7.0,0.0,32.429584
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,2022-11-14 15:00:00,3.88889,93.0,2.77778,306.0,9.33420,0.0,16.0934,54.0,9144.0,5.0,0.0,0.198577
68,2022-11-14 16:00:00,2.77778,91.0,1.66667,302.0,11.10450,0.0,16.0934,42.0,9144.0,0.0,0.0,1.014496
69,2022-11-14 17:00:00,2.77778,90.0,1.11111,298.0,13.03570,0.0,16.0934,32.0,9144.0,0.0,0.0,1.746038
70,2022-11-14 18:00:00,2.77778,91.0,1.11111,296.0,14.80600,0.0,16.0934,23.0,9144.0,0.0,0.0,1.512704


In [9]:
data.running_model()

-------XGB-------
Coefficient of determination: 0.8768424082483696
Root Mean Squared Error: 7.85293918767872
Computation Time: 33.911524057388306
----------------------------------

-------SVR-------
Coefficient of determination: 0.8486846129066343
Root Mean Squared Error: 8.704487118361502
Computation Time: 193.31806230545044
----------------------------------

-------Random Forest-------
Coefficient of determination: 0.8932084206022788
Root Mean Squared Error: 7.3125718300329785
Computation Time: 24.612250566482544
----------------------------------

-------MLP Regression-------
Coefficient of determination: 0.8800885932771844
Root Mean Squared Error: 7.748754280242792
Computation Time: 119.7559974193573
----------------------------------

-------Gradient Boostiong-------
Coefficient of determination: 0.8540026994473522
Root Mean Squared Error: 8.550156288109676
Computation Time: 71.10402369499207
----------------------------------



NameError: ignored