In [1]:
import pandas as pd
import numpy as np
import requests
from pandas.io.json import json_normalize
import json
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
import xgboost as xgb
from matplotlib import rc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingRegressor
from scipy.stats import randint
import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
_API_URL = 'https://research-api.dershare.xyz'
# _API_KEY 직접 입력
_API_KEY = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJoSDROSE5yNXFiTjV4RmpBRzdHNFo4IiwiaWF0IjoxNjY4MjYyOTgwLCJleHAiOjE2Njg3ODM2MDAsInR5cGUiOiJhcGlfa2V5In0.a_Ayq59em7dCGyWvv8oLIFfEpQG9j7zeIhQMeR6xvO4'
_AUTH_PARAM = {'headers': {'Authorization': f'Bearer {_API_KEY}'}}

In [3]:
class make_dataset:
    def __init__(self, weather_info_csv, num):
        self.weather_info_csv = weather_info_csv
        self.num = num
        
    def make_table(self):
        weather_info_csv = self.weather_info_csv
        num = self.num
        df_electronic = pd.read_csv('gens.csv')
        df_electronic['time'] = df_electronic['time'].str.split('+').str[0]
        df_electronic['time'] = pd.to_datetime(df_electronic['time'])
        df_weather = pd.read_csv(weather_info_csv)
        df_weather = df_weather.rename(columns = {'id' : 'weather_id'})
        df_weather['time'] = df_weather['time'].str.split('+').str[0]
        df_weather['time'] = pd.to_datetime(df_weather['time'])
        df_num = df_electronic[df_electronic['id'] == num]
        df_weather['time'] = df_weather['time'].dt.round(freq = 'H')  
        df_weather.drop_duplicates(['time'], inplace = True)
        df_info = pd.merge(df_weather,df_num, how='outer')
        df_info = df_info.dropna()
        df_info = df_info.drop(['id'], axis = 1)
        return df_info
    
    def concat_table(self):
        df_info = self.make_table()
        df_11 = make_dataset('발전소11_기상정보.csv', 11)
        df_11 = df_11.make_table()
        df_12 = make_dataset('발전소12_기상정보.csv', 12)
        df_12 = df_12.make_table()
        df_13 = make_dataset('발전소13_기상정보.csv', 13)
        df_13 = df_13.make_table()
        df_14 = make_dataset('발전소14_기상정보.csv', 14)
        df_14 = df_14.make_table()    
        df_train_table = pd.concat([df_11, df_12, df_13, df_14])
        df_train_table = df_train_table.dropna()
        return df_train_table
    
    def scaling_train_data(self):
        df_train_table = self.concat_table()
        x = df_train_table.loc[:, 'temperature' : 'precip_1h']
        y = df_train_table.iloc[:, -1]
        scaler = StandardScaler()
        x = scaler.fit_transform(x)
        y = np.array(y).reshape(-1, 1)
        X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state = 1) 
        return X_train, X_test, y_train, y_test
    
    def weather_prediction_table(self):
        id = 1
        date = '2022-11-13' # 매일매일 수정해야한다.
        hour = 4
        forecasts_1 = requests.get(f'https://research-api.dershare.xyz/open-proc/cmpt-2022/weathers/1/{id}/forecasts/{date}/{hour}', headers={'Authorization': f'Bearer {_API_KEY}'}).json()
        table = pd.DataFrame(forecasts_1)
        table = table.drop('fcst_time', axis = 1)
        table['time'] = table['time'].str.split('+').str[0]
        table['time'] = pd.to_datetime(table['time'])
        return table
    
    def final_scaling(self):
        table = self.weather_prediction_table()
        final_x = table.loc[:, 'temperature' : 'precip_1h']
        return final_x
        
    def running_model(self):
        X_train, X_test, y_train, y_test = self.scaling_train_data()
        base_models = [
        ('XGB', xgb.XGBRegressor(subsample=0.6,n_estimators=1000,
                                 min_child_weight=2,max_depth=5,
                                 learning_rate=0.02,gamma=0.05)),
        ('SVR',SVR()),
        ('Random Forest',RandomForestRegressor(random_state =1, max_features = 5, 
                                               n_estimators = 103, max_depth = 189)),
        ('MLP Regression',MLPRegressor(random_state=1, max_iter=500)),
        ('Gradient Boostiong', GradientBoostingRegressor(learning_rate = 0.01, max_depth = 157, 
                                                         min_samples_split = 125, n_estimators = 166))]
        stacked = StackingRegressor(
        estimators = base_models,
        final_estimator = LinearRegression(), cv = 5)
    
        for name, model in base_models:
            start_time = time.time()
            model.fit(X_train, y_train)
            prediction = model.predict(X_test)
            end_time = time.time()
            r2 = model.score(X_test, y_test)
            rmse = mean_squared_error(y_test, prediction, squared = False)
            print("-------{}-------".format(name))
            print("Coefficient of determination: {}".format(r2))
            print("Root Mean Squared Error: {}".format(rmse))
            print("Computation Time: {}".format(end_time - start_time))
            print("----------------------------------\n")
    
        start_time = time.time()
        stacked.fit(X_train, y_train)    
        stacked_prediction = stacked.predict(X_test)
        end_time = time.time()
        stacked_r2 = stacked.score(X_test, y_test)
        stacked_rmse = mean_squared_error(y_test, stacked_prediction, squared = False)
        print("-------Stacked Ensemble-------")
        print("Coefficient of determination: {}".format(stacked_r2))
        print("Root Mean Squared Error: {}".format(stacked_rmse))
        print("Computation Time: {}".format(end_time - start_time))
        print("----------------------------------")
    
    def final_prediction(self):
        X_train, X_test, y_train, y_test = self.scaling_train_data()
        final_x = self.final_scaling()
        scaler = StandardScaler()
        final_x = scaler.fit_transform(final_x)
        base_models = [
         ('XGB', xgb.XGBRegressor(subsample=0.6,n_estimators=1000,
                                  min_child_weight=2,max_depth=5,
                                  learning_rate=0.02,gamma=0.05)),
         ('SVR',SVR()),
         ('Random Forest',RandomForestRegressor(random_state =1, max_features = 5, 
                                                n_estimators = 103, max_depth = 189)),
         ('MLP Regression',MLPRegressor(random_state=1, max_iter=500)),
         ('Gradient Boostiong', GradientBoostingRegressor(learning_rate = 0.01, max_depth = 157, 
                                                          min_samples_split = 125, n_estimators = 166))]
        stacked = StackingRegressor(
        estimators = base_models,
        final_estimator = LinearRegression(), cv = 5)
        stacked.fit(X_train, y_train)    
        prediction = stacked.predict(final_x)
        return prediction        
#        model = xgb.XGBRegressor(subsample=0.6,n_estimators=1000,
#                                min_child_weight=2,max_depth=5,
#                                 learning_rate=0.02,gamma=0.05)
#        model.fit(X_train, y_train)
#        prediction = model.predict(final_x)
#        return prediction
    
    def final_table(self):
        prediction = self.final_prediction()
        table = self.weather_prediction_table()
        table['predict'] = prediction
        return table

In [4]:
data = make_dataset('발전소11_기상정보.csv', 11)

In [8]:
df2 = data.final_table()

KeyError: ignored

In [None]:
df1.to_csv("postech_1113예측.csv")