In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
import pymysql
from IPython.display import clear_output
from tqdm import tqdm

In [2]:
import tensorflow.keras as keras




In [4]:
locations = {
    "gapyeong": "(654)",
    "gangneung": "(189, 580, 581, 584)",
    "gangjin": "(982)",
    "geochang": "(274, 280)",
    "changwon": "(146, 294, 092, 289)",
    "gyeongsan": "(131, 524)",
    "gyeongju": "(133, 135, 706, 708)",
    "gyeryong": "(109)",
    "goryeong": "(241, 242)",
    "goseong": "(512, 513, 514)",
    "gochangeup": "(504, 505, 567)",
    "goheung": "(986)",
    "gokseong": "(263, 265, 266)",
    "gongju": "(575, 576, 577, 595, 603, 604, 605)",
    "gwangyang": "(272, 273, 275, 276)",
    "gwangjusi": "(261, 569, 167, 552, 553, 718, 719)",
    "gwangju": "(191, 192)",
    "goesan": "(563, 564)",
    "gurye": "(558)",
    "guri": "(062, 063)",
    "gumi": "(123, 125, 134)",
    "gunsan": "(227, 228)",
    "gunwi": "(205)",
    "gunpo": "(214, 215)",
    "geumsan": "(200, 204)",
    "gimje": "(160, 161, 501)",
    "gimcheon": "(121, 122, 130, 537)",
    "gimpo": "(068)",
    "gimhae": "(091, 147, 148, 149, 245, 252, 628, 755, 756, 757)",
    "naju": "(550, 551)",
    "namyangju": "(074, 651, 652, 764)",
    "namwon": "(247, 264, 269, 559, 586)",
    "nonsan": "(155, 606, 607)",
    "danyang": "(521, 522)",
    "damyang": "(249, 262)",
    "dangjin": "(287, 288, 571)",
    "daegu": "(230, 621, 777, 778, 779, 127, 058, 129, 201, 774, 775, 776, 623, 519, 057, 231, 232, 233, 772, 773)",
    "daejeon": "(114, 115, 119, 154, 153, 151, 152, 113)",
    "donghae": "(582, 583)",
    "mokpo": "(509)",
    "muan": "(056, 508, 547, 548)",
    "muju": "(164, 168)",
    "mungyeong": "(565)"
}

In [5]:
def combine_date(col1, col2):
    result = str(col1)
    if len(col2) < 2 :
        result += "0"
    result += str(col2)
    return result

def sum_data(data_list):
    result = 0.0
    for data in data_list:
        if data != "":
            result += data
    return result

In [7]:
for key in tqdm(locations.keys()):
    print(f'\n\n {key} 지역 모델 작업중\n\n')
    conn = create_engine("mysql+pymysql://jeogi:1234@10.10.21.86:3306/jeogi?charset=utf8")
    data_raw = pd.read_sql(f"SELECT * FROM {key}",conn,index_col="index")
    conn.dispose()
    data = data_raw.loc[data_raw.Time < 24]
    data.Date = data.Date.astype(str)
    data.Time = data.Time.astype(str)
    data["DateTime"] = data.apply(lambda row : combine_date(row["Date"],row["Time"]),axis=1)
    data["move"] = data.apply(lambda row : sum_data([row["sum(Type1)"],row["sum(Type2)"],row["sum(Type3)"],row["sum(Type4)"],row["sum(Type5)"],row["sum(Type6)"]]),axis=1)
    data = data[["DateTime","EntranceAndExit","move"]].sort_values(["DateTime"]).reset_index().iloc[:,1:]
    in_data = data.loc[data.DateTime > "2020123123"].loc[data.EntranceAndExit == 0].reset_index().iloc[:,1:].rename(columns={"move":"in"})
    out_data = data.loc[data.DateTime > "2020123123"].loc[data.EntranceAndExit == 1].reset_index().iloc[:,1:].rename(columns={"move":"out"})
    refined_data = pd.merge(in_data, out_data, how="inner", on=["DateTime"])[["DateTime","in","out"]]
    X_train = refined_data[["in","out"]].iloc[:-1,:]
    y_train = refined_data[["in","out"]].iloc[1:,:].reset_index().iloc[:,1:]

    X_val = X_train.iloc[round(len(X_train)*0.8):,:]
    X_train = X_train.iloc[:round(len(X_train)*0.8),:]
    y_val = y_train.iloc[round(len(y_train)*0.8):,:]
    y_train = y_train.iloc[:round(len(y_train)*0.8),:]
    
    
    model = keras.models.Sequential(
        [
            keras.layers.LSTM(2, input_shape=(refined_data[["in","out"]].shape[1],1)),
            keras.layers.Dense(256,activation="relu"),
            keras.layers.Dropout(0.3),
            keras.layers.Dense(64,activation="relu"),
            keras.layers.Dense(2)
        ]
    )

    earlyStoppingCB = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    checkpointCB = keras.callbacks.ModelCheckpoint(f"./models/{key}.h5", save_best_only=True)
    
    model.compile(optimizer="adam", loss="MSE", metrics=["mae"])
    model.fit(X_train,y_train, validation_data=(X_val,y_val), epochs=200, callbacks=[checkpointCB])
    
    clear_output()
    
    model.evaluate(refined_data[["in","out"]].iloc[:-1,:],refined_data[["in","out"]].iloc[1:,:].reset_index().iloc[:,1:])



100%|███████████████████████████████████████████████████████████████████████████████| 43/43 [3:51:19<00:00, 322.78s/it]
