In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import csv
import datetime as dt

In [2]:
# Source CSV holding every weather/location record used by this notebook.
filename = "weather_refine_reduced.csv" # full dataset

org_data = pd.read_csv(filename, encoding = "utf-8")
# Raw rows as a NumPy array; makeXy() iterates over this module-level `data`
# and reads positions 1, 2 and the last six entries of each row.
data = org_data.values
# Column labels of the loaded frame (axis 1).
labels = org_data.axes[1]

# Day-of-week handling
# 0: Monday ~ 6: Sunday (the numbering used by datetime.date.weekday())
def getDOW(year, month, day):
  """Return the day of the week for a calendar date.

  Args:
    year, month, day: integer components of the date.

  Returns:
    int: 0 for Monday through 6 for Sunday, as produced by
    ``datetime.date.weekday()``.

  Raises:
    ValueError: if the components do not form a valid date.
  """
  return dt.date(year, month, day).weekday()

def makeXy(searchLoc, h):
  """Build features and hour-bucket labels for one location.

  Scans the module-level ``data`` array and keeps every row whose entry at
  position 2 contains ``searchLoc`` (substring match).

  Args:
    searchLoc: location text to look for in position 2 of each row.
    h: bucket width in hours. Each hour is floor-divided by ``h``, so h=2
      groups the 24 hours into 12 classes.

  Returns:
    (X, y): ``X`` is a list of rows
    ``[month, weekday, <last six values of the source row>]`` and ``y`` is
    the list of bucketed hours (``hour // h``), aligned with ``X``.
  """
  X = []      # month, weekday, weather values
  hours = []  # hour of day parsed from the timestamp
  for record in data:
    if searchLoc not in record[2]:
      continue
    stamp = record[1].split(" ")        # "<date> <time>"
    ymd = stamp[0].split("-")           # year, month, day as strings
    hour_text = stamp[1].split(":")[0]
    weekday = getDOW(int(ymd[0]), int(ymd[1]), int(ymd[2]))
    # month + weekday, followed by the six trailing columns of the row
    # (temperature, precipitation, wind speed, humidity, sunshine, solar radiation
    # according to the header written by prepXy)
    X.append([int(ymd[1]), weekday] + list(record[-6:]))
    hours.append(int(hour_text))

  # Collapse hours into buckets of width h
  y = [hour // h for hour in hours]

  return X, y

# Reshape the dataset for neural-network training
def prepXy(X, y, searchLoc, n_classes=24):
  """Persist (X, y) as ``<searchLoc>.csv`` and return NN-ready arrays.

  The rows are written to a CSV file and read back through pandas, so the
  returned feature matrix carries whatever numeric dtype pandas infers.

  Args:
    X: sequence of feature rows (month, weekday, six weather values).
    y: sequence of integer class ids aligned with ``X``.
    searchLoc: path prefix of the CSV file; ``".csv"`` is appended.
    n_classes: width of the one-hot target rows. Defaults to 24, the value
      that used to be hard-coded.

  Returns:
    (x, y_onehot): ``x`` is a 2-D array of the feature columns and
    ``y_onehot`` is an int array of shape ``(len(X), n_classes)`` holding 100
    at the class index and 0 elsewhere. For empty input ``y_onehot`` has
    shape ``(0, n_classes)``.

  Raises:
    IndexError: if a class id does not fit in ``n_classes`` columns or ``y``
      is shorter than ``X``.
  """
  csv_path = searchLoc + ".csv"
  # newline="" is what the csv module asks for; without it, platforms that
  # translate line endings write an extra "\r" per row.
  with open(csv_path, "w", encoding="utf-8", newline="") as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['월','요일','기온(C)','강수량(mm)', '풍속(m/s)', '습도(%)', '일조(hr)', '일사(MJ/m2)', '시간'])
    for idx, features in enumerate(X):
      writer.writerow([str(value) for value in features] + [y[idx]])

  df = pd.read_csv(csv_path, encoding = "utf-8")

  # Every column but the last is a feature; the last one is the class id.
  x = np.array(df.iloc[:, :-1].values)
  class_ids = df.iloc[:, -1].to_numpy().astype(int)

  # One-hot targets scaled to 100, built in one vectorised assignment.
  y_onehot = np.zeros((len(class_ids), n_classes), dtype=int)
  y_onehot[np.arange(len(class_ids)), class_ids] = 100

  return x, y_onehot

In [3]:
# Neural-network model
class Net(torch.nn.Module):
    """Fully connected network with three hidden layers.

    Layer widths are D_in -> H -> 2H -> 4H -> D_out, and a ReLU follows every
    linear layer, including the output layer.
    """

    def __init__(self, D_in, H, D_out):
        """Create the four linear layers.

        Args:
            D_in: number of input features.
            H: width of the first hidden layer; later ones use 2*H and 4*H.
            D_out: number of output units.
        """
        super().__init__()
        self.l1 = nn.Linear(in_features=D_in, out_features=H)
        self.h1 = nn.Linear(in_features=H, out_features=H * 2)
        self.h2 = nn.Linear(in_features=H * 2, out_features=H * 4)
        self.l2 = nn.Linear(in_features=H * 4, out_features=D_out)

    def forward(self, x):
        """Run ``x`` through each linear layer followed by ReLU and return the result."""
        activations = x
        for layer in (self.l1, self.h1, self.h2, self.l2):
            activations = F.relu(layer(activations))
        return activations
# Training
def train_NN(x, y, H, lr, epochs):
  """Train a ``Net`` with full-batch SGD on a summed MSE loss.

  The network's input and output widths are taken from the data itself
  (number of columns of ``x`` and ``y``) rather than from notebook-level
  globals, so the function works no matter which cells ran before it.

  Args:
    x: training features, array-like of shape (n_samples, n_features).
    y: training targets, array-like of shape (n_samples, n_outputs).
    H: width of the first hidden layer.
    lr: SGD learning rate.
    epochs: number of full-batch update steps.

  Returns:
    The trained ``Net``; it lives on the GPU when CUDA is available,
    otherwise on the CPU.

  Raises:
    ValueError: if ``x`` or ``y`` is not two-dimensional.
  """
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  # Stack once through NumPy: building a tensor from a Python list of
  # ndarrays is extremely slow. Cast to float32 here instead of every epoch.
  inputs = torch.as_tensor(np.asarray(x), dtype=torch.float32, device=device)
  targets = torch.as_tensor(np.asarray(y), dtype=torch.float32, device=device)
  if inputs.dim() != 2 or targets.dim() != 2:
    raise ValueError("train_NN expects 2-D x and y (samples x columns)")

  model = Net(inputs.shape[1], H, targets.shape[1]).to(device)
  criterion = torch.nn.MSELoss(reduction='sum')
  optimizer = torch.optim.SGD(model.parameters(), lr=lr)

  for _ in range(epochs):
    loss = criterion(model(inputs), targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  return model

# Evaluation
def eval_NN(x, y, model):
  """Return the top-1 accuracy (in percent) of ``model`` on an evaluation set.

  The inputs are placed on whatever device the model's parameters live on,
  so evaluation works on CPU-only machines as well as on CUDA. All samples
  are scored in a single forward pass with gradient tracking disabled.

  Args:
    x: evaluation features, array-like of shape (n_samples, n_features).
    y: one-hot style targets, array-like of shape (n_samples, n_classes);
      the true class of a row is the index of its largest value.
    model: a callable torch module mapping a float32 batch to per-class scores.

  Returns:
    float: percentage of rows whose highest-scoring class equals the true
    class, rounded to one decimal. Ties resolve to the lowest index.

  Raises:
    ValueError: if ``x`` contains no samples.
  """
  inputs = np.asarray(x)
  total = len(inputs)
  if total == 0:
    raise ValueError("eval_NN needs at least one evaluation sample")

  # Follow the model's device instead of assuming CUDA is present.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device("cpu")

  batch = torch.as_tensor(inputs, dtype=torch.float32, device=device)
  with torch.no_grad():
    scores = model(batch).cpu().numpy()

  predicted = scores.argmax(axis=1)
  actual = np.asarray(y).argmax(axis=1)
  correct = int((predicted == actual).sum())

  return round(correct*100/total,1)

In [4]:
#RandomForest model

from sklearn.ensemble import RandomForestClassifier

def train_RF(trainX, trainY):
  """Fit a RandomForestClassifier with default settings and return it.

  Args:
    trainX: training feature rows.
    trainY: class label for each row of ``trainX``.

  Returns:
    The fitted classifier.
  """
  # fit() hands back the estimator itself, so it can be returned directly.
  return RandomForestClassifier().fit(trainX, trainY)

def eval_RF(testX, testY, model):
  """Score a fitted classifier on a test set and tally per-class counts.

  Args:
    testX: test feature rows, array-like of shape (n_samples, n_features).
    testY: true label for each row, indexable by position.
    model: object with a ``predict(features)`` method returning one label
      per row.

  Returns:
    list: ``[acc, preds, truths, right]`` where ``acc`` is the accuracy in
    percent rounded to two decimals, ``preds`` maps each predicted label to
    how many times it was predicted, ``truths`` maps each true label to how
    many times it occurs, and ``right`` maps each label to its number of
    correct predictions.

  Raises:
    ValueError: if ``testX`` contains no samples.
  """
  features = np.asarray(testX)
  total = len(features)
  if total == 0:
    raise ValueError("eval_RF needs at least one test sample")
  if features.ndim == 1:
    # Rows given as scalars: treat each one as a single-feature sample.
    features = features.reshape(-1, 1)

  # One batched call instead of one predict() per row.
  predictions = model.predict(features)

  correct = 0
  preds = dict()   # predicted label -> count
  truths = dict()  # true label -> count
  right = dict()   # label -> count of correct predictions
  for idx, pred in enumerate(predictions):
    truth = testY[idx]
    # Counts start at 1 on first sight (they used to start at 0, leaving
    # every tally one too low).
    preds[pred] = preds.get(pred, 0) + 1
    truths[truth] = truths.get(truth, 0) + 1
    if pred == truth:
      correct += 1
      right[pred] = right.get(pred, 0) + 1

  acc = round(100*correct/total, 2)
  return [acc, preds, truths, right]

In [5]:
def Fold(k, n, X, y):
  """Build the n-th of k interleaved train/test splits.

  Samples whose position satisfies ``position % k == n`` form the test set;
  all other samples form the training set. Call with n = 0 .. k-1 to obtain
  every fold.

  Args:
    k: number of folds.
    n: which fold to hold out.
    X: sequence of feature rows.
    y: labels, indexable by the same positions as ``X``.

  Returns:
    dict with the keys 'trainX', 'trainY', 'testX' and 'testY', each a list.
  """
  split = {'trainX': [], 'trainY': [], 'testX': [], 'testY': []}
  for position, sample in enumerate(X):
    part = 'test' if position % k == n else 'train'
    split[part + 'X'].append(sample)
    split[part + 'Y'].append(y[position])
  return split

# Random-forest k-fold evaluation
def kFoldEval_RF(k, X, y):
  """Run k-fold cross validation of the random-forest model.

  Every fold produced by ``Fold`` is used once as the test set: a model is
  trained on the remaining data with ``train_RF`` and scored with
  ``eval_RF``. The mean accuracy is printed.

  Args:
    k: number of folds.
    X: feature rows.
    y: class labels aligned with ``X``.

  Returns:
    list: ``[results, mean_accuracy]`` where ``results`` holds the
    ``eval_RF`` output of each fold, in fold order.
  """
  splits = [Fold(k, fold_no, X, y) for fold_no in range(k)]

  results = [
    eval_RF(split['testX'], split['testY'], train_RF(split['trainX'], split['trainY']))
    for split in splits
  ]
  # The first entry of each eval_RF result is that fold's accuracy.
  mean_acc = sum(fold_result[0] for fold_result in results) / k
  print(f'{round(mean_acc,2)}% accuracy for {k}-fold cross validation')
  return [results, mean_acc]

# Neural-network k-fold evaluation
def kFoldEval_NN(k, X, y, H=None, lr=None, epochs=None):
  """Run k-fold cross validation of the neural-network model.

  Every fold produced by ``Fold`` is used once as the test set: a network is
  trained on the remaining data with ``train_NN`` and scored with
  ``eval_NN``. The mean accuracy is printed.

  Args:
    k: number of folds.
    X: feature rows.
    y: one-hot style targets aligned with ``X``.
    H: hidden-layer width passed to ``train_NN``.
    lr: learning rate passed to ``train_NN``.
    epochs: number of training steps passed to ``train_NN``.
      Any of ``H``, ``lr`` and ``epochs`` left as ``None`` falls back to the
      notebook-level variable of the same name, which keeps existing
      ``kFoldEval_NN(k, X, y)`` calls working.

  Returns:
    list: ``[results, mean_accuracy]`` where ``results`` holds the accuracy
    of each fold, in fold order.

  Raises:
    NameError: if a hyperparameter is neither passed nor defined at
      notebook level.
  """
  # Resolve hyperparameters: explicit arguments win, notebook globals are the fallback.
  notebook_vars = globals()
  hyper = {}
  for name, value in (('H', H), ('lr', lr), ('epochs', epochs)):
    if value is None:
      if name not in notebook_vars:
        raise NameError(f"name '{name}' is not defined; pass it to kFoldEval_NN or define it first")
      value = notebook_vars[name]
    hyper[name] = value

  folds = [Fold(k, i, X, y) for i in range(k)]  # datasets for k-fold evaluation

  results = []
  acc = 0
  for s in folds:
    model = train_NN(s['trainX'], s['trainY'], hyper['H'], hyper['lr'], hyper['epochs'])
    ev = eval_NN(s['testX'], s['testY'], model)
    acc += ev
    results.append(ev)
  print(f'{round(acc/k,2)}% accuracy for {k}-fold cross validation')
  return [results, acc/k]

In [6]:
# Locations to search for
topLocs = [
  '안양동',
  '본오동',
  '선부동',
  '중동',
  '태평동',
  '정자동',
  '상대원동',
  '산본동',
  '부곡동',
  '매탄동',
  '상동',
  '고잔동',
  '화정동',
]

In [10]:
# For every location: build its dataset, then print the 10-fold accuracy of both models.
for searchLoc in topLocs:
  x,y = makeXy(searchLoc, 1) # h=1 keeps one class per hour of the day
  X, Y = prepXy(x, y, searchLoc) # feature array + one-hot targets; also writes <searchLoc>.csv
  D_in, D_out = 8, 24 # network input / output widths (8 feature columns, 24 one-hot target columns)
  H, lr, epochs = 96, 1e-7, 20000 # hidden width, learning rate, training steps; module-level values picked up by kFoldEval_NN
  print("\n\nAccuracies for",searchLoc,":")
  print("\tRandomForest:\t", end="")
  # The random forest is fed the raw rows and integer hour labels from makeXy.
  kFoldEval_RF(10, np.array(x), np.array(y))
  print("\tNeuralNetwork:\t", end="")
  # The network is fed the arrays returned by prepXy (one-hot targets).
  kFoldEval_NN(10, X, Y)



Accuracies for 안양동 :
	RandomForest:	26.61% accuracy for 10-fold cross validation
	NeuralNetwork:	16.68% accuracy for 10-fold cross validation


Accuracies for 본오동 :
	RandomForest:	29.86% accuracy for 10-fold cross validation
	NeuralNetwork:	18.89% accuracy for 10-fold cross validation


Accuracies for 선부동 :
	RandomForest:	28.64% accuracy for 10-fold cross validation
	NeuralNetwork:	17.27% accuracy for 10-fold cross validation


Accuracies for 중동 :
	RandomForest:	26.01% accuracy for 10-fold cross validation
	NeuralNetwork:	13.38% accuracy for 10-fold cross validation


Accuracies for 태평동 :
	RandomForest:	31.21% accuracy for 10-fold cross validation
	NeuralNetwork:	21.65% accuracy for 10-fold cross validation


Accuracies for 정자동 :
	RandomForest:	21.02% accuracy for 10-fold cross validation
	NeuralNetwork:	12.72% accuracy for 10-fold cross validation


Accuracies for 상대원동 :
	RandomForest:	25.14% accuracy for 10-fold cross validation
	NeuralNetwork:	13.5% accuracy for 10-fold cross vali

In [12]:
# Hard-coded copy of the per-location accuracy report (RandomForest vs. NeuralNetwork,
# 10-fold cross validation); it is written to results.txt further down.
inp='''Accuracies for 안양동 :
	RandomForest:	26.61% accuracy for 10-fold cross validation
	NeuralNetwork:	16.68% accuracy for 10-fold cross validation


Accuracies for 본오동 :
	RandomForest:	29.86% accuracy for 10-fold cross validation
	NeuralNetwork:	18.89% accuracy for 10-fold cross validation


Accuracies for 선부동 :
	RandomForest:	28.64% accuracy for 10-fold cross validation
	NeuralNetwork:	17.27% accuracy for 10-fold cross validation


Accuracies for 중동 :
	RandomForest:	26.01% accuracy for 10-fold cross validation
	NeuralNetwork:	13.38% accuracy for 10-fold cross validation


Accuracies for 태평동 :
	RandomForest:	31.21% accuracy for 10-fold cross validation
	NeuralNetwork:	21.65% accuracy for 10-fold cross validation


Accuracies for 정자동 :
	RandomForest:	21.02% accuracy for 10-fold cross validation
	NeuralNetwork:	12.72% accuracy for 10-fold cross validation


Accuracies for 상대원동 :
	RandomForest:	25.14% accuracy for 10-fold cross validation
	NeuralNetwork:	13.5% accuracy for 10-fold cross validation


Accuracies for 산본동 :
	RandomForest:	28.3% accuracy for 10-fold cross validation
	NeuralNetwork:	18.76% accuracy for 10-fold cross validation


Accuracies for 부곡동 :
	RandomForest:	27.43% accuracy for 10-fold cross validation
	NeuralNetwork:	18.85% accuracy for 10-fold cross validation


Accuracies for 매탄동 :
	RandomForest:	30.15% accuracy for 10-fold cross validation
	NeuralNetwork:	19.88% accuracy for 10-fold cross validation


Accuracies for 상동 :
	RandomForest:	20.81% accuracy for 10-fold cross validation
	NeuralNetwork:	13.21% accuracy for 10-fold cross validation


Accuracies for 고잔동 :
	RandomForest:	30.5% accuracy for 10-fold cross validation
	NeuralNetwork:	13.55% accuracy for 10-fold cross validation


Accuracies for 화정동 :
	RandomForest:	19.67% accuracy for 10-fold cross validation
	NeuralNetwork:	12.02% accuracy for 10-fold cross validation'''

# The report contains Korean place names, so pin the encoding to UTF-8 instead of
# relying on the platform default (which can fail or produce a file other tools misread).
with open('results.txt', 'w', encoding='utf-8') as f:
    f.write(inp)