In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os, re
from glob import glob
from tqdm import tqdm
import time
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Fetch the data archive from the hycms.hanyang.ac.kr download URL and save it locally as data.zip.
!wget "https://hycms.hanyang.ac.kr/index.php?module=xn_media_content2013&act=dispXn_media_content2013DownloadContent&content_id=66ef786538650" -O "data.zip"
# Extract the archive into the ./data directory.
# NOTE(review): "-O cp949" presumably tells unzip to decode archive member names as CP949 so the
# Korean file name read in the next cell (data/sdot학습데이터.csv) is restored correctly; confirm that
# the unzip build on the target machine supports this option.
!unzip  -O cp949 "data.zip" -d "data"

In [None]:
# Read the pipe-delimited, CP949-encoded S-DoT training table.
sdot_data_total = pd.read_csv('data/sdot학습데이터.csv', sep='|', encoding='cp949')

# Build the binary dependent variable from the temperature-difference column:
# 0 when the difference is below zero, 1 otherwise (i.e. 1 = warmer than the
# overall S-DoT average, 0 = cooler). Negating the "< 0" test keeps rows whose
# difference is missing in class 1, exactly as the comparison itself decides.
below_average = sdot_data_total['온도차이'] < 0
sdot_data_total['종속'] = (~below_average).astype('int64')

sdot_data_total.columns.values

In [None]:

""" 모든 입력변수를 이용한 분석"""
# Use every column as a model input except the target, the serial-number
# identifier, and the two temperature-difference columns.
tmp = sdot_data_total

non_feature_columns = ['종속', '시리얼번호', '온도차이', '온도비율차이']
x_column_name = tmp.columns.drop(non_feature_columns)
# NOTE(review): y_column_name is not referenced anywhere below; the target that
# is actually used is the binary '종속' column. Confirm whether this name is
# still needed.
y_column_name = "온도차이"

# Feature matrix: missing values become 0 and everything is cast to float.
feature_frame = tmp[x_column_name].fillna(0).astype('float')
x = np.array(feature_frame.values)

# Target as an (n, 1) column vector.
y = np.array(tmp['종속'].values).reshape(-1, 1)

print(x.shape)
print(y.shape)

In [None]:
# prompt: using keras, build a dense network: 110 inputs with the input nodes normalized, five hidden layers of 220, 440, 220, 110 and 20 hidden nodes respectively, and one output (0 or 1); generate the training code

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import BatchNormalization

# Take the input width from the prepared feature matrix so the model keeps
# matching `x` if the set of feature columns changes (110 for the current data).
n_features = x.shape[1]

model = keras.Sequential([
    keras.Input(shape=(n_features,)),
    # Normalization is applied directly to the raw inputs, before any Dense
    # layer, which is what "normalize the input nodes" asks for.
    BatchNormalization(),
    layers.Dense(220, activation='relu'),
    layers.Dense(440, activation='relu'),
    layers.Dense(220, activation='relu'),
    layers.Dense(110, activation='relu'),
    layers.Dense(20, activation='relu'),
    # Single sigmoid unit: output is a score in (0, 1) for the positive class.
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

model.summary()


In [None]:

# Fit on the full feature matrix for 100 epochs and keep the per-epoch history.
history = model.fit(x, y, epochs=100)

# Training accuracy per epoch.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'])
ax.set(title='Model accuracy', ylabel='Accuracy', xlabel='Epoch')
ax.legend(['Train'], loc='upper left')
plt.show()

# Disabled idea: keep calling model.fit(x, y, epochs=10) and re-plotting the
# accuracy curve in a loop until the best training accuracy reaches 0.9.



In [None]:
# Score the same feature matrix `x` that the model was fitted on, so these are
# in-sample predictions (no held-out data is involved).
y_predict = model.predict(x)


In [None]:
# prompt: plot the distribution of y_predict

# matplotlib.pyplot is already imported as `plt` in the notebook's import cell,
# so it is not re-imported here.

# Flatten the prediction column so the scores are histogrammed as one 1-D
# sample rather than as a 2-D array of datasets.
predicted_scores = np.asarray(y_predict).ravel()

fig, ax = plt.subplots()
ax.hist(predicted_scores, bins=20)
ax.set_xlabel('Predicted Value')
ax.set_ylabel('Frequency')
ax.set_title('Distribution of y_predict')
plt.show()


In [None]:
# prompt: set y_predict to 0 where it is below 0.5 and to 1 otherwise

# Threshold all scores at once; the result is a plain list of 0/1 ints, one per row.
y_predict_binary = np.where(np.asarray(y_predict).ravel() < 0.5, 0, 1).tolist()

In [None]:
# prompt: compare y_predict_binary with y, build the true/false positive/negative table as a table, and print precision, recall, accuracy and F1 score

from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, f1_score

# `y` was reshaped to an (n, 1) column for Keras; the metric functions work on
# 1-D label vectors, so flatten both sides once up front.
y_true = np.asarray(y).ravel()
y_pred = np.asarray(y_predict_binary).ravel()

# Confusion matrix. The label order is pinned to [0, 1] so the result is always
# 2x2, even if the model predicts a single class for every row; otherwise the
# 2x2 table construction below would fail.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

# Present the matrix as a labelled table (rows = actual, columns = predicted).
cm_df = pd.DataFrame(cm,
                     index=['Actual Negative', 'Actual Positive'],
                     columns=['Predicted Negative', 'Predicted Positive'])

print("Confusion Matrix:")
print(cm_df)

# Precision, recall, accuracy and F1 for the positive class (label 1).
# Note: the predictions were made on the same rows the model was trained on,
# so these are in-sample scores.
precision = precision_score(y_true, y_pred)
recall = recall_score(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred)

print("\nMetrics:")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")
print(f"F1-Score: {f1:.4f}")


In [None]:


""" Tree 모형 분석을 위하 주변 도로 면적비율과, 대지면적 비율 만 불러옴 (도=X, 대=y)"""
tmp = sdot_data_total[['도', '대', '종속']]

# Split the rows into the high-temperature (종속 == 1) and low-temperature
# (종속 == 0) groups so each can be drawn as its own series in the 2-D plot.
high_rows = tmp.loc[tmp['종속'] == 1]
low_rows = tmp.loc[tmp['종속'] == 0]

x1 = np.array(high_rows[['도', '대']].fillna(0).astype('float').values)
y1 = np.array(high_rows['종속'].values).reshape(-1, 1)

x2 = np.array(low_rows[['도', '대']].fillna(0).astype('float').values)
y2 = np.array(low_rows['종속'].values).reshape(-1, 1)

# Horizontal axis = '도' column, vertical axis = '대' column.
fig, ax = plt.subplots(figsize=(9, 9))
ax.scatter(x1[:, 0], x1[:, 1], marker='x', color='red', label='High Temperature')
ax.scatter(x2[:, 0], x2[:, 1], marker='o', color='blue', label='Low Temperature')
ax.legend(fontsize=10)
plt.show()


In [None]:
# Row-wise flag: True where the thresholded prediction equals the true label.
# Both sides are flattened so the comparison is elementwise on 1-D arrays.
is_correct = np.asarray(y_predict_binary).ravel() == np.asarray(y).ravel()
# Row positions (0-based) of the correctly classified samples.
correct_indices = np.flatnonzero(is_correct).tolist()

# Select rows with positional boolean masks taken straight from the source
# table. Predictions are ordered by row position, so matching them against
# DataFrame index labels would only be right for a default 0..n-1 index.
assert is_correct.shape[0] == len(sdot_data_total), "predictions and table rows must line up one-to-one"
road_site = sdot_data_total[['도', '대']].fillna(0).astype('float').to_numpy()
target = sdot_data_total['종속'].to_numpy()
is_high = target == 1
is_low = target == 0


def _select(mask):
    """Return the ('도', '대') rows and the (n, 1) label column for the masked rows."""
    return road_site[mask], target[mask].reshape(-1, 1)


# Correctly classified samples.
x1, y1 = _select(is_high & is_correct)
x2, y2 = _select(is_low & is_correct)

# Misclassified samples.
x3, y3 = _select(is_high & ~is_correct)
x4, y4 = _select(is_low & ~is_correct)

fig, ax = plt.subplots(figsize=(9, 9))
# Correct predictions are drawn small and faint as background context.
ax.scatter(x1[:, 0], x1[:, 1], marker='x', color='red', label='High Temperature (correct)', alpha=0.3, s=3)
ax.scatter(x2[:, 0], x2[:, 1], marker='o', color='blue', label='Low Temperature (correct)', alpha=0.3, s=3)

# Misclassified points are drawn at full size and opacity; each series gets its
# own legend label so the four groups can be told apart.
ax.scatter(x3[:, 0], x3[:, 1], marker='x', color='red', label='High Temperature (misclassified)')
ax.scatter(x4[:, 0], x4[:, 1], marker='o', color='blue', label='Low Temperature (misclassified)')

ax.legend(fontsize=10)
plt.show()