# Dataset Preparation

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import koreanize_matplotlib
from sklearn.preprocessing import StandardScaler
from imblearn.under_sampling import RandomUnderSampler
from sklearn.model_selection import train_test_split
import os
import time
from sklearn.impute import SimpleImputer
import numpy as np

# Path to the directory where the file is saved.
directory_path = './001.Training_data'

# Retrieve the list of files in the directory.
filelist = os.listdir(directory_path)

# Filter the file list to include only CSV files.
csv_files = [file for file in filelist if file.endswith('.csv')]
dataframes = []

for csv_file in csv_files:
    file_path = os.path.join(directory_path, csv_file)
    df = pd.read_csv(file_path)
    df = df.iloc[:,:-9]
    dataframes.append(df)

  from pandas.core import (


In [2]:
merge_df = pd.concat([dataframes[0], dataframes[1], dataframes[2], dataframes[3],dataframes[4]],axis=0)
merge_df['Fire_status'] = merge_df['Fire_status'].map({'Non-accident': 0, 'accident': 1})

In [9]:
def replace_missing(df):
    for col in df.columns:
        if df[col].dtype == np.float64 or df[col].dtype == np.int64:
            df[col].fillna(-9999, inplace=True)
        elif df[col].dtype == object:
            df[col].fillna('-9999', inplace=True)
        elif np.issubdtype(df[col].dtype, np.number) and np.isinf(df[col]).any():
            df[col].replace([np.inf, -np.inf], -9999, inplace=True)

replace_missing(merge_df)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(-9999, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna('-9999', inplace=True)


In [4]:
# Change the values in column 0 to 1 and 0.
merge_df['Fire_status'] = merge_df['Fire_status'].map({1: 'accident', 0: 'Non-accident'})

# Convert the remaining object-type columns to dummy variables.
object_cols = [col for col in merge_df.columns if merge_df[col].dtype == 'object' and col != 'Fire_status']
merge_df = pd.get_dummies(merge_df, columns=object_cols, drop_first=True)

# Extract the numerical variables.
numerical_cols = merge_df.select_dtypes(include=['int64', 'float64']).columns

scaler = StandardScaler()
merge_df[numerical_cols] = scaler.fit_transform(merge_df[numerical_cols])

# under-sampling
rus = RandomUnderSampler(sampling_strategy=1.0, random_state=42)
X_resampled, y_resampled = rus.fit_resample(merge_df.drop('Fire_status', axis=1), merge_df['Fire_status'])
y_resampled = y_resampled.apply(lambda x: 1 if x == 'accident' else 0)

# After under-sampling, split the data into a training set (80%) and a test set (20%).
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# 0. Predict Dataset

In [6]:
df5 = dataframes[-1]
object_cols = [col for col in df5.columns if df5[col].dtype == 'object' and col != 'Fire_status']
df5 = pd.get_dummies(df5, columns=object_cols, drop_first=True)

# Replace missing values using a function call.
replace_missing(df5)


# Extract the numerical variables.
numerical_cols = df5.select_dtypes(include=['int64', 'float64']).columns
scaler = StandardScaler()
df5[numerical_cols] = scaler.fit_transform(df5[numerical_cols])

In [7]:
new_columns = ['B_usg_가설건축물', 'B_usg_생활편익시설', 'L_zon2_유통상업지역', 'L_cond_발전소', 'L_cond_주상복합용', 'L_shape_삼각형', 'L_shape_역삼각형']

for column in new_columns:
    df5[column] = False

In [8]:
df5['Fire_status'] = df5['Fire_status'].apply(lambda x: 0 if x == 'Non-accident' else 1)

In [9]:
# Rearrange the columns of `df5` according to the order of the columns in `df1`.
df5_reordered = df5[merge_df.columns]

In [10]:
df5_X_train = df5_reordered.iloc[:, 1:]
df5_y_result = df5_reordered.iloc[:, 0]

 ## 1. Decision Tree

In [None]:
import time
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import io  # Import the 'io' module instead of 'sklearn.externals.six'
from sklearn.tree import export_graphviz
import pydotplus

# Record the start time of code execution.
start_time = time.time()

# Decision Tree Model
DT = DecisionTreeClassifier()
DT.fit(X_train, y_train)
min_cp = 0.01 #min(DT.cost_complexity_pruning_path(X_train, y_train).ccp_alphas)
DT_model = DecisionTreeClassifier(ccp_alpha=min_cp)
DT_model.fit(X_train, y_train)


# Decision Tree Evaluation
real_sim_DT = DT_model.predict(X_test)

# Confusion Matrix
DT_cmtx = confusion_matrix(y_test, real_sim_DT)
accuracy = accuracy_score(y_test, real_sim_DT)
precision = precision_score(y_test, real_sim_DT)
recall = recall_score(y_test, real_sim_DT)
f1 = f1_score(y_test, real_sim_DT)

df5_predict = DT_model.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)

# Save Results to Excel
with pd.ExcelWriter("./result_summary_DT.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['DT']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(DT_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame(DT_model.feature_importances_, index=X_train.columns, columns=['Importance']).to_excel(writer, sheet_name='Feature Importance')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_DT}).to_excel(writer, sheet_name='Test Data Predict')
    
    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['DT'])
df5_results.to_csv("./11.result/score/final_result_DT.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'DT_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_DT.csv", mode='a', header=True, index=False)

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 2. Random Forest

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# Random Forest Model
RF = RandomForestClassifier()
RF.fit(X_train, y_train)

# Random Forest Evaluation
real_sim_RF = RF.predict(X_test)

# Confusion Matrix
RF_cmtx = confusion_matrix(y_test, real_sim_RF)
accuracy = accuracy_score(y_test, real_sim_RF)
precision = precision_score(y_test, real_sim_RF)
recall = recall_score(y_test, real_sim_RF)
f1 = f1_score(y_test, real_sim_RF)

df5_predict = RF.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_RF.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['RF']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(RF_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame(RF.feature_importances_, index=X_train.columns, columns=['Importance']).to_excel(writer, sheet_name='Feature Importance',)
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_RF}).to_excel(writer, sheet_name='Test Data Predict')

    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['RF'])
df5_results.to_csv("./11.result/score/final_result_RF.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'RF_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_RF.csv", mode='a', header=True, index=False)

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 3. Naive Bayes

In [None]:
import pandas as pd
from sklearn.naive_bayes import BernoulliNB  # Changed from GaussianNB to BernoulliNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# Bayesian Model (Bernoulli Naive Bayes)
BNB = BernoulliNB()  # Changed to BernoulliNB
BNB.fit(X_train, y_train)

# Bayesian Model Evaluation
real_sim_BNB = BNB.predict(X_test)

# Confusion Matrix
BNB_cmtx = confusion_matrix(y_test, real_sim_BNB)
accuracy = accuracy_score(y_test, real_sim_BNB)
precision = precision_score(y_test, real_sim_BNB)
recall = recall_score(y_test, real_sim_BNB)
f1 = f1_score(y_test, real_sim_BNB)



df5_predict = BNB.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_BNB.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['NB']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(BNB_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    # Feature importance is not applicable for Bernoulli Naive Bayes
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_BNB}).to_excel(writer, sheet_name='Test Data Predict')
    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['NB'])
df5_results.to_csv("./11.result/score/final_result_NB.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'NB_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_NB.csv", mode='a', header=True, index=False)
    
# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 4. KNN

In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

#KNN은 numpy형태로 x_train을 만들어야합니다. 따라서 해당코드실행뒤 다른 모델을 train하기위해선 다시  데이터프레임형태로 만들어야합니다.
#X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)
# 위코드를 실행뒤 다른 코드를 진행하셔야합니다!

X_train, X_test, y_train, y_test = train_test_split(np.ascontiguousarray(X_resampled), y_resampled, test_size=0.2, random_state=42)

# KNN Model
k_value = 5  # You can choose the appropriate k value
KNN = KNeighborsClassifier(n_neighbors=k_value)
KNN.fit(X_train, y_train)

# KNN Model Evaluation
real_sim_KNN = KNN.predict(X_test)

# Confusion Matrix
KNN_cmtx = confusion_matrix(y_test, real_sim_KNN)
accuracy = accuracy_score(y_test, real_sim_KNN)
precision = precision_score(y_test, real_sim_KNN)
recall = recall_score(y_test, real_sim_KNN)
f1 = f1_score(y_test, real_sim_KNN)


df5_predict = KNN.predict((np.ascontiguousarray(df5_X_train)))
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)

# Save Results to Excel
with pd.ExcelWriter("./result_summary_KNN.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['KNN']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(KNN_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_KNN}).to_excel(writer, sheet_name='Test Data Predict')
    
    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['KNN'])
df5_results.to_csv("./11.result/score/final_result_KNN.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'KNN_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_KNN.csv", mode='a', header=True, index=False)

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 5. SVM (kernel = radial)

In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Assume X_resampled and y_resampled are your feature and target columns
#X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Record the start time of code execution.
start_time = time.time()

# RBF SVM Model
rbf_SVM = SVC(kernel='rbf')
rbf_SVM.fit(X_train, y_train)

# RBF SVM Model Evaluation
real_sim_rbf_SVM = rbf_SVM.predict(X_test)

# Confusion Matrix
rbf_SVM_cmtx = confusion_matrix(y_test, real_sim_rbf_SVM)
accuracy = accuracy_score(y_test, real_sim_rbf_SVM)
precision = precision_score(y_test, real_sim_rbf_SVM)
recall = recall_score(y_test, real_sim_rbf_SVM)
f1 = f1_score(y_test, real_sim_rbf_SVM)

print('1차')

df5_predict = rbf_SVM.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)

# Save Results to Excel
with pd.ExcelWriter("./result_summary_rbf_SVM.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['RBF SVM']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(rbf_SVM_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_rbf_SVM}).to_excel(writer, sheet_name='Test Data Predict')

df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['RBF SVM'])
df5_results.to_csv("./11.result/score/final_result_RBF_SVM.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'RBF_SVM_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_RBF_SVM.csv", mode='a', header=True, index=False)

    
# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 6. SVM (kernel = poly)

In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# Polynomial SVM Model
poly_SVM = SVC(kernel='poly', degree=3)  # You can adjust the degree parameter
poly_SVM.fit(X_train, y_train)

# Polynomial SVM Model Evaluation
real_sim_poly_SVM = poly_SVM.predict(X_test)

# Confusion Matrix
poly_SVM_cmtx = confusion_matrix(y_test, real_sim_poly_SVM)
accuracy = accuracy_score(y_test, real_sim_poly_SVM)
precision = precision_score(y_test, real_sim_poly_SVM)
recall = recall_score(y_test, real_sim_poly_SVM)
f1 = f1_score(y_test, real_sim_poly_SVM)

print('1차')

df5_predict = poly_SVM.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_poly_SVM.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['Polynomial SVM']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(poly_SVM_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_poly_SVM}).to_excel(writer, sheet_name='Test Data Predict')

    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['Polynomial SVM'])
df5_results.to_csv("./11.result/score/final_result_Polynomial_SVM.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'Polynomial_SVM_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_Polynomial_SVM.csv", mode='a', header=True, index=False)


# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 7. SVM (kernel = sigmoid)

In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# Sigmoid SVM Model
sigmoid_SVM = SVC(kernel='sigmoid')
sigmoid_SVM.fit(X_train, y_train)

# Sigmoid SVM Model Evaluation
real_sim_sigmoid_SVM = sigmoid_SVM.predict(X_test)

# Confusion Matrix
sigmoid_SVM_cmtx = confusion_matrix(y_test, real_sim_sigmoid_SVM)
accuracy = accuracy_score(y_test, real_sim_sigmoid_SVM)
precision = precision_score(y_test, real_sim_sigmoid_SVM)
recall = recall_score(y_test, real_sim_sigmoid_SVM)
f1 = f1_score(y_test, real_sim_sigmoid_SVM)

print('1차')

df5_predict = sigmoid_SVM.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_sigmoid_SVM.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['Sigmoid SVM']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(sigmoid_SVM_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_sigmoid_SVM}).to_excel(writer, sheet_name='Test Data Predict')

df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['Sigmoid SVM'])
df5_results.to_csv("./11.result/score/final_result_Sigmoid_SVM.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'Sigmoid_SVM_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_Sigmoid_SVM.csv", mode='a', header=True, index=False)



# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")


## 8. SVM (kernel = linear)

In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# Linear SVM Model
linear_SVM = SVC(kernel='linear')
linear_SVM.fit(X_train, y_train)

# Linear SVM Model Evaluation
real_sim_linear_SVM = linear_SVM.predict(X_test)

# Confusion Matrix
linear_SVM_cmtx = confusion_matrix(y_test, real_sim_linear_SVM)
accuracy = accuracy_score(y_test, real_sim_linear_SVM)
precision = precision_score(y_test, real_sim_linear_SVM)
recall = recall_score(y_test, real_sim_linear_SVM)
f1 = f1_score(y_test, real_sim_linear_SVM)


df5_predict = linear_SVM.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)

# Save Results to Excel
with pd.ExcelWriter("./result_summary_linear_SVM.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['Linear SVM']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(linear_SVM_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_linear_SVM}).to_excel(writer, sheet_name='Test Data Predict')

df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['Linear SVM'])
df5_results.to_csv("./11.result/score/final_result_Linear_SVM.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'Linear_SVM_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_Linear_SVM.csv", mode='a', header=True, index=False)


# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 9. BA(BaggingClassifier)

In [None]:
import pandas as pd
import time
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# Bagging Classifier (Random Forest)
bagging_model = BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=10, random_state=42)
bagging_model.fit(X_train, y_train)

# Bagging Model Evaluation
real_sim_bagging = bagging_model.predict(X_test)

# Confusion Matrix
bagging_cmtx = confusion_matrix(y_test, real_sim_bagging)
accuracy = accuracy_score(y_test, real_sim_bagging)
precision = precision_score(y_test, real_sim_bagging)
recall = recall_score(y_test, real_sim_bagging)
f1 = f1_score(y_test, real_sim_bagging)

df5_predict = bagging_model.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_bagging.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['BA']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(bagging_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_bagging}).to_excel(writer, sheet_name='Test Data Predict')

df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['BA'])
df5_results.to_csv("./11.result/score/final_result_BA.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'BA_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_BA.csv", mode='a', header=True, index=False)


# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 10. Adaboost

In [None]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# AdaBoost Classifier
base_estimator = DecisionTreeClassifier()  # You can customize the base estimator
adaboost_model = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=50, random_state=42)
adaboost_model.fit(X_train, y_train)

# AdaBoost Model Evaluation
real_sim_adaboost = adaboost_model.predict(X_test)

# Calculate metrics
accuracy = accuracy_score(y_test, real_sim_adaboost)
precision = precision_score(y_test, real_sim_adaboost)
recall = recall_score(y_test, real_sim_adaboost)
f1 = f1_score(y_test, real_sim_adaboost)

# Access feature importances of the first base estimator after fitting
first_base_estimator = adaboost_model.estimators_[0]  
feature_importance = first_base_estimator.feature_importances_
feature_names = df5_X_train.columns

df5_predict = adaboost_model.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_adaboost.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['AdaBoost']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance}).to_excel(writer, sheet_name='Feature Importance', index=False)
    pd.DataFrame(confusion_matrix(y_test, real_sim_adaboost), index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_adaboost}).to_excel(writer, sheet_name='Test Data Predict')

    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['AdaBoost'])
df5_results.to_csv("./11.result/score/final_result_ADA.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'ADA_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_ADA.csv", mode='a', header=True, index=False)

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 11. Catboost

In [None]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score


# Record the start time of code execution.
start_time = time.time()

# CatBoost Classifier
catboost_model = CatBoostClassifier(iterations=100, random_state=42)
catboost_model.fit(X_train, y_train, cat_features=[])

# CatBoost Model Evaluation
real_sim_catboost = catboost_model.predict(X_test)

# Confusion Matrix
catboost_cmtx = confusion_matrix(y_test, real_sim_catboost)
accuracy = accuracy_score(y_test, real_sim_catboost)
precision = precision_score(y_test, real_sim_catboost)
recall = recall_score(y_test, real_sim_catboost)
f1 = f1_score(y_test, real_sim_catboost)


df5_predict = catboost_model.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_catboost.xlsx") as writer:
    pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['CatBoost']).to_excel(writer, sheet_name='Summary', index_label='Model')
    pd.DataFrame(catboost_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')
  
    # Save Feature Importance
    feature_importance_df = pd.DataFrame({'Feature': df5_X_train.columns, 'Importance': catboost_model.feature_importances_})
    feature_importance_df.to_excel(writer, sheet_name='Feature Importance', index=False)
    pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_catboost}).to_excel(writer, sheet_name='Test Data Predict')
    
    
    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['CatBoost'])
df5_results.to_csv("./11.result/score/final_result_CAT.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'CAT_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_CAT.csv", mode='a', header=True, index=False)


# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 12. Xgboost

In [None]:
import pandas as pd
import time
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Record the start time of code execution.
start_time = time.time()

# XGBoost Classifier
xgboost_model = XGBClassifier()
xgboost_model.fit(X_train, y_train)

# XGBoost Model Evaluation
real_sim_xgboost = xgboost_model.predict(X_test)

# Confusion Matrix
xgboost_cmtx = confusion_matrix(y_test, real_sim_xgboost)
accuracy = accuracy_score(y_test, real_sim_xgboost)
precision = precision_score(y_test, real_sim_xgboost)
recall = recall_score(y_test, real_sim_xgboost)
f1 = f1_score(y_test, real_sim_xgboost)



df5_predict = xgboost_model.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_xgboost.xlsx") as writer:
    # Save Summary
    summary_df = pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['XGBoost'])
    summary_df.to_excel(writer, sheet_name='Summary', index_label='Model')

    # Save Confusion Matrix
    pd.DataFrame(xgboost_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')

    # Save Feature Importance
    feature_importance_df = pd.DataFrame({'Feature': df5_X_train.columns, 'Importance': xgboost_model.feature_importances_})
    feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
    feature_importance_df.to_excel(writer, sheet_name='Feature Importance', index=False)

    # Save Test Data Predictions
    test_data_df = pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_xgboost})
    test_data_df.to_excel(writer, sheet_name='Test Data Predict', index=False)

        
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['XGBoost'])
df5_results.to_csv("./11.result/score/final_result_XG.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'XG_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_XG.csv", mode='a', header=True, index=False)



# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 13. GBM

In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score


# Record the start time of code execution.
start_time = time.time()
# GBM Classifier
gbm_model = GradientBoostingClassifier()
gbm_model.fit(X_train, y_train)

# GBM Model Evaluation
real_sim_gbm = gbm_model.predict(X_test)

# Confusion Matrix
gbm_cmtx = confusion_matrix(y_test, real_sim_gbm)
accuracy = accuracy_score(y_test, real_sim_gbm)
precision = precision_score(y_test, real_sim_gbm)
recall = recall_score(y_test, real_sim_gbm)
f1 = f1_score(y_test, real_sim_gbm)


df5_predict = gbm_model.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_gbm.xlsx") as writer:
    # Save Summary
    summary_df = pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['GBM'])
    summary_df.to_excel(writer, sheet_name='Summary', index_label='Model')

    # Save Confusion Matrix
    pd.DataFrame(gbm_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')

    # Save Feature Importance
    feature_importance_df = pd.DataFrame({'Feature': df5_X_train.columns, 'Importance': gbm_model.feature_importances_})
    feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
    feature_importance_df.to_excel(writer, sheet_name='Feature Importance', index=False)

    # Save Test Data Predictions
    test_data_df = pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_gbm})
    test_data_df.to_excel(writer, sheet_name='Test Data Predict', index=False)
    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['GBM'])
df5_results.to_csv("./11.result/score/final_result_GBM.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'GBM_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_GBM.csv", mode='a', header=True, index=False)


    # Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 14. ANN

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# Record the start time of code execution.
start_time = time.time()

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
df5_X_train_scaled = scaler.transform(df5_X_train)  # 추가: df5_X_train도 스케일링

# DNN Model
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# DNN Model Evaluation
real_sim_dnn = (model.predict(X_test_scaled) > 0.5).astype(int).flatten()

# Confusion Matrix
dnn_cmtx = confusion_matrix(y_test, real_sim_dnn)
accuracy = accuracy_score(y_test, real_sim_dnn)
precision = precision_score(y_test, real_sim_dnn)
recall = recall_score(y_test, real_sim_dnn)
f1 = f1_score(y_test, real_sim_dnn)

# df5 데이터 예측 및 평가
df5_predict_prob = model.predict(df5_X_train_scaled)
df5_predict = (df5_predict_prob > 0.5).astype(int).flatten()

df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)

# Save Results to Excel
with pd.ExcelWriter("./result_summary_ann.xlsx") as writer:
    # Save Summary
    summary_df = pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['ANN'])
    summary_df.to_excel(writer, sheet_name='Summary', index_label='Model')

    # Save Confusion Matrix
    pd.DataFrame(dnn_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')

    # ... (other sheets if needed)
    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['ANN'])
df5_results.to_csv("./11.result/score/final_result_ANN.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'ANN_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_ANN.csv", mode='a', header=True, index=False)

    

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 14-1. DNN

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# Record the start time of code execution.
start_time = time.time()

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
df5_X_train_scaled = scaler.transform(df5_X_train)  # 추가: df5_X_train도 스케일링

# DNN Model (Deeper Neural Network)
model = Sequential()
model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
model.add(Dense(128, activation='relu'))  # Additional hidden layer
model.add(Dense(64, activation='relu'))   # Additional hidden layer
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# DNN Model Evaluation
real_sim_dnn = (model.predict(X_test_scaled) > 0.5).astype(int).flatten()

# Confusion Matrix
dnn_cmtx = confusion_matrix(y_test, real_sim_dnn)
accuracy = accuracy_score(y_test, real_sim_dnn)
precision = precision_score(y_test, real_sim_dnn)
recall = recall_score(y_test, real_sim_dnn)
f1 = f1_score(y_test, real_sim_dnn)

# df5 데이터 예측 및 평가
df5_predict_prob = model.predict(df5_X_train_scaled)
df5_predict = (df5_predict_prob > 0.5).astype(int).flatten()

df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_dnn.xlsx") as writer:
    # Save Summary
    summary_df = pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['DNN'])
    summary_df.to_excel(writer, sheet_name='Summary', index_label='Model')

    # Save Confusion Matrix
    pd.DataFrame(dnn_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')

    # ... (other sheets if needed)
    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['DNN'])
df5_results.to_csv("./11.result/score/final_result_DNN.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'DNN_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_DNN.csv", mode='a', header=True, index=False)

    

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 15. LSTM

In [None]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


# Record the start time of code execution.
start_time = time.time()

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
df5_X_train_scaled = scaler.transform(df5_X_train)  # 추가: df5_X_train도 스케일링

# Reshape data for LSTM (assuming X_train and X_test are 2D arrays)
X_train_reshaped = X_train_scaled.reshape((X_train_scaled.shape[0], 1, X_train_scaled.shape[1]))
X_test_reshaped = X_test_scaled.reshape((X_test_scaled.shape[0], 1, X_test_scaled.shape[1]))
df5_X_train_reshaped = df5_X_train_scaled.reshape((df5_X_train_scaled.shape[0], 1, df5_X_train_scaled.shape[1]))

# LSTM Model
model = Sequential()
model.add(LSTM(50, input_shape=(X_train_reshaped.shape[1], X_train_reshaped.shape[2])))
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_reshaped, y_train, epochs=7, batch_size=64, validation_split=0.2, verbose=1)

# LSTM Model Evaluation
real_sim_lstm = (model.predict(X_test_reshaped) > 0.5).astype(int).flatten()

# Confusion Matrix
lstm_cmtx = confusion_matrix(y_test, real_sim_lstm)
accuracy = accuracy_score(y_test, real_sim_lstm)
precision = precision_score(y_test, real_sim_lstm)
recall = recall_score(y_test, real_sim_lstm)
f1 = f1_score(y_test, real_sim_lstm)

# df5 데이터 예측 및 평가
df5_predict_prob = model.predict(df5_X_train_reshaped)
df5_predict = (df5_predict_prob > 0.5).astype(int).flatten()

df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)


# Save Results to Excel
with pd.ExcelWriter("./result_summary_lstm.xlsx") as writer:
    # Save Summary
    summary_df = pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['LSTM'])
    summary_df.to_excel(writer, sheet_name='Summary', index_label='Model')

    # Save Confusion Matrix
    pd.DataFrame(lstm_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')

    
df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['LSTM'])
df5_results.to_csv("./11.result/score/final_result_LSTM.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'LSTM_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_LSTM.csv", mode='a', header=True, index=False)

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")

## 16. LGBM

In [None]:
import time
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

# Record the start time of code execution.
start_time = time.time()

# LightGBM Classifier
lgbm_model = LGBMClassifier()
lgbm_model.fit(X_train, y_train)

# LightGBM Model Evaluation
real_sim_lgbm = lgbm_model.predict(X_test)

# Confusion Matrix
lgbm_cmtx = confusion_matrix(y_test, real_sim_lgbm)
accuracy = accuracy_score(y_test, real_sim_lgbm)
precision = precision_score(y_test, real_sim_lgbm)
recall = recall_score(y_test, real_sim_lgbm)
f1 = f1_score(y_test, real_sim_lgbm)

df5_predict = lgbm_model.predict(df5_X_train)
df5_accuracy = accuracy_score(df5_y_result, df5_predict)
df5_precision = precision_score(df5_y_result, df5_predict)
df5_recall = recall_score(df5_y_result, df5_predict)
df5_f1 = f1_score(df5_y_result, df5_predict)

# Save Results to Excel
with pd.ExcelWriter("result_summary_lgbm.xlsx") as writer:
    # Save Summary
    summary_df = pd.DataFrame({'Accuracy': [accuracy], 'Precision': [precision], 'Recall': [recall], 'F1 Score': [f1]}, index=['LightGBM'])
    summary_df.to_excel(writer, sheet_name='Summary', index_label='Model')

    # Save Confusion Matrix
    pd.DataFrame(lgbm_cmtx, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive']).to_excel(writer, sheet_name='Confusion Matrix')

    # Save Feature Importance
    feature_importance_df = pd.DataFrame({'Feature': df5_X_train.columns, 'Importance': lgbm_model.feature_importances_})
    feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
    feature_importance_df.to_excel(writer, sheet_name='Feature Importance', index=False)

    # Save Test Data Predictions
    test_data_df = pd.DataFrame({'Actual': y_test, 'Predicted': real_sim_lgbm})
    test_data_df.to_excel(writer, sheet_name='Test Data Predict', index=False)

df5_results = pd.DataFrame({'Accuracy': [df5_accuracy], 'Precision': [df5_precision], 'Recall': [df5_recall], 'F1 Score': [df5_f1]}, index=['LightGBM'])
df5_results.to_csv("./11.result/score/score_result_LGBM.csv", index_label='Model')

df5_predictions = pd.DataFrame({'Actual': df5_y_result, 'LGBM_Predicted': df5_predict})
df5_predictions.to_csv("./11.result/predict/final_result_LGBM.csv", mode='a', header=True, index=False)

# Record the end time of code execution.
end_time = time.time()

# Calculate the execution time.
execution_time = end_time - start_time

# Print the execution time.
print(f"Record the start time : {execution_time}초")
