In [None]:
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Load the spreadsheet and keep only the rows that have no missing values.
df = pd.read_excel('/content/b2_scld.xlsx').dropna()

In [None]:
def reg(x, y, title="PCA_clus"):
  """Fit four linear models on one train/test split and print their test metrics.

  The data is split 80/20 with a fixed ``random_state`` of 256, then
  LinearRegression, Ridge(alpha=1.0), Lasso() and
  ElasticNet(alpha=1.0, l1_ratio=0.5) are fitted on the training part.
  For each model the test-set MSE, RMSE and R^2 (printed under the label
  "Variance score") are written to stdout.

  Args:
    x: Feature matrix accepted by scikit-learn (e.g. a DataFrame).
    y: Target values aligned with ``x``.
    title: Header line printed before the metrics. Defaults to the
      historical "PCA_clus" so existing calls print exactly what they
      printed before; pass a different label to tell runs apart.

  Returns:
    None. The function only prints, so calling it as the last statement of
    a notebook cell does not display an extra value.
  """
  x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=256)

  # Insertion order of this mapping is the order in which results are printed.
  models = {
      "lr": LinearRegression(),
      "ridge": Ridge(alpha=1.0),
      "lasso": Lasso(),
      "elastic": ElasticNet(alpha=1.0, l1_ratio=0.5),
  }

  # Fit every model before printing anything, so that any fitting warnings
  # are emitted ahead of the report instead of being interleaved with it.
  for model in models.values():
    model.fit(x_train, y_train)

  print(title)
  last_position = len(models) - 1
  for position, (name, model) in enumerate(models.items()):
    preds = model.predict(x_test)
    mse = mean_squared_error(y_test, preds)
    rmse = np.sqrt(mse)

    print(name)
    print('MSE: {0:.3f} , RMSE: {1:.3F}'.format(mse, rmse))
    score_line = 'Variance score: {0:.3f}'.format(r2_score(y_test, preds))
    if position < last_position:
      print(score_line)
      print('-'*30)
    else:
      # The final entry is followed by blank lines rather than a separator.
      print(score_line, '\n\n')

## Cluster: K-means clustering and per-cluster regression

In [None]:
# Features used for clustering: the two columns below, restricted to rows
# where neither of them is missing.
clus = df[['시간단위습도_scld', '시간단위기온_scld']].dropna()

In [None]:
# Suppress all warnings from here on (this setting is process-wide and stays
# in effect for the cells that run afterwards).
warnings.filterwarnings('ignore')

# Elbow method: record the K-means inertia for k = 1..10.
k_values = range(1,11)
inertia_values = []

for k in k_values:
    # fit() returns the fitted estimator, so both names refer to the same object.
    cluster_res = kmeans = KMeans(n_clusters=k, random_state=0).fit(clus)
    inertia_values.append(kmeans.inertia_)

# Inertia against the number of clusters.
plt.plot(k_values, inertia_values, marker='o')
plt.title('Elbow Method')
plt.xlabel('cluster_num')
plt.ylabel('Inertia')
plt.show()

In [None]:
# Final clustering with k=3 (presumably read off the elbow plot -- confirm).
kmeans = KMeans(n_clusters=3, random_state=42)

# Always fit on the two feature columns only. `clus` is overwritten below with
# the merged frame, so fitting on `clus` as-is would, when this cell is run a
# second time, silently cluster on every column (including the old labels).
cluster_features = clus[['시간단위습도_scld', '시간단위기온_scld']]
kmeans.fit(cluster_features)

# Give the labels the index of the clustered rows so that concat aligns each
# label with the matching row of `df`.
label = pd.Series(kmeans.labels_, index=cluster_features.index, name='label')
clus = pd.concat((df, label), axis=1)

In [None]:
# Work on a copy so the merged frame itself is left untouched.
df_to_split = clus.copy()

# One sub-frame per cluster label (0, 1, 2).
df_clus1, df_clus2, df_clus3 = (
    df_to_split[df_to_split['label'] == cluster_id] for cluster_id in range(3)
)

# Show (rows, columns) of each cluster, one per line.
print(df_clus1.shape, df_clus2.shape, df_clus3.shape, sep='\n')

In [None]:
# Rows with label 0: every column except the target is used as a feature
# (this includes the 'label' column, which is constant within the subset).
x_data_c1 = df_clus1.drop(columns=['재산피해금액'])
y_data_c1 = df_clus1['재산피해금액']
reg(x_data_c1, y_data_c1)

In [None]:
# Rows with label 1: every column except the target is used as a feature
# (this includes the 'label' column, which is constant within the subset).
x_data_c2 = df_clus2.drop(columns=['재산피해금액'])
y_data_c2 = df_clus2['재산피해금액']
reg(x_data_c2, y_data_c2)

In [None]:
# Rows with label 2: every column except the target is used as a feature
# (this includes the 'label' column, which is constant within the subset).
x_data_c3 = df_clus3.drop(columns=['재산피해금액'])
y_data_c3 = df_clus3['재산피해금액']
reg(x_data_c3, y_data_c3)

## Non_Cluster: regression on the full dataset

In [None]:
# Reload the spreadsheet and drop incomplete rows; this rebinds `df`.
df = pd.read_excel('/content/b2_scld.xlsx').dropna()

# NOTE(review): the target here is '사망인명피해수_scld', whereas the per-cluster
# cells above regress on '재산피해금액' -- confirm that comparing the two sets
# of results is intended. reg() is called without a label, so it prints the
# same "PCA_clus" header for this run as well.
x_data = df.drop(columns=['사망인명피해수_scld'])
y_data = df['사망인명피해수_scld']
reg(x_data, y_data)