In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Load the training data; 'train.csv' is resolved relative to the kernel's working directory.
df = pd.read_csv('train.csv')

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Summarize the frame: column names, dtypes, non-null counts and memory usage.
df.info()

In [None]:
def check_null_count(df):
    """Return the number of columns in ``df`` that contain at least one missing value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    int
        Count of columns with one or more null entries (0 when nothing is missing).
    """
    # One boolean per column: True when the column has any null entry.
    columns_with_nulls = df.isnull().any(axis=0)
    return int(columns_with_nulls.sum())

# Number of columns in df that contain at least one missing value.
check_null_count(df)

In [None]:
def check_uniform_col(df):
    """Print the name of every column of ``df`` that holds exactly one distinct value.

    Missing values are not counted as a distinct value (pandas ``nunique`` default),
    so a column made of one repeated value plus NaNs is reported as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.

    Returns
    -------
    None
        The column names are written to stdout, one per line, in column order.
    """
    single_valued = [name for name in df.columns if df[name].nunique() == 1]
    for name in single_valued:
        print(name)

# Prints the name of every column of df that has exactly one distinct value.
check_uniform_col(df)

In [None]:
# Drop columns that carry no information (presumably the single-valued columns
# reported by check_uniform_col -- confirm against that cell's output).
# errors='ignore' keeps this cell re-runnable: on a second execution the columns
# are already gone and a plain drop would raise KeyError.
df = df.drop(columns=['X_04', 'X_23', 'X_47', 'X_48'], errors='ignore')
df.columns

In [None]:
def check_multicolinear(df):
    """Print every ordered pair of distinct numeric columns whose correlation is 1.

    The pairwise Pearson correlation matrix is computed over the numeric columns
    of ``df`` and each off-diagonal entry equal to 1 (within a small floating-point
    tolerance) is reported. Because the matrix is symmetric, each perfectly
    correlated pair is printed twice, once in each orientation.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. Non-numeric columns are ignored.

    Returns
    -------
    None
        Matches are written to stdout, one line per (row, column) pair.
    """
    # Restrict to numeric columns: DataFrame.corr raises on string columns
    # (e.g. an identifier column) in pandas >= 2.0.
    df_corr = df.select_dtypes(include='number').corr()

    # Compare with a tolerance instead of `== 1`: perfectly correlated columns
    # frequently evaluate to 0.9999999999999999 because of rounding.
    is_perfect = np.isclose(df_corr.to_numpy(), 1.0, rtol=0.0, atol=1e-9)
    rows, cols = np.where(is_perfect)
    row_names = df_corr.index[rows]
    col_names = df_corr.columns[cols]

    for row, col in zip(row_names, col_names):
        # Skip the diagonal: every column is perfectly correlated with itself.
        if row != col:
            print(f"행: {row}, 열: {col}")

# Prints each pair of different columns of df whose pairwise correlation is 1.
check_multicolinear(df)

In [None]:
# Split the columns into model inputs and targets by their name prefix.
# The patterns are '^X_' / '^Y_' rather than '^X_*' / '^Y_*': in a regex `_*` means
# "zero or more underscores", so '^X_*' would match any column merely starting with "X".
x_feature = df.filter(regex='^X_')  # Input: X features
y_feature = df.filter(regex='^Y_')  # Output: Y features

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set, then take 25% of the remaining 80%
# for validation, i.e. a 60% / 20% / 20% train / validation / test split overall.
# The fixed random_state makes both splits reproducible.
x_train_val, x_test, y_train_val, y_test = train_test_split(
    x_feature, y_feature, test_size=0.2, random_state=0
)
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.25, random_state=0
)

print(f'x_train: {x_train.shape}, x_val: {x_val.shape}, x_test: {x_test.shape}')
# Report the shapes of the y splits themselves, not the x splits.
print(f'y_train: {y_train.shape}, y_val: {y_val.shape}, y_test: {y_test.shape}')

In [None]:
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, Normalizer

# Fit the scaler on the training split only, then apply that same fitted transform
# to every split so the validation and test rows never influence the scaling.
scaler = StandardScaler().fit(x_train)

x_train_zscale, x_val_zscale, x_test_zscale = (
    scaler.transform(split) for split in (x_train, x_val, x_test)
)

In [None]:
from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# Sweep the Ridge regularization strength: fit one model per alpha on the scaled
# training split and report the mean squared error on the validation split.
for alpha in (0, 0.5, 1, 3, 5, 10, 100, 1000):
    regressor = MultiOutputRegressor(Ridge(alpha=alpha, random_state=0))
    regressor.fit(x_train_zscale, y_train)

    # Predicted targets for the validation rows (the name is historical).
    x_val_pred = regressor.predict(x_val_zscale)
    mse = mean_squared_error(y_val, x_val_pred)
    print(f"{alpha, mse}")

In [None]:
# Compare the Ridge solvers at the default alpha: fit one model per solver on the
# scaled training split and report the mean squared error on the validation split.
for solver in ('auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'):
    regressor = MultiOutputRegressor(Ridge(solver=solver, random_state=0))
    regressor.fit(x_train_zscale, y_train)

    # Predicted targets for the validation rows (the name is historical).
    x_val_pred = regressor.predict(x_val_zscale)
    mse = mean_squared_error(y_val, x_val_pred)
    print(f"{solver, mse}")