In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error as mse
import copy

# for sqrt 
import math
from feature_selector import FeatureSelector

In [None]:
def predict_test_y(x,y):
    """
    Fit coefficients by cyclic coordinate descent on (x, y) and return x·beta.

    Despite the name, the coefficients are fitted on the `x` and `y` passed in;
    the returned vector is the fitted values for that same `x`.

    Parameters
    ----------
    x : 2-D array whose columns are the features.
    y : 1-D target vector with one entry per row of `x`.

    Returns
    -------
    The product of `x` with the last coefficient vector of the descent path.
    """
    # cycliccoorddescent requires a starting point; start from all zeros,
    # as the other callers in this notebook do.
    beta_init = np.zeros(np.size(x, 1))
    beta = cycliccoorddescent(x, y, beta_init)[-1, :]
    return np.dot(x, beta)

In [None]:
def rmse(true,predicted):
    """
    Root mean squared error between the natural logs of `true` and `predicted`.

    Both inputs are log-transformed first, so the result measures error on
    the log scale rather than on the original scale.
    """
    log_true = np.log(true)
    log_predicted = np.log(predicted)
    mean_squared = mse(log_true, log_predicted)
    return math.sqrt(mean_squared)

In [None]:
# Load the dataset, then split it: every third row (0, 3, 6, ...) is held out
# as the test set and the remaining rows form the training set.
data = pd.read_csv('Ames_data.csv')

# Derive the row count from the file instead of hard-coding it, so the split
# stays valid if the CSV changes size.
n_rows = len(data)
test_id = list(np.arange(0, n_rows, 3))
# sorted() makes the training row order explicit instead of relying on the
# iteration order of a set.
train_id = sorted(set(np.arange(0, n_rows)) - set(test_id))

train = data.iloc[train_id,:]
test = data.iloc[test_id,:]

In [None]:
# Extract the target variable for the train and test splits
train_target = pd.DataFrame(train['Sale_Price'])
test_target = pd.DataFrame(test['Sale_Price'])

# Columns removed from both splits: the target itself plus the features
# excluded from modelling. Kept in a single list so the train and test drops
# cannot drift apart.
columns_to_drop = [
    'Sale_Price',
    'Street',
    'Utilities', 'Land_Slope',
    'Condition_2', 'Roof_Matl', 'Heating',
    'Pool_QC', 'Misc_Feature', 'Low_Qual_Fin_SF',
    'Three_season_porch', 'Pool_Area', 'Misc_Val',
    'Longitude', 'Latitude',
]
train = train.drop(columns=columns_to_drop)
test = test.drop(columns=columns_to_drop)

# Dummy-code the categorical columns (object dtype in the training split)
categorical_features = [col for col in train.columns if train[col].dtypes == 'object']
train = pd.get_dummies(train, columns=categorical_features)
test = pd.get_dummies(test, columns=categorical_features)

# Keep only the columns present in both splits so train and test share the
# same feature set in the same order
train_features, test_features = train.align(test, join='inner', axis=1)


In [None]:
# Drop highly correlated variables.
# 'Garage_Yr_Blt' is left out of the correlation calculation only; it is not
# removed from train_features here.
train_features_s = train_features.drop(columns = ['Garage_Yr_Blt'])

# Calculate the correlation matrix
corr_matrix = train_features_s.corr()

# Look at the strict upper triangle (row index < column index) so that every
# pair of columns is examined exactly once. The previous nested loop paired
# rows 0..i-1 with column i+1 and therefore never compared adjacent columns
# (i, i+1).
abs_corr = corr_matrix.abs().values
upper_mask = np.triu(np.ones(abs_corr.shape, dtype=bool), k=1)

# A column is dropped when it is correlated at or above the threshold with
# any earlier column. NaN correlations compare as False and are ignored.
exceeds_threshold = ((abs_corr >= 0.8) & upper_mask).any(axis=0)
drop_cols = list(corr_matrix.columns[exceeds_threshold])

# Drop one of each pair of correlated columns
drops = set(drop_cols)
train_features = train_features.drop(columns = drop_cols)

# Make sure train and test features keep the same columns
train_features,test_features = train_features.align(test_features,join = 'inner',axis=1)


In [None]:
# Build a FeatureSelector over the training features, with the training
# target passed as the labels.
fs = FeatureSelector(data = train_features, labels = train_target)

# Run identify_all with the thresholds below. Results are read later from
# fs.ops. NOTE(review): presumably this runs every identification method of
# the feature_selector package (missing, collinear, importance-based) --
# confirm against that package's documentation.
fs.identify_all(selection_params = {'missing_threshold': 0.8, 'correlation_threshold': 0.8, 
                                    'task': 'regression', 'eval_metric': 'l2', 
                                     'cumulative_importance': 0.95})


In [None]:
# Features the selector recorded under the 'zero_importance' key of fs.ops;
# they are dropped from the training features in the next cell.
zero_importance_features = fs.ops['zero_importance']

In [None]:
# Remove the zero-importance columns from the training features, then
# re-align so the test features keep exactly the same columns.
train_features = train_features.drop(columns=zero_importance_features)
train_features, test_features = train_features.align(
    test_features,
    join='inner',
    axis=1,
)

In [None]:
# Display the shape of the aligned test feature frame as a quick check
test_features.shape

In [None]:
# Extract feature names
# (kept for the zero-importance feature step)
# feature_names = list(train_features.columns)

# End of the pandas preprocessing

# Start of the numpy preprocessing

In [None]:
# Impute missing values with the per-column median, then standardize.
from sklearn.preprocessing import StandardScaler
try:
    # Current scikit-learn: the imputer lives in sklearn.impute
    from sklearn.impute import SimpleImputer as Imputer
except ImportError:
    # Older scikit-learn releases only provide the legacy Imputer
    from sklearn.preprocessing import Imputer

im = Imputer(strategy = 'median')
# Learn the medians on the training set only, then apply them to both splits.
# Refitting on the test set would leak test statistics into the features.
im.fit(train_features)
train_features = im.transform(train_features)
test_features = im.transform(test_features)

# Sanity check: print the positions of any remaining non-finite values
print(np.where(~np.isfinite(train_features)))
print(np.where(~np.isfinite(test_features)))

# Scale the data
scaler = StandardScaler()
# Fit on training set only.
scaler.fit(train_features)
# Apply transform to both the training set and the test set.
train_features = scaler.transform(train_features)
test_features = scaler.transform(test_features)
train_features_pd = pd.DataFrame(train_features)
test_features_pd = pd.DataFrame(test_features)

In [None]:
# Flatten the raw test target to a one-dimensional vector
y_test = np.array(test_target).reshape((-1, ))

# Standardize the TRAIN target and flatten it to a one-dimensional vector.
# NOTE: this re-fits `scaler` on the target, so from here on `scaler` holds
# the target's mean/scale rather than the feature scaling fitted earlier.
scaler.fit(train_target)
y = np.array(scaler.transform(train_target)).reshape((-1, ))

# Start building coordinate descent.
# Regularization strength read as a global by min_beta_multivariate.
lambda_optimal= 0.003

def soft_threshold(a,lambda_optimal = 0.01567752):
    """
    Soft-thresholding step for the l1-norm term.

    Moves `a` toward zero by `lambda_optimal`. When `a` lies inside
    [-lambda_optimal, lambda_optimal] the result is exactly 0.
    """
    # Far enough below zero: shift up by the threshold
    if a < -lambda_optimal:
        return a + lambda_optimal
    # Far enough above zero: shift down by the threshold
    if a > lambda_optimal:
        return a - lambda_optimal
    # Inside the threshold band
    return 0
    
    
def min_beta_multivariate(x, y, beta, j):
    """
    Solve the partial minimization problem with respect to beta_j, holding
    every other coordinate of `beta` fixed.

    Parameters
    ----------
    x : 2-D array of features, one column per coordinate.
    y : 1-D target vector with one entry per row of `x`.
    beta : 1-D current coefficient vector.
    j : index of the coordinate to update.

    Returns
    -------
    float
        The new value for beta[j]. Returns 0.0 when column j of `x` is all
        zeros, since that column cannot influence the fit and dividing by its
        zero norm would otherwise produce NaN.

    Notes
    -----
    Reads the module-level `lambda_optimal`; the threshold passed to
    `soft_threshold` is lambda_optimal * n / 2.
    """
    n = len(y)
    x_j = x[:, j]
    sq_norm_x_j = float(np.dot(x_j, x_j))
    if sq_norm_x_j == 0.0:
        return 0.0
    # Residual with coordinate j's own contribution added back. This equals
    # y - x[:, others].dot(beta[others]) without copying an n x (d-1)
    # sub-matrix on every update.
    partial_residual = y - np.dot(x, beta) + x_j * beta[j]
    # Plain float so the caller can assign the result into beta[j] directly
    a = float(np.dot(x_j, partial_residual))
    passin = lambda_optimal*n/2
    res = soft_threshold(a, passin)
    return res/sq_norm_x_j


def predict_cd(beta,x):
    """
    Compute the predictions x·beta for a given coefficient vector.

    Parameters
    ----------
    beta : 1-D coefficient vector, one entry per column of `x`.
    x : 2-D array of features.

    Returns
    -------
    The dot product of `x` and `beta` (one prediction per row of `x`).
    """
    return np.dot(x,beta)

def cycliccoorddescent(x, y, beta_init,max_iter = 10):
    """
    Cyclic coordinate descent: sweep over the coordinates in order, updating
    one coefficient at a time with `min_beta_multivariate`.

    Parameters
    ----------
    x : 2-D array of features.
    y : 1-D target vector.
    beta_init : starting coefficient vector (not modified).
    max_iter : number of full sweeps over all coordinates.

    Returns
    -------
    2-D array whose first row is the starting point and whose following rows
    are the coefficient vector after each single-coordinate update, so it has
    1 + max_iter * d rows. The last row is the final estimate.
    """
    # Float copy: leaves the caller's array untouched and prevents an integer
    # starting vector from truncating the updates.
    beta = np.array(beta_init, dtype=float)
    d = np.size(x, 1)
    # Collect snapshots in a list and stack once at the end; stacking inside
    # the loop re-copies the whole history on every coordinate update.
    history = [beta.copy()]
    for iteration in range(1, max_iter + 1):
        for j in range(d):
            # squeeze + float accepts either a scalar or a one-element array
            beta[j] = float(np.squeeze(min_beta_multivariate(x, y, beta, j)))
            history.append(beta.copy())
        if iteration % 100 == 0:
            print('Coordinate descent iteration', iteration)
    return np.vstack(history)
# Fit the scaler on the training target, then standardize the target and
# flatten it to a one-dimensional vector.
scaler.fit(train_target)
y = np.array(scaler.transform(train_target)).reshape((-1, ))
y.shape
def predicted(x,y,beta_init):
    """
    Run coordinate descent on (x, y) from `beta_init` and return the
    predictions for `x` under the final coefficient vector.
    """
    beta_path = cycliccoorddescent(x, y, beta_init)
    # The last row of the path is the final coefficient estimate
    final_beta = beta_path[-1, :]
    return predict_cd(final_beta, x)
# Fit once from an all-zero start and reuse the final coefficients for both
# splits; the fit is deterministic, so running it twice only repeated work.
beta_path = cycliccoorddescent(train_features, y, beta_init = np.zeros(np.size(train_features, 1)))
beta_hat = beta_path[-1, :]

# Predictions on the standardized target scale
prediction = predict_cd(beta_hat, train_features)
test_prediction = predict_cd(beta_hat, test_features)

# Map predictions back to the original target scale. `scaler` was fitted on
# train_target just above. inverse_transform is given a 2-D (n_samples, 1)
# array because current scikit-learn rejects 1-D input; the result is
# flattened again afterwards.
train_pred_price = scaler.inverse_transform(prediction.reshape(-1, 1)).reshape(-1)
test_pred_price = scaler.inverse_transform(test_prediction.reshape(-1, 1)).reshape(-1)

# rmse(true, predicted): keep both scores in variables so the training error
# can be inspected instead of being computed and thrown away.
train_rmse = rmse(np.array(train_target).reshape((-1, )), train_pred_price)
test_rmse = rmse(np.array(test_target).reshape((-1, )), test_pred_price)
print(test_rmse)

In [None]:
#y_test = scalery.transform(test_target)
#y = np.array(y).reshape((-1, ))
#y_test = np.array(y_test).reshape((-1, ))

# start building coordinate descent 