In [88]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import datasets

In [89]:
data = np.genfromtxt('train.csv', delimiter=',')
x_test = np.genfromtxt('test.csv', delimiter=',')

In [90]:
x_train = data[:, :-1]
y_train = data[:, -1]
x_train.shape, y_train.shape

((7176, 4), (7176,))

In [91]:
def addFeatures3Degree(x):
    df_train = pd.DataFrame(x)
    i = 0
    n = len(df_train.columns)
    l = df_train.columns
    while i < n:
        j = i
        while j < n:
            df_train[str(l[i]) + '*' + str(l[j])] = df_train[l[i]]*df_train[l[j]]
            k = j
            while k < n:
                df_train[str(l[i]) + '*' + str(l[j]) + '*' + str(l[k])] = df_train[l[i]]*df_train[l[j]]*df_train[l[k]]
                p = j
                while p < n:
                    df_train[str(l[i]) + '*' + str(l[j]) + '*' + str(l[k]) + '*' + str(l[p])] = df_train[l[i]]*df_train[l[j]]*df_train[l[k]]*df_train[l[p]]
                    p += 1
                k += 1
            j += 1
        i += 1
    return np.array(df_train)

In [92]:
x_train = addFeatures3Degree(x_train)
x_test = addFeatures3Degree(x_test)

In [93]:
x_train.shape, x_test.shape

((7176, 84), (2392, 84))

In [94]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [95]:
x_train = np.insert(x_train, 0, 1, axis=1)
x_test = np.insert(x_test, 0, 1, axis=1)

In [96]:
def cost(x, y, coef):
    return ((y - x.dot(coef))**2).mean()

In [97]:
def score(x_test, y_test, m):
        y_pred = predict(x_test, m)
        num = np.sum((y_pred - y_test) ** 2, axis=0)
        den = np.sum((y_test - y_test.mean()) ** 2, axis=0)
        return 1 - num/den

In [98]:
def predict(x_test, m):
    return x_test.dot(m)

In [109]:
def step_gradient(x, y, learning_rate, m):
    k = x.shape[0]
    l = x.shape[1]
    slope_m = np.zeros(l)
    for i in range(k):
        for j in range(l):
            slope_m[j] += (2/k)*x[i][j]*((x[i].dot(m)) - y[i])
        if i%20 == 0 or i == k-1:
            m = m - learning_rate*slope_m
            slope_m = np.zeros(l)
    return m

In [110]:
def gradient_descent(x, y, learning_rate=0.001, num_iter=1000):
    coef = np.zeros((x.shape[1],))
    
    print("start : ", cost(x, y, coef))
    
    for i in range(num_iter):
        coef = step_gradient(x, y, learning_rate, coef)
        if i % (num_iter//10) == 0:
            print(i, " : ", cost(x, y, coef))
    return coef

In [111]:
def run(x, y):
    learning_rate = 0.1
    num_iter = 100
    return gradient_descent(x, y, learning_rate, num_iter)

In [112]:
m = run(x_train, y_train)

start :  206801.35285512818
0  :  138435.06416141335
10  :  2547.6335252494164
20  :  65.55825204682368
30  :  19.874377963488524
40  :  18.9227892349825
50  :  18.856335245470007
60  :  18.82668056841477
70  :  18.805100182037062
80  :  18.78715720840602
90  :  18.771480941215348


In [117]:
m

array([ 4.54428596e+02, -1.95811087e+00, -7.00965985e-01,  2.61768309e-01,
        5.24688645e-01, -1.01521018e+00, -1.27694404e-01,  6.34371342e-01,
        7.40106737e-01, -9.14626008e-02,  7.31711975e-01, -6.36682756e-02,
        7.40106737e-01,  6.65374964e-01, -3.01519552e-02,  6.36547757e-01,
       -9.76360333e-01, -9.14626008e-02, -3.01519552e-02, -9.37657439e-01,
       -1.92705368e-01, -2.27108126e-01,  7.31711975e-01,  6.36547757e-01,
       -1.92705368e-01,  2.60990260e-01, -8.43371294e-01, -1.13318402e-01,
        4.67638161e-01, -8.29195375e-02,  4.11309666e-01, -8.08553099e-01,
       -8.29195375e-02, -7.73972933e-01, -1.61842188e-01, -1.88317628e-01,
        4.11309666e-01, -1.61842188e-01,  1.22539995e-01, -1.92310507e+00,
       -1.88842004e+00, -1.85408049e+00, -1.06735176e+00, -1.08451071e+00,
       -1.06735176e+00, -4.74080956e-01, -1.10242202e+00, -4.72472219e-01,
       -2.39223574e-01, -4.55841445e-01, -1.39342941e-01,  2.29407841e-01,
       -1.12423401e-01,  

In [118]:
score(x_train, y_train, m)

0.9360971365499177

In [119]:
y_pred = predict(x_test, m)
y_pred

array([471.14135153, 472.57449158, 434.011007  , ..., 437.37543254,
       450.34622272, 445.69529471])

In [120]:
np.savetxt('y_test.csv', y_pred, delimiter=',')