In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [22]:
df = pd.read_csv("train.csv")

In [23]:
obj_cols = df.select_dtypes(include='object')
num_cols = df.select_dtypes(include='number')

In [24]:
for col in obj_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

for col in num_cols:
    df[col] = df[col].fillna(df[col].mean())

df = pd.get_dummies(df)

In [25]:
import numpy as np
import pandas as pd

def initialize_parameters(n_features):
    w = np.zeros(n_features)
    b = 0
    return w, b

def predict(X, w, b):
    return np.dot(X, w) + b

def gradient_descent(X, y, y_pred, w, b, a, l1, l2):
    m, n = X.shape
    reg_term = np.where(w > 0, l1, -l1)
    
    dw = (-2 * np.dot(X.T, (y - y_pred)) + reg_term + 2 * l2 * w) / m
    db = -2 * np.sum(y - y_pred) / m

    w = w - a * dw
    b = b - a * db

    return w, b

def train(X, y, a, iters, l1, l2):
    w, b = initialize_parameters(X.shape[1])
    for _ in range(iters):
        y_pred = predict(X, w, b)
        w, b = gradient_descent(X, y, y_pred, w, b, a, l1, l2)
    return w, b

def accuracy(y, y_pred):
    ss_res = np.sum((y - y_pred) ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return round(r2, 4)

In [26]:
X = df.drop(columns=["SalePrice"]).values
y = df["SalePrice"].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y.reshape(-1, 1)).flatten()

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
w, b = train(X_train, y_train, a=0.01, iters=1000, l1=500, l2=0.1)
y_pred = predict(X_test, w, b)

print("R² Score :", accuracy(y_test, y_pred))

R² Score : 0.696


In [27]:
from sklearn.linear_model import ElasticNet
from sklearn.metrics import r2_score, mean_squared_error

model = ElasticNet(alpha = 0.1, l1_ratio = 0.5)
model.fit(X_train, y_train)

y_pred = model.predict(X_train)

print("R² Score          :", r2_score(y_train, y_pred))

R² Score          : 0.8656476560274007


In [28]:
#T-sne

In [None]:
def high_dims(X, sigma=1.0):
    D = np.square(X[:, np.newaxis] - X).sum(axis=2)
    D = np.clip(D, 1e-7, 1e3)  # Prevent overflow/underflow
    P = np.exp(-D / (2 * sigma ** 2))
    np.fill_diagonal(P, 0)
    P = P / np.sum(P)
    return P

def low_dims(Y):
    D = np.square(Y[:, np.newaxis] - Y).sum(axis=2)
    D = np.clip(D, 1e-7, 1e3)
    Q = 1 / (1 + D)
    np.fill_diagonal(Q, 0)
    Q = Q / np.sum(Q)
    return Q

def tsne(X, no_dims=2, max_iter=1000, learning_rate=100.0, sigma=1.0):
    X = X - np.mean(X, axis=0)
    Y = np.random.randn(X.shape[0], no_dims)
    P = high_dims(X, sigma)

    for iter in range(max_iter):
        Q = low_dims(Y)
        PQ = P - Q
        dY = np.zeros_like(Y)

        for i in range(X.shape[0]):
            dY[i] = 4 * np.sum((PQ[:, i][:, None] * (Y[i] - Y)), axis=0)

        Y -= learning_rate * dY

        if iter % 100 == 0:
            cost = np.sum(P * np.log((P + 1e-12) / (Q + 1e-12)))
            print(f"Iteration {iter}: KL divergence = {cost:.4f}")

    return Y

Y = tsne(X_scaled, no_dims=2, max_iter=500, learning_rate=0.0001, sigma=1.0)

plt.figure(figsize=(8, 6))
plt.scatter(Y[:, 0], Y[:, 1], s=50)
plt.title("t-SNE Projection")
plt.grid(True)
plt.show()

Iteration 0: KL divergence = 11.7150
