In [3]:
from typing import TypeAlias
from typing import Optional, Any    

# Convenience alias for a scalar value that may be either an int or a float.
Number: TypeAlias = int | float

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.axes as axes
from IPython.display import display
from copy import deepcopy

In [5]:
def normalize_z(
    array: np.ndarray,
    columns_means: Optional[np.ndarray] = None,
    columns_stds: Optional[np.ndarray] = None,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Standardise ``array`` column-wise as ``(array - mean) / std``.

    Args:
        array: Data to normalise. It is copied first, so the caller's object
            is never modified.
        columns_means: Per-column means to subtract. When ``None`` they are
            taken from ``array.mean(axis=0)``.
        columns_stds: Per-column standard deviations to divide by. When
            ``None`` they are taken from ``array.std(axis=0)``.

    Returns:
        A 3-tuple ``(normalised, means, stds)`` where ``means`` and ``stds``
        are the statistics that were actually applied, so they can be reused
        on another data set.

    Note:
        There is no guard against a zero standard deviation; such a column
        yields ``inf``/``NaN`` after the division.
    """
    data = array.copy()
    # Fall back to statistics of the data itself only when none were supplied.
    mu = data.mean(axis=0) if columns_means is None else columns_means
    sigma = data.std(axis=0) if columns_stds is None else columns_stds
    return (data - mu) / sigma, mu, sigma


def get_features_targets(
    df: pd.DataFrame,
    feature_names: list[str],
    target_names: list[str],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Split ``df`` into a feature frame and a target frame by column name.

    Args:
        df: Source frame containing every requested column.
        feature_names: Column labels to return as features.
        target_names: Column labels to return as targets.

    Returns:
        ``(df_feature, df_target)``, each holding all rows of ``df`` and only
        the requested columns, in the order the names were given.
    """
    every_row = slice(None)
    return df.loc[every_row, feature_names], df.loc[every_row, target_names]

def prepare_feature(np_feature: np.ndarray) -> np.ndarray:
    """Prepend a column of ones (the intercept term) to a feature matrix.

    Args:
        np_feature: Two-dimensional feature data with one row per sample.

    Returns:
        A new array with one extra leading column filled with ``1.0``.
    """
    n_rows = np_feature.shape[0]
    return np.hstack([np.ones((n_rows, 1)), np_feature])

def compute_cost_linreg(X: np.ndarray, y: np.ndarray, beta: np.ndarray) -> np.ndarray:
    """Evaluate the halved mean-squared-error cost of a linear model.

    Computes ``J = 1 / (2 m) * sum((X @ beta - y) ** 2)`` where ``m`` is the
    number of rows of ``X``.

    Args:
        X: Design matrix, one row per sample.
        y: Observed target values to compare the predictions against.
        beta: Model coefficients.

    Returns:
        The cost ``J`` after passing it through ``np.squeeze``.
    """
    n_samples = X.shape[0]
    residuals = calc_linreg(X, beta) - y
    cost = (1/(2*n_samples))*np.sum(residuals**2)
    return np.squeeze(cost)

def predict_linreg(
    df_feature: pd.DataFrame,
    beta: np.ndarray,
    means: Optional[np.ndarray] = None,
    stds: Optional[np.ndarray] = None,
) -> np.ndarray:
    """Predict targets for ``df_feature`` with a fitted linear model.

    The features are z-normalised with ``normalize_z``, a column of ones is
    prepended with ``prepare_feature`` and the result is multiplied by
    ``beta`` with ``calc_linreg``.

    Args:
        df_feature: Raw (un-normalised) feature values.
        beta: Model coefficients, intercept first.
        means: Per-column means forwarded to ``normalize_z``; when ``None``
            they are derived from ``df_feature`` itself.
        stds: Per-column standard deviations forwarded to ``normalize_z``;
            when ``None`` they are derived from ``df_feature`` itself.

    Returns:
        The predicted target values.
    """
    # Only the normalised data is needed here; the statistics are discarded.
    normalized, _, _ = normalize_z(df_feature, means, stds)
    design_matrix = prepare_feature(normalized)
    return calc_linreg(design_matrix, beta)

def calc_linreg(X: np.ndarray, beta: np.ndarray) -> np.ndarray:
    """Return the linear-model output, i.e. the matrix product of ``X`` and ``beta``.

    Args:
        X: Design matrix, one row per sample.
        beta: Model coefficients.

    Returns:
        The result of ``np.matmul(X, beta)``.
    """
    prediction = np.matmul(X, beta)
    return prediction

def split_data(df_feature: pd.DataFrame, df_target: pd.DataFrame, 
               random_state: Optional[int]=None, 
               test_size: float=0.5) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Randomly partition features and targets into train and test sets.

    Rows are selected purely by position, so the split works for any index
    (non-contiguous, non-integer or duplicated labels), not only for the
    default ``RangeIndex``.

    Args:
        df_feature: Feature rows.
        df_target: Target rows; row ``i`` is paired with row ``i`` of
            ``df_feature``.
        random_state: Seed passed to ``np.random.seed`` before sampling.
            Note that this reseeds NumPy's global random generator.
        test_size: Fraction of the rows placed in the test set. The number
            of test rows is ``int(total_rows * test_size)``.

    Returns:
        ``(df_feature_train, df_feature_test, df_target_train,
        df_target_test)``. Test rows appear in the sampled order; train rows
        keep their original relative order.
    """
    np.random.seed(random_state)
    total_rows = df_feature.shape[0]

    # Sample the *positions* of the test rows without replacement.
    num_test_rows = int(total_rows * test_size)
    test_positions = np.random.choice(total_rows, size=num_test_rows, replace=False)

    # Build the train selection positionally as well. Using
    # ``drop(test_positions)`` would interpret the positions as index labels
    # and fail (or drop the wrong rows) for any non-default index.
    is_train = np.ones(total_rows, dtype=bool)
    is_train[test_positions] = False

    df_feature_test = df_feature.iloc[test_positions]
    df_target_test = df_target.iloc[test_positions]

    df_feature_train = df_feature.iloc[is_train]
    df_target_train = df_target.iloc[is_train]

    return df_feature_train, df_feature_test, df_target_train, df_target_test
  
def r2_score(y: np.ndarray, ypred: np.ndarray) -> float:
    """Compute the coefficient of determination ``R^2 = 1 - SS_res / SS_tot``.

    Args:
        y: Observed target values.
        ypred: Predicted target values.

    Returns:
        ``1 - SS_res / SS_tot`` where ``SS_res`` is the sum of squared
        residuals and ``SS_tot`` is the sum of squared deviations of ``y``
        from its mean.
    """
    residual_ss = np.sum((y - ypred) ** 2)
    total_ss = np.sum((y - y.mean()) ** 2)
    return 1 - (residual_ss / total_ss)

#n is the number of data points
#k is the number of predictors
def adj_r_squared(n,k,target,pred):
    """Compute the adjusted R^2 of a prediction.

    Applies ``1 - (1 - R^2) * (n - 1) / (n - k - 1)`` to the value returned
    by ``r2_score(target, pred)``.

    Args:
        n: Number of data points.
        k: Number of predictors.
        target: Observed target values.
        pred: Predicted target values.

    Returns:
        The adjusted R^2 value.
    """
    unexplained = 1-r2_score(target,pred)
    return 1-(unexplained*(n-1)/(n-k-1))

def mean_squared_error(target: np.ndarray, pred: np.ndarray) -> float:
    """Return the mean of the squared differences between ``target`` and ``pred``.

    Args:
        target: Observed target values.
        pred: Predicted target values.

    Returns:
        ``mean((target - pred) ** 2)``.
    """
    squared_errors = (target - pred) ** 2
    return squared_errors.mean()

In [6]:
def gradient_descent_linreg(
    X: np.ndarray,
    y: np.ndarray,
    beta: np.ndarray,
    alpha: float,
    num_iters: int,
) -> tuple[np.ndarray, np.ndarray]:
    """Fit linear-regression coefficients by batch gradient descent.

    Each iteration applies ``beta <- beta - alpha * (1/m) * X^T (X beta - y)``
    where ``m`` is the number of rows of ``X``, then records the cost of the
    *updated* coefficients.

    Args:
        X: Design matrix, one row per sample.
        y: Observed target values.
        beta: Initial coefficients. The argument is not modified in place; a
            new array is produced on every update.
        alpha: Learning rate (step size).
        num_iters: Number of update steps to run.

    Returns:
        ``(beta, J_storage)``: the final coefficients and an array of length
        ``num_iters`` holding the cost after each update.
    """
    n_samples = X.shape[0]
    cost_history = np.zeros(num_iters)

    for step in range(num_iters):
        # Gradient of the cost with respect to beta at the current estimate.
        residuals = calc_linreg(X,beta)-y
        gradient = (1/n_samples)*(np.matmul(X.T, residuals))
        beta = beta -alpha*gradient

        # Track convergence using the freshly updated coefficients.
        cost_history[step] = compute_cost_linreg(X,y,beta)

    return beta, cost_history