In [3]:
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use("Agg")
from matplotlib import pyplot as plt

np.random.seed(42)

class Scaler():
    # hint: https://machinelearningmastery.com/standardscaler-and-minmaxscaler-transforms-in-python/
    def __init__(self):
        self.min=None
        self.max=None
    def __call__(self,features, is_train=False):
        m=np.size(feature_matrix,0)
        n=np.size(feature_matrix,1)
        if is_train:
            self.min = np.min(features,axis=0,keepdims=True)
            self.max = np.min(features,axis=0,keepdims=True)
            
        assert self.min is not None and self.max is not None
        featurse = (features - self.min)/(self.max-self.min + 1e-20)
        ones = np.ones[m,1]
        features = np.concatenate([ones,features],1)
        return features

In [4]:
def get_features(csv_path,is_train=False,scaler=None):
    '''
    Description:
    read input feature columns from csv file
    manipulate feature columns, create basis functions, do feature scaling etc.
    return a feature matrix (numpy array) of shape m x n 
    m is number of examples, n is number of features
    return value: numpy array
    '''

    '''
    Arguments:
    csv_path: path to csv file
    is_train: True if using training data (optional)
    scaler: a class object for doing feature scaling (optional)
    '''

    '''
    help:
    useful links: 
        * https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
        * https://www.geeksforgeeks.org/python-read-csv-using-pandas-read_csv/
    '''
    data = pd.read_csv(csv_path)
    Satellite = {'Terra': 1,'Aqua': 2}
    data.satellite = [Satellite[item] for item in data.satellite]
    Daynight = {'D': 1,'N': 2}
    data.daynight = [Daynight[item] for item in data.daynight]
    data1 = data.to_numpy()
    df1 = data.loc[:, ["acq_date"]]
    df1['year'] = pd.DatetimeIndex(df1['acq_date']).year
    df1['month'] = pd.DatetimeIndex(df1['acq_date']).month
    df=df1.to_numpy()
    data = data1[:,1:6]
    data = np.column_stack((data, df[:,1:3]))
    data = np.column_stack((data, data1[:,7]))
    data = np.column_stack((data, data1[:,8]))
    data = np.column_stack((data, data1[:,10]))
    data = np.column_stack((data, data1[:,12]))
    data = np.column_stack((data, data1[:,14]))
    data=data.astype(float)
    #data = np.column_stack((data, data1[:,-1]))
    return data

In [5]:
def get_targets(csv_path):
    '''
    Description:
    read target outputs from the csv file
    return a numpy array of shape m x 1
    m is number of examples
    '''
    data = pd.read_csv(csv_path)
    data=data['frp']
    m=len(data)
    data = data.to_numpy()
    data = data.astype(float)
    data= data.reshape(m,1) 
    return data

In [32]:
def analytical_solution(feature_matrix, targets, C=0.0):
    '''
    Description:
    implement analytical solution to obtain weights
    as described in lecture 5d
    return value: numpy array
    '''

    '''
    Arguments:
    feature_matrix: numpy array of shape m x n
    targets: numpy array of shape m x 1
    '''
    m, n = np.shape(feature_matrix)
    feature_matrix = np.hstack((np.ones((m, 1)), feature_matrix))
    w = np.matmul(feature_matrix.T,feature_matrix) + C * np.eye(n)
    w = np.matmul(np.linalg.inv(w),feature_matrix.T)
    w = np.matmul(w,targets)
    return w    

In [49]:
def get_predictions(feature_matrix, weights):
    '''
    description
    return predictions given feature matrix and weights
    return value: numpy array
    '''

    '''
    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    '''
    m=np.size(feature_matrix,0)
    n=np.size(feature_matrix,1)
    feature_matrix = np.column_stack((np.ones(m).T,feature_matrix))
    predictions = np.matmul(feature_matrix,weights)
    return predictions

In [16]:
def mse_loss(feature_matrix, weights, targets):
    '''
    Description:
    Implement mean squared error loss function
    return value: float (scalar)
    '''

    '''
    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    targets: numpy array of shape m x 1
    '''
    loss = np.square(np.matmul(feature_matrix,weights) - targets)
    loss = np.mean(loss)
    return loss


In [7]:
def l2_regularizer(weights):
    '''
    Description:
    Implement l2 regularizer
    return value: float (scalar)
    '''

    '''
    Arguments
    weights: numpy array of shape n x 1
    '''
    w_regularized = np.sum(np.square(weights))
    return w_regularized

In [8]:
def loss_fn(feature_matrix, weights, targets, C=0.0):
    '''
    Description:
    compute the loss function: mse_loss + C * l2_regularizer
    '''

    '''
    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    targets: numpy array of shape m x 1
    C: weight for regularization penalty
    return value: float (scalar)
    '''
    loss = mse_loss(feature_matrix, weights, targets) + C*l2_regularizer(weights)
    return loss

In [25]:
def compute_gradients(feature_matrix, weights, targets, C=0.0):
    '''
    Description:
    compute gradient of weights w.r.t. the loss_fn function implemented above
    '''

    '''
    Arguments:
    feature_matrix: numpy array of shape m x n
    weights: numpy array of shape n x 1
    targets: numpy array of shape m x 1
    C: weight for regularization penalty
    return value: numpy array
    '''
    m=np.size(feature_matrix,0)
    n=np.size(feature_matrix,1)
    term1 = np.matmul(feature_matrix.T,(get_predictions(feature_matrix, weights)-targets))
    term2 = C*weights
    gradients = 2*((term1/m)+term2)
    return gradients

In [10]:
def sample_random_batch(feature_matrix, targets, batch_size):
    '''
    Description
    Batching -- Randomly sample batch_size number of elements from feature_matrix and targets
    return a tuple: (sampled_feature_matrix, sampled_targets)
    sampled_feature_matrix: numpy array of shape batch_size x n
    sampled_targets: numpy array of shape batch_size x 1
    '''

    '''
    Arguments:
    feature_matrix: numpy array of shape m x n
    targets: numpy array of shape m x 1
    batch_size: int
    '''    
    index = np.random.randint(0,len(feature_matrix),batch_size)
    X_tmp = np.array([feature_matrix[i] for i in index])
    y_tmp = np.array([targets[i] for i in index])
    return(X_tmp,y_tmp)

In [54]:
def initialize_weights(n):
    '''
    Description:
    initialize weights to some initial values
    return value: numpy array of shape n x 1
    '''

    '''
    Arguments
    n: int
    '''
    w_tmp = np.random.uniform(0,0.01,(n+1,1))
    return w_tmp  

In [12]:
def update_weights(weights, gradients, lr):
    '''
    Description:
    update weights using gradient descent
    retuen value: numpy matrix of shape nx1
    '''

    '''
    Arguments:
    # weights: numpy matrix of shape nx1
    # gradients: numpy matrix of shape nx1
    # lr: learning rate
    '''    
    weights = weights - lr*gradients
    return weights


In [13]:
def early_stopping(patience, step, patience_threshold, min_steps):
    # allowed to modify argument list as per your need
    # return True or False
     if step < min_steps:
        return False
        if patience >= patience_threshold:
            return True
        else:
            return False
   

In [14]:
def plot_trainsize_losses():
    '''
    Description:
    plot losses on the development set instances as a function of training set size 
    '''

    '''
    Arguments:
    # you are allowed to change the argument list any way you like 
    '''    

    

In [57]:
def do_gradient_descent(train_feature_matrix,  
                        train_targets, 
                        dev_feature_matrix,
                        dev_targets,
                        lr=1.0,
                        C=0.0,
                        batch_size=32,
                        max_steps=10000,
                        eval_steps=5):
    '''
    feel free to significantly modify the body of this function as per your needs.
    ** However **, you ought to make use of compute_gradients and update_weights function defined above
    return your best possible estimate of LR weights

    a sample code is as follows -- 
    '''
    m=np.size(train_feature_matrix,0)
    n=np.size(train_feature_matrix,1)
    weights = initialize_weights(n)
    dev_loss = mse_loss(dev_feature_matrix, weights, dev_targets)
    train_loss = mse_loss(train_feature_matrix, weights, train_targets)
    #best_dev_loss = dev_loss
    #best_weights = weights
    #patience = 0

    print("step {} \t dev loss: {} \t train loss: {}".format(0,dev_loss,train_loss))
    for step in range(1,max_steps+1):

        #sample a batch of features and gradients
        features,targets = sample_random_batch(train_feature_matrix,train_targets,batch_size)
        
        #compute gradients
        gradients = compute_gradients(features, weights, targets, C)
        
        #update weights
        weights = update_weights(weights, gradients, lr)

        if step%eval_steps == 0:
            dev_loss = mse_loss(dev_feature_matrix, weights, dev_targets)
            train_loss = mse_loss(train_feature_matrix, weights, train_targets)
            print("step {} \t dev loss: {} \t train loss: {}".format(step,dev_loss,train_loss))

        '''
        implement early stopping etc. to improve performance.
        
        '''
        if step%eval_steps == 0:
            dev_loss = mse_loss(dev_feature_matrix, weights, dev_targets)
            train_loss = mse_loss(train_feature_matrix, weights, train_targets)
            print("step {} \t dev loss: {} \t train loss: {}".format(step,dev_loss,train_loss))
            '''
            if dev_loss < best_dev_loss:
                patience = 0
                best_dev_loss = dev_loss
                best_weights = weights
            else:
                patience +=1
                if early_stopping(patience,step,patience_threshold=1000,min_steps=(2*m)/batch_size):
                    print('Stopping Early at step: {}'.format(step))
                    break
        '''
    return weights

In [19]:
def do_evaluation(feature_matrix, targets, weights):
    predictions = get_predictions(feature_matrix, weights)
    loss =  mse_loss(feature_matrix, weights, targets)
    return loss

In [70]:
scaler = Scaler() #use of scaler is optional
path = 'C:/Users/bhara/Documents/desk/MachineLearning/CS725/Assingment_01/cs725-2021a-assgmt1'
print('train features')
train_features, train_targets = get_features(path + '/train.csv',True), get_targets(path + '/train.csv')
print('dev features')
dev_features, dev_targets = get_features(path + '/dev.csv',False), get_targets(path + '/dev.csv')
print('analytical solution')
a_solution = analytical_solution(train_features, train_targets, C=1e-8)
print(a_solution.shape)
#train_loss=do_evaluation(train_features, train_targets, a_solution)
feature_matrix = train_features
print(feature_matrix.shape)
m=np.size(feature_matrix,0)
print("m:",m)
n=np.size(feature_matrix,1)
print("n:",n)
weights = initialize_weights(n)
print("weights:",weights.shape)
feature_matrix = np.column_stack((np.ones(m).T,feature_matrix))
print("feature_matrix:",feature_matrix.shape)
predictions = np.matmul(feature_matrix,weights)
print("predictions:",predictions.shape)
loss =  mse_loss(feature_matrix, weights, train_targets)

train features
dev features
analytical solution
(12, 1)
(25001, 12)
m: 25001
n: 12
weights: (13, 1)
feature_matrix: (25001, 13)
predictions: (25001, 1)


59858.74143893175

In [71]:
if __name__ == '__main__':
    scaler = Scaler() #use of scaler is optional
    path = 'C:/Users/bhara/Documents/desk/MachineLearning/CS725/Assingment_01/cs725-2021a-assgmt1'
    print('train features')
    train_features, train_targets = get_features(path + '/train.csv',True), get_targets(path + '/train.csv')
    print('dev features')
    dev_features, dev_targets = get_features(path + '/dev.csv',False), get_targets(path + '/dev.csv')
    print('analytical solution')
    a_solution = analytical_solution(train_features, train_targets, C=1e-8)
    print('evaluating analytical_solution...')
    dev_loss=do_evaluation(dev_features, dev_targets, a_solution)
    train_loss=do_evaluation(train_features, train_targets, a_solution)
    print('analytical_solution \t train loss: {}, dev_loss: {} '.format(train_loss, dev_loss))

    print('training LR using gradient descent...')
    gradient_descent_soln = do_gradient_descent(train_features, 
                        train_targets, 
                        dev_features,
                        dev_targets,
                        lr=1.0,
                        C=0.001,
                        batch_size=32,
                        max_steps=200000,
                        eval_steps=5)

    print('evaluating iterative_solution...')
    dev_loss=do_evaluation(dev_features, dev_targets, gradient_descent_soln)
    train_loss=do_evaluation(train_features, train_targets, gradient_descent_soln)
    print('gradient_descent_soln \t train loss: {}, dev_loss: {} '.format(train_loss, dev_loss))
    #plot_trainsize_losses()   


train features
dev features
analytical solution
evaluating analytical_solution...


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 12 is different from 13)

In [None]:
print('Hello')

In [1]:
import tensorflow as tf

In [19]:
path = 'C:/Users/bhara/Documents/desk/MachineLearning/CS725/Assingment_01/cs725-2021a-assgmt1'
print('train features')
train_features, train_targets = get_features(path + '/train.csv',True), get_targets(path + '/train.csv')
print('dev features')
dev_features, dev_targets = get_features(path + '/dev.csv',False), get_targets(path + '/dev.csv')
print('analytical solution')

train features
dev features
analytical solution


In [20]:
train_features

array([[-25.117, 149.245, 363.1  , ..., 100.   , 316.6  ,   1.   ],
       [-32.263, 123.294, 349.3  , ...,  95.   , 307.2  ,   1.   ],
       [-36.918, 146.782, 336.7  , ..., 100.   , 293.9  ,   2.   ],
       ...,
       [-28.895, 128.98 , 322.5  , ..., 100.   , 303.   ,   2.   ],
       [-34.925, 150.489, 316.9  , ...,  94.   , 296.1  ,   2.   ],
       [-28.457, 128.735, 357.3  , ...,  96.   , 317.4  ,   1.   ]])

In [34]:
a_solution = analytical_solution(train_features, train_targets, C=1e-8)
a_solution

ValueError: operands could not be broadcast together with shapes (13,13) (12,12) 

In [35]:
sol=np.matmul(dev_features,a_solution)

In [36]:
sol.shape

(4001, 1)

In [37]:
sol

array([[279.57430413],
       [ 40.08955047],
       [ 12.66079577],
       ...,
       [-66.01352996],
       [ 11.27644315],
       [ 13.07262517]])

In [38]:
loss = np.mean(np.square(sol - dev_targets))
loss

39055.12661285697

In [25]:
def ols(X, y, fit_intercept=True):
    """Ordinary Least Squares (OLS) Regression model with intercept term.
    Fits an OLS regression model using the closed-form OLS estimator equation.
    Intercept term is included via design matrix augmentation.
    Params:
        X - NumPy matrix, size (N, p), of numerical predictors
        y - NumPy array, length N, of numerical response
        fit_intercept - Boolean indicating whether to include an intercept term
    Returns:
        NumPy array, length p + 1, of fitted model coefficients
    """
    m, n = np.shape(X)
    if fit_intercept:
        X = np.hstack((np.ones((m, 1)), X))
    return np.linalg.solve(np.dot(X.T, X), np.dot(X.T, y))

In [26]:
sol_2 = ols(train_features, train_targets, fit_intercept=True)

In [27]:
sol_2

array([[ 1.21445991e+05],
       [-4.75242181e-01],
       [ 4.47925513e-01],
       [ 6.80872714e+00],
       [ 4.05248334e+01],
       [ 1.37303182e+02],
       [-6.15538269e+01],
       [-5.34613598e+00],
       [ 6.54181968e-03],
       [-3.21911507e+00],
       [-1.38864074e+00],
       [ 1.02301260e+00],
       [ 1.19874699e+02]])

In [29]:
m, n = np.shape(dev_features)
X = np.hstack((np.ones((m, 1)), dev_features))
sol=np.matmul(X,sol_2)

In [30]:
sol

array([[278.44279953],
       [ 38.78666831],
       [ 10.28515989],
       ...,
       [-71.31674613],
       [ 10.12821476],
       [  9.14312166]])

In [31]:
loss = np.mean(np.square(sol - dev_targets))
loss

39051.43309839717