In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

In [10]:
dat = pd.read_csv('rookie_df.csv')
print(dat.head())

   Unnamed: 0  Year Drafted  GP   MIN   PTS  FGM   FGA   FG%  3P Made  3PA  \
0           0          2013  70  34.5  16.7  6.1  15.1  40.5      0.8  3.0   
1           1          2013  70  32.3  12.8  4.9  12.8  38.0      1.6  4.8   
2           2          2013  80  31.1  13.8  4.9  11.7  41.9      0.9  2.8   
3           3          2013  82  26.7   8.8  3.1   8.3  37.6      1.2  3.6   
4           4          2013  77  24.6   6.8  2.2   5.4  41.4      0.5  1.5   

   ...   FT%  OREB  DREB  REB  AST  STL  BLK  TOV   EFF  target  
0  ...  70.3   1.4   4.8  6.2  6.3  1.9  0.6  3.5  17.6       1  
1  ...  90.3   0.5   2.4  3.0  5.7  0.6  0.1  1.9  12.1       1  
2  ...  78.0   0.5   3.6  4.1  4.1  1.6  0.5  3.2  13.2       1  
3  ...  80.4   0.6   2.2  2.9  1.0  0.6  0.2  1.2   6.7       1  
4  ...  68.3   1.0   3.4  4.4  1.9  0.8  0.8  1.6   9.1       1  

[5 rows x 23 columns]


In [11]:
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split

In [12]:
Y = np.array(dat['PTS'])
X = np.array(dat[['OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV']])


In [13]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

scaler = StandardScaler().fit(x_train)

x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [14]:
### Next we will build our probabilistic logistic regression model
import time
from tqdm import tqdm
import math
import scipy

In [15]:
class ProbLinReg:
    
    def __init__(self, optimizer, learning_rate=0.01, max_iter=1000000, decay_rate = True, tol=1e-4, alpha=1, beta=1):
        self.optimizer = optimizer if optimizer in ['first_order', 'second_order'] else 'first_order'
        self.beta = beta
        self.alpha = alpha
        self.max_iter = max_iter
        self.w = None
        self.m_0 = None
        self.learning_rate = learning_rate
        self.tol = tol
        self.loss_history = []
        
        import time
        self.total_iter = 0
        self.total_time = 0
        print(f'Probabilistic Linear Regression Model Initialized with {self.optimizer} optimizer')
    
    def fit(self, X, y):
        N, M = X.shape
        self.w = np.random.randn(M)
        self.m_0 = self.w
        consecutive_tolerance = 20
        no_improvement_count = 0
      
        if self.optimizer == 'first_order':
            start = time.time()
            for iter in tqdm(range(self.max_iter), desc='Training Progress'):    
                self.total_iter+=1
                y_pred = (X @ self.w)
                gradient = X.T @ (y_pred - y) + self.alpha * self.w
                #self.w -= (self.learning_rate / (1 + 0.01 * self.total_iter * self.learning_rate)) * gradient #decaying learning rate
                self.w -= self.learning_rate * gradient #normal learning rate
                self.loss_history.append(self.log_posterior(X, y_pred))
                
                if math.isnan(np.linalg.norm(gradient)):
                    print('Summary for first order GD and step size =', self.learning_rate)
                    print('Time to converge: nan')
                    print('Total Iterations: nan')
                    return self
                
                if (self.total_iter % 100000 == 0):
                    print('Score at', self.total_iter, 'iterations:', self.loss_history[-1])
                
                if iter > 1 and np.abs(self.loss_history[-1] - self.loss_history[-2]) < self.tol:
                    no_improvement_count += 1
                else:
                    no_improvement_count = 0
                
                if (iter > 1 and no_improvement_count == 20) or self.total_iter == self.max_iter - 1:
                    end = time.time()
                    self.total_time = end - start
                    print('Summary for first order GD and step size =', self.learning_rate)
                    print('Time to converge:', self.total_time)
                    print('Total Iterations:', self.total_iter)
                    return self
             
        elif self.optimizer == 'second_order':
            start = time.time()
            for iter in tqdm(range(self.max_iter), desc='Training Progress'):
                self.total_iter+=1
                y_pred = X @ self.w
                gradient = X.T @ (y_pred - y) + self.alpha * self.w
                hessian = self.beta * X.T @ X + self.alpha * np.eye(M)
                finalGrad = np.linalg.inv(hessian) @ gradient
                self.w -= self.learning_rate * finalGrad #normal learning rate
                self.loss_history.append(self.log_posterior(X, y_pred))
                
                if (self.total_iter % 100000 == 0):
                    print('Gradient at', self.total_iter, 'iterations:', np.linalg.norm(gradient))
                
                if math.isnan(np.linalg.norm(gradient)):
                    print('Summary for second order GD and step size =', self.learning_rate)
                    print('Time to converge: nan')
                    print('Total Iterations: nan')
                    return self
                
                if iter > 3 and np.abs(self.loss_history[-1] - self.loss_history[-2]) < self.tol:
                    no_improvement_count += 1
                else:
                    no_improvement_count = 0
                
                if (iter > 1 and no_improvement_count == 20) or self.total_iter == self.max_iter - 1:
                    end = time.time()
                    self.total_time = end - start
                    print('Summary for second order GD and step size =', self.learning_rate)
                    print('Time to converge:', self.total_time)
                    print('Total Iterations:', self.total_iter)
                    return self
        else:
            print('Unknown optimizer')
        return self
    
    def predict(self, X):
        return X @ self.w
        
    def log_posterior(self, X, t):
        N, M = X.shape
        S_0 = (1 / self.alpha) * np.eye(M)
        S_N = np.linalg.inv(np.linalg.inv(S_0) + self.beta * (X.T @ X))
        m_N = S_N @ (np.linalg.inv(S_0) @ self.m_0 + self.beta * X.T @ t)
        epsilon=1e-8
        
        log_proba = scipy.stats.multivariate_normal.logpdf(self.w, mean=m_N, cov=S_N)
        avg_neg_log_proba = (-1 * (np.sum(log_proba) + epsilon)) / (M + epsilon)
        return avg_neg_log_proba

In [16]:
import scipy

learn_rate_iters = {}
times_firstOrd = {}
final_scores_firstOrd = {}
learn_rates = np.logspace(-5.0, 0.0, num=6)

for lr in learn_rates:
    test_model = ProbLinReg('first_order', learning_rate=lr, max_iter=10000000, tol=1e-8)
    test_model.fit(x_train_scaled, y_train)
    learn_rate_iters[lr] = test_model.total_iter
    times_firstOrd[lr] = test_model.total_time
    final_scores_firstOrd[lr] = test_model.log_posterior(x_test_scaled, y_test)
    

Probabilistic Linear Regression Model Initialized with first_order optimizer


Training Progress:   0%|          | 1209/10000000 [00:00<1:47:27, 1550.73it/s]


Summary for first order GD and step size = 1e-05
Time to converge: 0.7769036293029785
Total Iterations: 1210
Probabilistic Linear Regression Model Initialized with first_order optimizer


Training Progress:   0%|          | 403/10000000 [00:00<3:09:40, 878.69it/s]


Summary for first order GD and step size = 0.0001
Time to converge: 0.4631071090698242
Total Iterations: 404
Probabilistic Linear Regression Model Initialized with first_order optimizer


  maha = np.sum(np.square(np.dot(dev, prec_U)), axis=-1)
  if iter > 1 and np.abs(self.loss_history[-1] - self.loss_history[-2]) < self.tol:
  gradient = X.T @ (y_pred - y) + self.alpha * self.w
  m_N = S_N @ (np.linalg.inv(S_0) @ self.m_0 + self.beta * X.T @ t)
  m_N = S_N @ (np.linalg.inv(S_0) @ self.m_0 + self.beta * X.T @ t)
  y_pred = (X @ self.w)
  gradient = X.T @ (y_pred - y) + self.alpha * self.w
Training Progress:   0%|          | 491/10000000 [00:00<3:54:37, 710.31it/s]


Summary for first order GD and step size = 0.001
Time to converge: nan
Total Iterations: nan
Probabilistic Linear Regression Model Initialized with first_order optimizer


Training Progress:   0%|          | 180/10000000 [00:00<3:22:59, 821.06it/s]


Summary for first order GD and step size = 0.01
Time to converge: nan
Total Iterations: nan
Probabilistic Linear Regression Model Initialized with first_order optimizer


Training Progress:   0%|          | 114/10000000 [00:00<3:17:49, 842.49it/s]


Summary for first order GD and step size = 0.1
Time to converge: nan
Total Iterations: nan
Probabilistic Linear Regression Model Initialized with first_order optimizer


  y_pred = (X @ self.w)
Training Progress:   0%|          | 83/10000000 [00:00<4:15:01, 653.53it/s]


Summary for first order GD and step size = 1.0
Time to converge: nan
Total Iterations: nan
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<609942:40:49, 455.42it/s]

Summary for second order GD and step size = 1e-06
Time to converge: 0.058866262435913086
Total Iterations: 24





Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<625917:52:37, 443.79it/s]


Summary for second order GD and step size = 1e-06
Time to converge: 0.04871654510498047
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<485429:26:28, 572.23it/s]

Summary for second order GD and step size = 1e-06
Time to converge: 0.042555809020996094
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer







Training Progress:   0%|          | 23/1000000000000 [00:00<666103:30:04, 417.02it/s]


Summary for second order GD and step size = 1e-06
Time to converge: 0.04989171028137207
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<490756:25:35, 566.02it/s]


Summary for second order GD and step size = 1e-06
Time to converge: 0.048874855041503906
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 0/1000000000000 [00:00<?, ?it/s]

Summary for second order GD and step size = 1e-05
Time to converge: 

Training Progress:   0%|          | 23/1000000000000 [00:00<907067:30:55, 306.24it/s]


0.07610917091369629
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<465725:21:18, 596.44it/s]

Summary for second order GD and step size = 1e-05
Time to converge: 0.04604506492614746
Total Iterations: 24





Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 0/1000000000000 [00:00<?, ?it/s]

Summary for second order GD and step size = 

Training Progress:   0%|          | 23/1000000000000 [00:00<629283:57:20, 441.42it/s]


1e-05
Time to converge: 0.049034833908081055
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 6/1000000000000 [00:00<6660543:06:22, 41.70it/s]

Summary for second order GD and step size =

Training Progress:   0%|          | 23/1000000000000 [00:00<2799840:21:53, 99.21it/s]


 1e-05
Time to converge: 0.19042754173278809
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<581392:55:09, 477.78it/s]


Summary for second order GD and step size = 1e-05
Time to converge: 0.05112743377685547
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<605704:07:40, 458.60it/s]


Summary for second order GD and step size = 0.0001
Time to converge: 0.05160927772521973
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<542814:01:44, 511.74it/s]


Summary for second order GD and step size = 0.0001
Time to converge: 0.045979976654052734
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 0/1000000000000 [00:00<?, ?it/s]

Summary for second order GD and step size =

Training Progress:   0%|          | 23/1000000000000 [00:00<777641:55:55, 357.21it/s]


 0.0001
Time to converge: 0.06939482688903809
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<480954:46:25, 577.55it/s]


Summary for second order GD and step size = 0.0001
Time to converge: 0.042815208435058594
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<584252:12:52, 475.44it/s]

Summary for second order GD and step size = 0.0001
Time to converge: 0.05328869819641113
Total Iterations: 24





Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<439343:49:15, 632.26it/s]


Summary for second order GD and step size = 0.001
Time to converge: 0.04390764236450195
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 0/1000000000000 [00:00<?, ?it/s]

Summary for second order GD and step size =

Training Progress:   0%|          | 23/1000000000000 [00:00<540219:38:35, 514.19it/s]


 0.001
Time to converge: 0.05172610282897949
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<589723:10:20, 471.03it/s]


Summary for second order GD and step size = 0.001
Time to converge: 0.05704140663146973
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<609830:22:54, 455.50it/s]


Summary for second order GD and step size = 0.001
Time to converge: 0.06648087501525879
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 0/1000000000000 [00:00<?, ?it/s]

Summary for second order GD and step size = 0.001
Time to converge: 0.06424617767333984
Total Iterations: 24

Training Progress:   0%|          | 23/1000000000000 [00:00<578182:19:51, 480.43it/s]



Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 325/1000000000000 [00:00<526380:07:52, 527.71it/s]


Summary for second order GD and step size = 0.01
Time to converge: 0.6120896339416504
Total Iterations: 326
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 274/1000000000000 [00:00<355095:45:31, 782.26it/s]


Summary for second order GD and step size = 0.01
Time to converge: 0.35206079483032227
Total Iterations: 275
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 145/1000000000000 [00:00<390249:39:01, 711.80it/s]


Summary for second order GD and step size = 0.01
Time to converge: 0.21382474899291992
Total Iterations: 146
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<558964:52:19, 496.95it/s]


Summary for second order GD and step size = 0.01
Time to converge: 0.056952476501464844
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/1000000000000 [00:00<712785:10:04, 389.71it/s]

Summary for second order GD and step size = 0.01
Time to converge: 0.05810856819152832
Total Iterations: 24





In [18]:
#SECOND ORDER TEST

learn_rate_iters_2 = {}
times_secondOrd = {}
final_scores_secondOrd = {}
learn_rates = np.logspace(-5.0, 0.0, num=6)

for lr in learn_rates:
    test_model = ProbLinReg('second_order', learning_rate=lr, max_iter=100000000, tol=1e-4)
    test_model.fit(x_train_scaled, y_train)
    learn_rate_iters[lr] = test_model.total_iter
    times_secondOrd[lr] = test_model.total_time
    final_scores_secondOrd[lr] = test_model.log_posterior(x_test_scaled, y_test)

Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/100000000 [00:00<80:13:01, 346.28it/s]


Summary for second order GD and step size = 1e-05
Time to converge: 0.07253623008728027
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/100000000 [00:00<279:49:34, 99.27it/s]


Summary for second order GD and step size = 0.0001
Time to converge: 0.23009395599365234
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/100000000 [00:00<102:31:28, 270.94it/s]


Summary for second order GD and step size = 0.001
Time to converge: 0.09394192695617676
Total Iterations: 24
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 237/100000000 [00:00<29:23:39, 945.01it/s]


Summary for second order GD and step size = 0.01
Time to converge: 0.2520620822906494
Total Iterations: 238
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 68/100000000 [00:00<29:58:01, 926.95it/s]


Summary for second order GD and step size = 0.1
Time to converge: 0.0678558349609375
Total Iterations: 69
Probabilistic Linear Regression Model Initialized with second_order optimizer


Training Progress:   0%|          | 23/100000000 [00:00<30:42:16, 904.68it/s]

Summary for second order GD and step size = 1.0
Time to converge: 0.027433156967163086
Total Iterations: 24



