Predicting baby name sex using PyTorch for Logistic Regression

### References

- [Logistic Regression with PyTorch](https://proai.org/pytorch-logistic-regression) by [Denny Loevlie](https://twitter.com/DennisLoevlie)
- [PyTorch OO design compared with SkLearn](https://jeancochrane.com/blog/pytorch-functional-api) by [Jean Cochrane](https://JeanCochrane.com)
- [Building Your First Network in PyTorch](https://t.co/m9I4e0tfrC) by [Ta-Ying Cheng](https://www.linkedin.com/in/tim-ta-ying-cheng-411857139/)
- [data.world US baby names since 1880](https://data.world/ssa/baby-names-for-us-states-territories)



In [1]:
import numpy as np
import pandas as pd
# neither year nor len are statistically significant predictors of sex
from pathlib import Path

In [2]:
CWD = Path('.').absolute().resolve()
DATA_DIR = '.nlpia2-data'
DATA_FILE = 'baby-names-region.csv.gz'
CWD

PosixPath('/home/hobs/code/tangibleai/nlpia2/src/nlpia2/ch05')

In [3]:
parent = CWD
data_dir = parent / DATA_DIR 
filepath = data_dir / DATA_FILE
for i in range(10):
    print(filepath)
    if filepath.is_file():
        break
    parent = parent.parent
    data_dir = parent / DATA_DIR 
    filepath = data_dir / DATA_FILE
filepath

/home/hobs/code/tangibleai/nlpia2/src/nlpia2/ch05/.nlpia2-data/baby-names-region.csv.gz
/home/hobs/code/tangibleai/nlpia2/src/nlpia2/.nlpia2-data/baby-names-region.csv.gz
/home/hobs/code/tangibleai/nlpia2/src/.nlpia2-data/baby-names-region.csv.gz
/home/hobs/code/tangibleai/nlpia2/.nlpia2-data/baby-names-region.csv.gz


PosixPath('/home/hobs/code/tangibleai/nlpia2/.nlpia2-data/baby-names-region.csv.gz')

In [4]:
df = pd.read_csv(filepath)

In [5]:
np.random.seed(451)
df = df.sample(10_000)
df.head()

Unnamed: 0,region,sex,year,name,count,freq
6139665,WV,F,1987,Brittani,10,3e-06
2565339,MD,F,1954,Ida,18,5e-06
22297,AK,M,1988,Maxwell,5,1e-06
5114650,TN,F,1972,Charlene,24,8e-06
2126395,KS,M,1954,Todd,11,3e-06


In [6]:
names = df['name'].unique()
names[:10]

array(['Brittani', 'Ida', 'Maxwell', 'Charlene', 'Todd', 'Aubrey',
       'Arianna', 'Otis', 'Trenton', 'Faustino'], dtype=object)

In [7]:
len(names) / len(df)

0.4025

In [8]:
# df = pd.get_dummies(df, columns=['region'])
# df.head()

In [9]:
df = df.groupby(['name', 'sex']).sum()
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,count,freq
name,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aaden,M,2008,51,1.5e-05
Aahana,F,2018,26,9e-06
Aahil,M,2019,5,2e-06
Aaleyah,F,2010,17,5e-06
Aalia,F,4033,13,4e-06


In [10]:
df['name'] = df.index.get_level_values('name')
df['sex'] = df.index.get_level_values('sex')
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,count,freq,name,sex
name,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aaden,M,2008,51,1.5e-05,Aaden,M
Aahana,F,2018,26,9e-06,Aahana,F
Aahil,M,2019,5,2e-06,Aahil,M
Aaleyah,F,2010,17,5e-06,Aaleyah,F
Aalia,F,4033,13,4e-06,Aalia,F


In [11]:
df.query('name == "Chris"')


Unnamed: 0_level_0,Unnamed: 1_level_0,year,count,freq,name,sex
name,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Chris,F,1983,5,2e-06,Chris,F
Chris,M,7850,239,6.9e-05,Chris,M


In [12]:
df.loc[pd.IndexSlice['Chris', :]]

Unnamed: 0_level_0,year,count,freq,name,sex
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
F,1983,5,2e-06,Chris,F
M,7850,239,6.9e-05,Chris,M


In [13]:
df['istrain'] = np.random.rand(len(df)) < .9
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,year,count,freq,name,sex,istrain
name,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Aaden,M,2008,51,1.5e-05,Aaden,M,True
Aahana,F,2018,26,9e-06,Aahana,F,True
Aahil,M,2019,5,2e-06,Aahil,M,True
Aaleyah,F,2010,17,5e-06,Aaleyah,F,True
Aalia,F,4033,13,4e-06,Aalia,F,True


In [14]:
istrain = df['istrain']
del df['istrain']
istrain.sum() / len(istrain)

0.9042000943841435

In [15]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(analyzer='char', ngram_range=(1, 3), lowercase=False)
vectorizer

TfidfVectorizer(analyzer='char', lowercase=False, ngram_range=(1, 3))

In [16]:
vectorizer.fit(df['name'][istrain])

TfidfVectorizer(analyzer='char', lowercase=False, ngram_range=(1, 3))

In [17]:
vecs = vectorizer.transform(df['name'])
vecs = pd.DataFrame.sparse.from_spmatrix(vecs)
vecs.head()


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3653,3654,3655,3656,3657,3658,3659,3660,3661,3662
0,0.193687,0.395438,0.506137,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.182726,0.373059,0.0,0.45487,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.185518,0.378759,0.0,0.461821,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.166921,0.34079,0.0,0.0,0.38949,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.214766,0.438473,0.0,0.0,0.501131,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
vecs.columns = vectorizer.get_feature_names_out()
vecs.index = df.index
vecs.head()[vecs.columns[:5]]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,Aa,Aad,Aah,Aal
name,sex,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aaden,M,0.193687,0.395438,0.506137,0.0,0.0
Aahana,F,0.182726,0.373059,0.0,0.45487,0.0
Aahil,M,0.185518,0.378759,0.0,0.461821,0.0
Aaleyah,F,0.166921,0.34079,0.0,0.0,0.38949
Aalia,F,0.214766,0.438473,0.0,0.0,0.501131


In [19]:
vecs.shape

(4238, 3663)

In [26]:
import torch
torch

<module 'torch' from '/home/hobs/anaconda3/envs/nlpia2/lib/python3.8/site-packages/torch/__init__.py'>

In [27]:
class LogisticRegressionNN(torch.nn.Module):

    def __init__(self, num_features, num_outputs=1):
         super().__init__()
         self.linear = torch.nn.Linear(num_features, num_outputs)

    def forward(self, X):
        return torch.sigmoid(self.linear(X))

In [28]:
def make_tensor(X):
    """ Convert numpy ndarray to torch.Tensor """
    X = getattr(X, 'values', X)
    return X if isinstance(X, torch.Tensor) else torch.Tensor(X)

def make_array(x):
    """ Convert torch.Tensor to numpy 1-D array """
    if hasattr(x, 'detach'):
        return torch.squeeze(x).detach().numpy()
    return x

In [29]:
num_features = vecs.shape[1]  # number of unique n-grams in our "vocabulary"
num_outputs = 1    # number of nesses (sexes) to predict, we're predicting only femaleness

In [30]:
from tqdm import tqdm
import time
import json
import copy

# Fraction of the tensors y_pred and y that are the same 
# (y_pred == y).sum() / len(y)
def measure_binary_accuracy(y_pred, y):
    """ Round y_pred and y then count the preds that are equal to the truth to compute fraction correct """
    y_pred = make_array(y_pred).round()
    y = make_array(y).round()
    num_correct = (y_pred == y).sum()
    return num_correct / len(y)

In [31]:
def measure_performance(model, X_train, X_test, y_train, y_test, criterion):
    with torch.no_grad():
        # Calculating the loss and accuracy for the train dataset
        accuracy_train = measure_binary_accuracy(model(X_train), y_train)
        outputs_test = torch.squeeze(model(X_test))
        accuracy_test = measure_binary_accuracy(outputs_test, y_test)
        loss_test = criterion(outputs_test, y_test)
        return dict(i=i, 
                    # loss_train=loss.item(),
                    accuracy_train=accuracy_train,
                    loss_test=loss_test.item(),
                    accuracy_test=accuracy_test)

In [32]:
model = LogisticRegressionNN(num_features=vecs.shape[1], num_outputs=1)
model

LogisticRegressionNN(
  (linear): Linear(in_features=3663, out_features=1, bias=True)
)

In [36]:
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [37]:
# BCE: Binary Cross Entropy
criterion = torch.nn.BCELoss()
criterion

BCELoss()

In [38]:
def rand_range(min_value=0.001, max_value=1):
    scale = max_value - min_value
    return scale * np.random.rand() + min_value

In [39]:
def rand_range_log(min_value=0.001, max_value=1):
    min_log = np.log(min_value)
    max_log = np.log(max_value)
    return np.exp(rand_range(np.log(min_value), np.log(max_value)))

Create random hyperparameter table for optimizer learning_rate and momentum

In [54]:
# lr: learning_rate
hyperparam_ranges = dict(lr=[0.001, 1.0], momentum=[0.00001, 1.0])
hyperparam_table = []
num_attempts = 30
for i in range(num_attempts):
    hyperparam_values = dict()
    for k, v in hyperparam_ranges.items():
        hyperparam_values[k] = rand_range_log(*hyperparam_ranges[k])
    hyperparam_table.append(hyperparam_values)
pd.DataFrame(hyperparam_table)

Unnamed: 0,lr,momentum
0,0.010532,0.009981
1,0.001659,1.5e-05
2,0.002781,0.003996
3,0.082535,0.000478
4,0.006154,0.739336
5,0.212478,0.028372
6,0.052874,0.346604
7,0.00303,0.036922
8,0.001394,0.002256
9,0.001012,0.002702


In [55]:
model = LogisticRegressionNN(num_features=vecs.shape[1], num_outputs=1)
model

LogisticRegressionNN(
  (linear): Linear(in_features=3663, out_features=1, bias=True)
)

In [56]:
optimizer = torch.optim.SGD(model.parameters(), **hyperparam_table[0])
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.010531913197492276
    momentum: 0.009981264069486132
    nesterov: False
    weight_decay: 0
)

In [57]:
# pbar = tqdm(hyperparam_table, desc='Training attempt', total=len(hyperparam_table))
num_epochs=200

t0 = time.time()
for i, hyperparam_values in enumerate(hyperparam_table):
    t1 = time.time()
    model = LogisticRegressionNN(num_features=vecs.shape[1], num_outputs=1)
    optimizer = torch.optim.SGD(model.parameters(), **hyperparam_values)
    criterion = torch.nn.BCELoss()  # BCE: Binary Cross Entropy
    X = vecs.values
    y = (df[['sex']] == 'F').values
    X_train = torch.Tensor(X[istrain])
    X_test = torch.Tensor(X[~istrain])
    y_train = torch.Tensor(y[istrain])
    y_test = torch.Tensor(y[~istrain])

    pbar_epochs = tqdm(range(num_epochs), desc='Epoch:', total=num_epochs)
    results = [None] * num_epochs
    for epoch in pbar_epochs:
        optimizer.zero_grad() # Setting our stored gradients equal to zero
        outputs = model(X_train)
        loss_train = criterion(outputs, y_train) 
        loss_train.backward() # Computes the gradient of the given tensor w.r.t. the weights/bias
        loss_train = loss_train.item()
        optimizer.step() # Updates weights and biases with the optimizer (SGD)
        # print(f'Train loss: {np.round(loss_train.detach().numpy(), 4):0.4f}')
        outputs_test = model(X_test)
        loss_test = criterion(outputs_test, y_test).item()
        accuracy_test = measure_binary_accuracy(outputs_test, y_test)
        results[epoch] = dict(loss_train=loss_train, loss_test=loss_test, accuracy_test=accuracy_test)
        # pbar_epochs.set_description(f'loss_train/test: {loss_train:.4f}/{loss_test:.4f}')
    t2 = time.time()
    results[-1]['time_per_attempt'] = t2 - t1
    results[-1]['total_time'] = t2 - t0
    hyperparam_table[i].update(results[-1])
    print(f'attempt {i:04d}/{len(hyperparam_table)}')
    for k, v in hyperparam_table[i].items():
        print(f'{k}: {v}')
#         num_learning_rate_steps = 20
#         if not i % np.round(num_epochs / num_learning_rate_steps):
#             results.append(
#                 measure_performance(model=model,
#                                     X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test,
#                                     criterion=criterion))
#             results[-1]['loss_train'] = loss_train.item()
#             results[-1]['learning_rate'] = learning_rate
#             pbar.set_description(
#                 f'LR: {np.round(learning_rate, 2)}'
#                 f', Test loss: {np.round(results[-1]["loss_test"], 2)}'
#                 f', Train loss: {np.round(results[-1]["loss_train"], 3)}'
#                 f', Test acc: {np.round(results[-1]["accuracy_test"], 2)}'
#                 f', Train acc: {np.round(results[-1]["accuracy_train"], 3)}'
#             )
#             # learning_rate *= .9

Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 95.95it/s]


attempt 0000/30
lr: 0.010531913197492276
momentum: 0.009981264069486132
loss_train: 0.6813332438468933
loss_test: 0.6781478524208069
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2417185306549072
total_time: 2.2420995235443115


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.43it/s]


attempt 0001/30
lr: 0.0016586595418271253
momentum: 1.5154185879708016e-05
loss_train: 0.690472424030304
loss_test: 0.6902285218238831
accuracy_test: 0.6083743842364532
time_per_attempt: 2.1919734477996826
total_time: 4.435043573379517


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 98.70it/s]


attempt 0002/30
lr: 0.0027807000203903328
momentum: 0.003996354776463239
loss_train: 0.689594030380249
loss_test: 0.688784658908844
accuracy_test: 0.6108374384236454
time_per_attempt: 2.205629825592041
total_time: 6.640822410583496


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 98.48it/s]


attempt 0003/30
lr: 0.08253523517912457
momentum: 0.0004775537192634205
loss_train: 0.6692911386489868
loss_test: 0.6635659337043762
accuracy_test: 0.6108374384236454
time_per_attempt: 2.211170196533203
total_time: 8.8521728515625


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 96.72it/s]


attempt 0004/30
lr: 0.006154080936123693
momentum: 0.739336117243038
loss_train: 0.6762019395828247
loss_test: 0.6712818145751953
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2566583156585693
total_time: 11.108977794647217


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.31it/s]


attempt 0005/30
lr: 0.21247813473361446
momentum: 0.02837173293222428
loss_train: 0.6574523448944092
loss_test: 0.6542186141014099
accuracy_test: 0.6108374384236454
time_per_attempt: 2.1944239139556885
total_time: 13.303560733795166


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.44it/s]


attempt 0006/30
lr: 0.0528735221574289
momentum: 0.346603898803374
loss_train: 0.6693680286407471
loss_test: 0.6629351377487183
accuracy_test: 0.6108374384236454
time_per_attempt: 2.187392234802246
total_time: 15.491156339645386


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.06it/s]


attempt 0007/30
lr: 0.003029631036620123
momentum: 0.0369217078541256
loss_train: 0.6893351078033447
loss_test: 0.6883962750434875
accuracy_test: 0.6108374384236454
time_per_attempt: 2.173962116241455
total_time: 17.665290594100952


Epoch:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 101.18it/s]


attempt 0008/30
lr: 0.0013944808859037918
momentum: 0.0022559726288485927
loss_train: 0.6910763382911682
loss_test: 0.6905299425125122
accuracy_test: 0.6083743842364532
time_per_attempt: 2.148728847503662
total_time: 19.814265727996826


Epoch:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 101.77it/s]


attempt 0009/30
lr: 0.0010115305113284673
momentum: 0.0027020228012453623
loss_train: 0.6903941631317139
loss_test: 0.6898396015167236
accuracy_test: 0.6108374384236454
time_per_attempt: 2.1527600288391113
total_time: 21.967150926589966


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 96.74it/s]


attempt 0010/30
lr: 0.9075887512225034
momentum: 0.000490615489925614
loss_train: 0.6109689474105835
loss_test: 0.6173533797264099
accuracy_test: 0.6773399014778325
time_per_attempt: 2.2278687953948975
total_time: 24.195502281188965


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.69it/s]


attempt 0011/30
lr: 0.0010518522572146387
momentum: 0.10042009531113368
loss_train: 0.6914648413658142
loss_test: 0.6909220218658447
accuracy_test: 0.6009852216748769
time_per_attempt: 2.1953492164611816
total_time: 26.39101243019104


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 98.11it/s]


attempt 0012/30
lr: 0.6203973133993583
momentum: 5.9618513271624134e-05
loss_train: 0.628156840801239
loss_test: 0.6311936974525452
accuracy_test: 0.625615763546798
time_per_attempt: 2.2322776317596436
total_time: 28.623478651046753


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 97.01it/s]


attempt 0013/30
lr: 0.23162325901226105
momentum: 0.00024237676152209122
loss_train: 0.6563721299171448
loss_test: 0.6528714895248413
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2301251888275146
total_time: 30.853761434555054


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 97.34it/s]


attempt 0014/30
lr: 0.1724003662200313
momentum: 0.004947538146300958
loss_train: 0.6609938740730286
loss_test: 0.6564319133758545
accuracy_test: 0.6108374384236454
time_per_attempt: 2.240877866744995
total_time: 33.094791889190674


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 97.85it/s]


attempt 0015/30
lr: 0.6886457428992886
momentum: 0.6068473634476456
loss_train: 0.5737476944923401
loss_test: 0.5901632308959961
accuracy_test: 0.7019704433497537
time_per_attempt: 2.2312560081481934
total_time: 35.32620882987976


Epoch:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 101.30it/s]


attempt 0016/30
lr: 0.020780234118358967
momentum: 0.0011207464631830166
loss_train: 0.6770431399345398
loss_test: 0.6725366115570068
accuracy_test: 0.6108374384236454
time_per_attempt: 2.1487839221954346
total_time: 37.4751136302948


Epoch:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 101.14it/s]


attempt 0017/30
lr: 0.1237402297294879
momentum: 0.19238456074496166
loss_train: 0.6627039909362793
loss_test: 0.6574880480766296
accuracy_test: 0.6108374384236454
time_per_attempt: 2.1456029415130615
total_time: 39.62087559700012


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 97.83it/s]


attempt 0018/30
lr: 0.25622839123015534
momentum: 0.9982259858469285
loss_train: 0.3782990276813507
loss_test: 0.5378589630126953
accuracy_test: 0.7536945812807881
time_per_attempt: 2.243138074874878
total_time: 41.86417770385742


Epoch:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 101.73it/s]


attempt 0019/30
lr: 0.023108374975817195
momentum: 4.0553362869346e-05
loss_train: 0.6760981678962708
loss_test: 0.6713544130325317
accuracy_test: 0.6108374384236454
time_per_attempt: 2.1388845443725586
total_time: 44.00324821472168


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 96.99it/s]


attempt 0020/30
lr: 0.004364411242604326
momentum: 0.01907172855266233
loss_train: 0.6872634291648865
loss_test: 0.6862260699272156
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2277987003326416
total_time: 46.231282472610474


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.82it/s]


attempt 0021/30
lr: 0.02626952586209998
momentum: 2.4839946689904976e-05
loss_train: 0.6752415895462036
loss_test: 0.6701885461807251
accuracy_test: 0.6108374384236454
time_per_attempt: 2.185215473175049
total_time: 48.416646242141724


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 98.65it/s]


attempt 0022/30
lr: 0.04214603521086344
momentum: 0.0004304886258151614
loss_train: 0.6734163165092468
loss_test: 0.667460024356842
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2056822776794434
total_time: 50.622485399246216


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 97.65it/s]


attempt 0023/30
lr: 0.001121395983366414
momentum: 7.90868582958225e-05
loss_train: 0.69173264503479
loss_test: 0.6914060115814209
accuracy_test: 0.5886699507389163
time_per_attempt: 2.2185659408569336
total_time: 52.841230392456055


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.91it/s]


attempt 0024/30
lr: 0.0020252298273964477
momentum: 0.8350747502296219
loss_train: 0.6804060339927673
loss_test: 0.6770479679107666
accuracy_test: 0.6108374384236454
time_per_attempt: 2.171377182006836
total_time: 55.012773752212524


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 97.82it/s]


attempt 0025/30
lr: 0.008930556444337125
momentum: 0.00026023290538038075
loss_train: 0.6821268796920776
loss_test: 0.6791496276855469
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2374818325042725
total_time: 57.25041651725769


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 99.14it/s]


attempt 0026/30
lr: 0.05162496291842082
momentum: 0.000786732728092719
loss_train: 0.6721504926681519
loss_test: 0.6662967205047607
accuracy_test: 0.6108374384236454
time_per_attempt: 2.176692485809326
total_time: 59.42728590965271


Epoch:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 100.91it/s]


attempt 0027/30
lr: 0.04472570043161925
momentum: 0.0030687685477052092
loss_train: 0.6727815866470337
loss_test: 0.6669037342071533
accuracy_test: 0.6108374384236454
time_per_attempt: 2.1671082973480225
total_time: 61.59461569786072


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 98.59it/s]


attempt 0028/30
lr: 0.20573990255932134
momentum: 0.006307015987151483
loss_train: 0.6585432291030884
loss_test: 0.6545463800430298
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2068309783935547
total_time: 63.80167770385742


Epoch:: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:02<00:00, 96.88it/s]

attempt 0029/30
lr: 0.023271132996731645
momentum: 0.018224393085118442
loss_train: 0.6761126518249512
loss_test: 0.6713346838951111
accuracy_test: 0.6108374384236454
time_per_attempt: 2.2198901176452637
total_time: 66.02174043655396





In [58]:
hyperparam_table[-1]['loss_train'].item()



AttributeError: 'float' object has no attribute 'item'

In [132]:
hyperparam_df = pd.DataFrame(hyperparam_table)
hyperparam_df

Unnamed: 0,lr,momentum,loss_train
0,0.006897,0.000149,"tensor(0.6831, grad_fn=<BinaryCrossEntropyBack..."
1,0.003799,0.002150,"tensor(0.6836, grad_fn=<BinaryCrossEntropyBack..."
2,0.101760,0.005800,"tensor(0.6828, grad_fn=<BinaryCrossEntropyBack..."
3,0.580667,0.237036,"tensor(0.6832, grad_fn=<BinaryCrossEntropyBack..."
4,0.845265,0.000022,"tensor(0.6834, grad_fn=<BinaryCrossEntropyBack..."
...,...,...,...
95,0.042495,0.006755,"tensor(0.6835, grad_fn=<BinaryCrossEntropyBack..."
96,0.014527,0.123773,"tensor(0.6824, grad_fn=<BinaryCrossEntropyBack..."
97,0.008243,0.000038,"tensor(0.6825, grad_fn=<BinaryCrossEntropyBack..."
98,0.005621,0.001474,"tensor(0.6826, grad_fn=<BinaryCrossEntropyBack..."


In [131]:
def fit(model=model, X=vecs.values, y=(df[['sex']] == 'F').values, optimizer=None,
        num_epochs=30, learning_rate=.1, criterion=criterion, optimizer=optimizer):
    pbar = tqdm(range(num_epochs), desc='Epoch', total=num_epochs)
    X_train = torch.Tensor(X[istrain])
    X_test = torch.Tensor(X[~istrain])
    y_train = torch.Tensor(y[istrain])
    y_test = torch.Tensor(y[~istrain])

    results = []
    for i in pbar:
        optimizer.zero_grad() # Setting our stored gradients equal to zero
        outputs = model(X_train)
        loss_train = criterion(outputs, y_train) 
        loss_train.backward() # Computes the gradient of the given tensor w.r.t. the weights/bias
        optimizer.step() # Updates weights and biases with the optimizer (SGD)
        # print(f'Train loss: {np.round(loss_train.detach().numpy(), 4):0.4f}')
#         num_learning_rate_steps = 20
#         if not i % np.round(num_epochs / num_learning_rate_steps):
#             results.append(
#                 measure_performance(model=model,
#                                     X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test,
#                                     criterion=criterion))
#             results[-1]['loss_train'] = loss_train.item()
#             results[-1]['learning_rate'] = learning_rate
#             pbar.set_description(
#                 f'LR: {np.round(learning_rate, 2)}'
#                 f', Test loss: {np.round(results[-1]["loss_test"], 2)}'
#                 f', Train loss: {np.round(results[-1]["loss_train"], 3)}'
#                 f', Test acc: {np.round(results[-1]["accuracy_test"], 2)}'
#                 f', Train acc: {np.round(results[-1]["accuracy_train"], 3)}'
#             )
#             # learning_rate *= .9
    return results

SyntaxError: duplicate argument 'optimizer' in function definition (3048412353.py, line 1)

In [46]:
results = fit()

Epoch:  67%|██████████████████████████████████████████████████████████████████████████████████████████▋                                             | 20/30 [00:00<00:00, 102.87it/s]

Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6725
Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724


Epoch: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 106.04it/s]

Train loss: 0.6724
Train loss: 0.6724
Train loss: 0.6724





In [None]:
pd.DataFrame(results)

In [None]:
# model.score(vecs[~istrain], y[~istrain], sample_weight=df['count'][~istrain])

In [None]:
# model.classes_


In [None]:
names = ['Dewey', 'Kemal', 'Copeland', 'Vishvesh']
ourvecs = vectorizer.transform(names)
ourvecs = pd.DataFrame.sparse.from_spmatrix(ourvecs)
ourvecs.columns = vectorizer.get_feature_names_out()
ourvecs.index = list(zip(names, 'M'*len(names)))
ourvecs

In [None]:
ourtensors = 

In [None]:
names = ['Maria', 'Syndee', 'Aditi', 'Constance']
vecs = vectorizer.transform(names)
vecs = pd.DataFrame.sparse.from_spmatrix(vecs)
vecs.columns = vectorizer.get_feature_names_out()
vecs.index = list(zip(names, 'M'*len(names)))
pd.DataFrame(model.predict_proba(vecs)[:,0], index=vecs.index)

In [None]:
class LogisticRegressionNumpyNN(LogisticRegressionNN):

    def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)

    def predict_proba(self, X):
        return self.forward(make_tensor(X))
    
    def predict(self, X):
        return (np.array(self.forward(make_tesnor(X))) > 0.5).astype(int)
    
# ', '.join([v for v in dir(LogisticRegression) if v[0] != '_'])