
## Passwords Auditor with constraints


In [1]:

# Third-party dependencies (uncomment and run once in a fresh environment):
# %pip install zxcvbn
# %pip install password-strength
# %pip install passlib


In [2]:


import string
import torch
import torch.nn as nn
import torch.nn.functional as F
from zxcvbn import zxcvbn
from password_strength import PasswordStats
import math


In [3]:


# Alphabet the generator can emit: ASCII letters, then digits, then eight symbols.
VOCAB = [*string.ascii_letters, *string.digits, *"!@#$%^&*"]

# Vocabulary size.
V = len(VOCAB)
V


70

In [4]:

# One 0/1 float vector per character class, aligned with VOCAB:
# entry i is 1.0 when VOCAB[i] belongs to the class, else 0.0.
MASK_UPPER, MASK_LOWER, MASK_DIGIT, MASK_SPEC = (
    torch.tensor([symbol in charset for symbol in VOCAB]).float()
    for charset in (
        string.ascii_uppercase,
        string.ascii_lowercase,
        string.digits,
        "!@#$%^&*",
    )
)

print(MASK_SPEC.shape)

MASK_SPEC


torch.Size([70])


tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.])

In [5]:



def score_with_password_strength(passwords):
    """Summarise `password_strength` scores for a collection of passwords.

    Every password is scored once with ``PasswordStats(pw).strength()`` and
    the three aggregates are derived from those cached values.

    Args:
        passwords: Iterable of password strings.

    Returns:
        dict with the keys
        ``"average_strength"`` (mean strength),
        ``"weak%"`` (percentage of passwords with strength < 0.3) and
        ``"strong%"`` (percentage of passwords with strength > 0.7).

    Raises:
        ValueError: If ``passwords`` is empty.
    """
    # Score each password exactly once; the aggregates below reuse the values.
    strengths = [PasswordStats(pw).strength() for pw in passwords]
    if not strengths:
        raise ValueError("passwords must contain at least one password")

    total = len(strengths)
    return {
        "average_strength": sum(strengths) / total,
        "weak%": sum(s < 0.3 for s in strengths) / total * 100,
        "strong%": sum(s > 0.7 for s in strengths) / total * 100,
    }



In [6]:



# NOTE: a later cell in this notebook defines `estimate_entropy` again; after
# running all cells, that later definition is the one in effect.
def estimate_entropy(passwd):
    """Estimate the entropy of a password in bits from its character classes.

    The pool size is the sum of the sizes of every class that occurs at least
    once: 26 for lower-case, 26 for upper-case, 10 for digits and 32 for ASCII
    punctuation. The estimate is ``len(passwd) * log2(pool size)``.

    Args:
        passwd: Password string.

    Returns:
        float: Estimated entropy in bits. It is 0 for an empty string or for
        one containing none of the four classes.
    """
    charset_size = 0
    if any(c.islower() for c in passwd):
        charset_size += 26
    if any(c.isupper() for c in passwd):
        charset_size += 26
    if any(c.isdigit() for c in passwd):
        charset_size += 10
    # string.punctuation holds all 32 ASCII punctuation characters, so the 32
    # added here matches what is actually detected (including ` and ~).
    if any(c in string.punctuation for c in passwd):
        charset_size += 32
    # A pool of 0 would make log2 fail; treat it as a pool of 1 (0 bits).
    return len(passwd) * math.log2(charset_size or 1)

def average_entropy(passwords):
    """Return the mean `estimate_entropy` value over ``passwords``, in bits."""
    total_bits = 0
    for candidate in passwords:
        total_bits += estimate_entropy(candidate)
    return total_bits / len(passwords)



In [7]:



# NOTE: this redefines the `estimate_entropy` from an earlier cell.
def estimate_entropy(password):
    """Estimate the entropy of a password in bits from its character classes.

    The pool size is the sum of the sizes of every class that occurs at least
    once: 26 for lower-case, 26 for upper-case, 10 for digits and 32 for ASCII
    punctuation. The estimate is ``len(password) * log2(pool size)``.

    Args:
        password: Password string.

    Returns:
        float: Estimated entropy in bits. It is 0 for an empty string or for
        one containing none of the four classes.
    """
    charset_size = 0
    if any(c.islower() for c in password):
        charset_size += 26
    if any(c.isupper() for c in password):
        charset_size += 26
    if any(c.isdigit() for c in password):
        charset_size += 10
    # string.punctuation holds all 32 ASCII punctuation characters, so the 32
    # added here matches what is actually detected (including ` and ~).
    if any(c in string.punctuation for c in password):
        charset_size += 32
    # A pool of 0 would make log2 fail; treat it as a pool of 1 (0 bits).
    return len(password) * math.log2(charset_size or 1)





In [8]:

def entropy_stats(passwords):
    """Return mean, minimum and maximum `estimate_entropy` over ``passwords``.

    Returns:
        dict with the keys ``"avg_entropy_bits"``, ``"min"`` and ``"max"``.
    """
    bits = list(map(estimate_entropy, passwords))
    mean_bits = sum(bits) / len(bits)
    return {
        "avg_entropy_bits": mean_bits,
        "min": min(bits),
        "max": max(bits),
    }


In [9]:

class TinyDecoder(nn.Module):
    """Single linear layer mapping a latent vector to per-position token logits."""

    def __init__(self, z_dim=128, T=12, V=V):
        """Build the decoder.

        Args:
            z_dim: Size of the latent vector.
            T: Number of character positions.
            V: Number of tokens per position; defaults to the module-level ``V``.
        """
        super().__init__()
        self.T = T
        self.V = V
        self.linear = nn.Linear(z_dim, T * V)

    def forward(self, z):
        """Project ``z`` and reshape the result into position-by-token logits.

        Args:
            z: Tensor of shape ``[..., z_dim]``; any number of leading
                dimensions is accepted (``[B, z_dim]`` gives ``[B, T, V]``).

        Returns:
            Tensor of shape ``[..., T, V]``.
        """
        logits = self.linear(z)                                   # [..., T*V]
        # Split only the last dimension so leading dims pass through untouched.
        return logits.view(*logits.shape[:-1], self.T, self.V)    # [..., T, V]



In [10]:

def st_gumbel_softmax(logits, tau):
    """Straight-through Gumbel-softmax over the last dimension.

    Gumbel noise is added to ``logits``, a softmax at temperature ``tau`` is
    taken, and the result is snapped to a one-hot vector of its argmax.

    Args:
        logits: Float tensor of unnormalised scores (annotated [B, T, V]).
        tau: Temperature dividing the noisy logits.

    Returns:
        Tensor shaped like ``logits`` whose forward value is the one-hot
        sample and whose gradient is that of the soft sample.
    """
    tiny = 1e-9  # keeps both logarithms away from log(0)

    uniform = torch.rand_like(logits)
    inner = -torch.log(uniform + tiny)
    gumbel = -torch.log(inner + tiny)

    soft = F.softmax((logits + gumbel) / tau, dim=-1)           # [B, T, V]

    winners = soft.argmax(-1)
    hard = F.one_hot(winners, soft.size(-1)).float()

    # (soft - soft.detach()) is zero in value but carries soft's gradient.
    straight_through = soft - soft.detach()
    return hard + straight_through


In [11]:


def class_present(p_pos):
    """Soft score that a character class occurs at least once per row.

    Args:
        p_pos: [B, T] tensor with, for each position, the probability of the
            character being in the class.

    Returns:
        [B] tensor equal to ``1 - prod_t(1 - p_pos[:, t] + 1e-6)``.
    """
    # Per-position probability of *not* being in the class, plus a tiny offset.
    p_absent = 1.0 - p_pos + 1e-6
    p_never = p_absent.prod(dim=1)                           # [B]
    return 1.0 - p_never



In [12]:

def entropy_bits_per_string(y_probs):
    """Shannon entropy of each sequence, averaged over positions, in bits.

    Args:
        y_probs: [B, T, V] tensor of per-token probabilities.

    Returns:
        [B] tensor: for each row, the mean over the T positions of the
        per-position entropy (not the sum over the string).
    """
    safe = y_probs.clamp_min(1e-9)                           # avoid log(0)
    nats_per_pos = -(safe * safe.log()).sum(dim=-1)          # [B, T]
    nats_mean = nats_per_pos.mean(dim=1)                     # [B]
    ln2 = torch.log(torch.tensor(2.0))
    return nats_mean / ln2                                   # nats -> bits


In [13]:

# --- Decode one-hot/probabilities to strings ---

def decode(y_probs):
    """Convert per-position token scores into strings.

    Args:
        y_probs: [B, T, V] tensor; the highest-scoring token is taken at
            each position.

    Returns:
        list[str]: One string per row, built from the matching VOCAB entries.
    """
    best_tokens = y_probs.argmax(dim=-1).cpu().tolist()      # [B, T] as nested lists
    return ["".join(VOCAB[tok] for tok in seq) for seq in best_tokens]


In [14]:

def generate_passwords(WpU, WpL, WpD, WpS, batch=16, steps=300, T=12, H_min_bits=6.0, seed=0, use_gumbel=True):
    """Optimise per-position character logits towards the weighted character classes.

    A logit tensor ``z`` of shape [batch, T, V] is initialised from a fixed
    list of 16 weak passwords (cut or right-padded with "a" to length ``T``)
    and updated with Adam to minimise a weighted sum of "class missing"
    penalties. Rows beyond the built-in weak passwords start with all logits
    equal.

    Args:
        WpU: Weight of the upper-case penalty.
        WpL: Weight of the lower-case penalty.
        WpD: Weight of the digit penalty.
        WpS: Weight of the special-character penalty.
            A weight of 0 only drops that penalty from the loss; it does not
            forbid the class.
        batch: Number of passwords to produce.
        steps: Number of optimisation steps. With ``steps <= 0`` the initial
            logits are decoded unchanged.
        T: Password length.
        H_min_bits: Floor used by the entropy penalty. That penalty is
            multiplied by 0.0 below, so this argument currently contributes
            nothing to the loss.
        seed: Seed passed to ``torch.manual_seed``.
        use_gumbel: Add Gumbel noise to the logits during the first 500 steps.

    Returns:
        list[str]: ``batch`` strings of length ``T``, decoded (argmax per
        position) from the probabilities computed in the last iteration,
        i.e. before that iteration's optimiser step.
    """
    torch.manual_seed(seed)

    # The decoder is not used by the optimisation below. It is still built
    # because nn.Linear's random weight initialisation advances the RNG seeded
    # above; dropping it would change the passwords produced for a given seed.
    model = TinyDecoder(T=T)

    for p in model.parameters():
        p.requires_grad = False  # decoder is fixed

    weak = [
        "password", "qwerty", "letmein", "admin",
        "welcome", "123456", "iloveyou", "guest",
        "hello123", "abc123", "monkey", "test",
        "summer", "dragon", "football", "name"
    ][:batch]

    # Force every seed password to exactly T characters.
    weak = [pw[:T].ljust(T, "a") for pw in weak]

    # Start with strongly negative logits everywhere and a strongly positive
    # logit on each seed character.
    z_init = torch.full((batch, T, V), -6.0)
    for b, pw in enumerate(weak):
        for t, c in enumerate(pw):
            if c in VOCAB:
                z_init[b, t, VOCAB.index(c)] = 6.0

    z = z_init.clone().detach().requires_grad_(True)

    opt = torch.optim.Adam([z], lr=0.05)

    # Fallback so that steps <= 0 decodes the initial logits instead of
    # failing on an unassigned `y_probs`. Draws no random numbers.
    y_probs = F.softmax(z.detach(), dim=-1)

    for step in range(steps):
        # Temperature falls linearly from 0.8 and is floored at 0.2.
        tau = max(0.8 - 0.003 * step, 0.2)

        if use_gumbel and step < 500:
            # -log(Exp(1)) is a standard Gumbel sample.
            gumbel_noise = -torch.empty_like(z).exponential_().log()
            logits = (z + gumbel_noise) / tau
            y_probs = F.softmax(logits, dim=-1)
        else:
            y_probs = F.softmax(z / tau, dim=-1)

        # Probability mass per position that falls in each class: [batch, T].
        pU = (y_probs * MASK_UPPER).sum(dim=-1)
        pL = (y_probs * MASK_LOWER).sum(dim=-1)
        pD = (y_probs * MASK_DIGIT).sum(dim=-1)
        pS = (y_probs * MASK_SPEC).sum(dim=-1)

        class_loss = (
            WpU*F.relu(1.0 - class_present(pU)).mean() +
            WpL*F.relu(1.0 - class_present(pL)).mean() +
            WpD*F.relu(1.0 - class_present(pD)).mean() +
            WpS*F.relu(1.0 - class_present(pS)).mean()
        )

        H_bits = entropy_bits_per_string(y_probs)
        entropy_loss = F.relu(H_min_bits - H_bits).mean()

        # The entropy term is disabled by its 0.0 weight.
        loss = class_loss + entropy_loss*0.0

        opt.zero_grad()
        loss.backward()
        opt.step()

    return decode(y_probs.detach())



In [15]:


def evaluate_passwords(password_list):
    """Score passwords with zxcvbn and aggregate the results.

    Args:
        password_list: Passwords to evaluate.

    Returns:
        dict with
        'average_score': mean zxcvbn score,
        'score_distribution': count of passwords for each score 0..4,
        'avg_crack_time_secs': mean of the
            'offline_fast_hashing_1e10_per_second' crack-time estimate.
    """
    reports = [zxcvbn(candidate) for candidate in password_list]
    scores = [report['score'] for report in reports]
    crack_times = [
        report['crack_times_seconds']['offline_fast_hashing_1e10_per_second']
        for report in reports
    ]

    total = len(scores)
    avg_score = sum(scores) / total

    # Tally only the scores 0..4, keeping the keys in ascending order.
    histogram = dict.fromkeys(range(5), 0)
    for value in scores:
        if value in histogram:
            histogram[value] += 1

    return {
        'average_score': avg_score,
        'score_distribution': histogram,
        'avg_crack_time_secs': sum(crack_times) / total,
    }


In [16]:

def eval_func_metrics(constraintsGeneratedPasswords, preview=slice(12, 32)):
    """Print a sample of the passwords followed by all strength and entropy metrics.

    Args:
        constraintsGeneratedPasswords: List of generated password strings.
        preview: Slice selecting which passwords are printed as a sample;
            defaults to entries 12..31.

    Returns:
        None. Everything is written to standard output: the sample, the
        zxcvbn summary, the password_strength summary, the average entropy
        and the entropy statistics, in that order.
    """
    for sample in constraintsGeneratedPasswords[preview]:
        print(sample)

    result = evaluate_passwords(constraintsGeneratedPasswords)

    print("Average score:", result['average_score'])
    print("Score distribution:", result['score_distribution'])
    print("Average crack time (seconds):", result['avg_crack_time_secs'])

    print(score_with_password_strength(constraintsGeneratedPasswords))
    print(average_entropy(constraintsGeneratedPasswords))
    print(entropy_stats(constraintsGeneratedPasswords))
    



## Experiments


In [17]:

# Number of passwords generated in each experiment below (passed as `batch`).
num_passwords = 2048



## All constraints


In [18]:

# All four character-class penalties are active with weight 1.0.
constraintsGeneratedPasswords = generate_passwords(
    WpU=1.0, WpL=1.0, WpD=1.0, WpS=1.0,
    batch=num_passwords, steps=2000, T=12, H_min_bits=3.0,
)

eval_func_metrics(constraintsGeneratedPasswords)


sN7meraaaa@a
dGTg2n^aaaa&
foo$ba5^B1aa
nUmea1^a5aTD
6#l830*Pq^8J
*E0&3!J&$6nd
4&G0#s7az#uU
H9l#f#O7n399
z@FA6!#^gh01
qFp&1^4@$AQ9
!tYsAH4u$971
Y@09Rv%$^u8n
2J&wZC29*C^F
12!J*%&gbl9L
%q8$0p@t*G$W
#FcKX^99AsK9
S06vW70LO!&a
M5^^%rel%qp9
^Q03hG^&fKK7
0&k!1$WD!p5B
Average score: 3.998046875
Score distribution: {0: 0, 1: 0, 2: 0, 3: 4, 4: 2044}
Average crack time (seconds): 97.148903138766943359375
{'average_strength': 0.5144425006959712, 'weak%': 0.0, 'strong%': 0.0}
78.65506622013055
{'avg_entropy_bits': 78.65506622013055, 'min': 78.65506622013166, 'max': 78.65506622013166}



## No Special Chars


In [19]:

# WpS=0.0 drops the special-character penalty; the other three stay at 1.0.
constraintsGeneratedPasswords = generate_passwords(
    WpU=1.0, WpL=1.0, WpD=1.0, WpS=0.0,
    batch=num_passwords, steps=2000, T=12, H_min_bits=3.0,
)

eval_func_metrics(constraintsGeneratedPasswords)


sW6meraaaaFa
d8ag2nFaaaaN
fooVba1Wa1aa
nUmeaa7aaaaF
iJw034Xw225S
6E3865L5l3d8
I0N4vcMQ7Uu8
F1hKSwu02523
FJvt93k1PKS2
4W4X7tu78HxK
6243DK66cnIz
3aU5RiH22d8Y
xLhTv1k9Iw3N
Op9K7RIzKb94
xq583qGjFa8d
XT3s10M9bp00
q8aTWzpy1260
JMeO5XbC9q52
PQmvC169fL0m
6Oqs1TOlLI5h
Average score: 3.99609375
Score distribution: {0: 0, 1: 0, 2: 2, 3: 4, 4: 2042}
Average crack time (seconds): 92.44699810311508789510141924
{'average_strength': 0.5100117175939882, 'weak%': 0.0, 'strong%': 0.0}
71.45035572463856
{'avg_entropy_bits': 71.45035572463856, 'min': 71.45035572464249, 'max': 71.45035572464249}



## No Digits


In [20]:

# WpD=0.0 drops the digit penalty; the other three stay at 1.0.
constraintsGeneratedPasswords = generate_passwords(
    WpU=1.0, WpL=1.0, WpD=0.0, WpS=1.0,
    batch=num_passwords, steps=2000, T=12, H_min_bits=3.0,
)

eval_func_metrics(constraintsGeneratedPasswords)


sRmmeraaaa&a
d%$gDnFaaaaa
foo$baPKaaaa
n&meaN@aaaaY
k%mw!w*Zv!a$
G%#p!Kr!$iA$
X&jXPz&%F%u@
$SJ*#%@lnw&^
@Qq^$s#*$KbB
qYeHy^k@&AkR
P@!MJglV$k*a
Q%#O!f#j!YIM
q$HySY!f^H$#
NRfJ%*Iymc@P
%q*v@%$l&O&W
bR*#@m^^#s@&
r@&oVfQ$P*G#
MG%^%!$@%upQ
e@*@gl@&@rRV
^^m!O!@^wU&B
Average score: 3.99560546875
Score distribution: {0: 0, 1: 0, 2: 1, 3: 7, 4: 2040}
Average crack time (seconds): 96.536714554293603515625
{'average_strength': 0.5018354511386897, 'weak%': 0.0, 'strong%': 0.0}
76.710661500807
{'avg_entropy_bits': 76.710661500807, 'min': 76.70780907334513, 'max': 78.65506622013166}



## No lower case letters


In [21]:

# WpL=0.0 drops the lower-case penalty; the other three stay at 1.0.
constraintsGeneratedPasswords = generate_passwords(
    WpU=1.0, WpL=0.0, WpD=1.0, WpS=1.0,
    batch=num_passwords, steps=2000, T=12, H_min_bits=3.0,
)

eval_func_metrics(constraintsGeneratedPasswords)



sN7meraaaa@a
dGTg2n^aaaa&
foo$ba5^B1aa
nUmea1^a5aTD
461$3T*@@^BR
#$0@3#L!01$A
6&GKP&!H!76%
&5X*&$O1!J^X
V^!@*FI^7&@1
9W1E7%*##4D1
S!$650Z*T@%V
##N9!!82#O7Y
6**6JM9$Q*^%
15&!@%G%84JT
$27$1F%E!O&D
!#UEP&!8^8!0
$5!!VBW6D0V#
@8^*%O7Q1^8O
L446O6^&8X0Q
7T!!D!&^^940
Average score: 3.998046875
Score distribution: {0: 0, 1: 0, 2: 0, 3: 4, 4: 2044}
Average crack time (seconds): 95.698352951753662109375
{'average_strength': 0.49887797670101397, 'weak%': 0.0, 'strong%': 0.0}
73.0933471584819
{'avg_entropy_bits': 73.0933471584819, 'min': 73.04955409500407, 'max': 78.65506622013166}



## No upper case letters


In [22]:

# WpU=0.0 drops the upper-case penalty; the other three stay at 1.0.
constraintsGeneratedPasswords = generate_passwords(
    WpU=0.0, WpL=1.0, WpD=1.0, WpS=1.0,
    batch=num_passwords, steps=2000, T=12, H_min_bits=3.0,
)

eval_func_metrics(constraintsGeneratedPasswords)



sum@eraaaa1a
9rag3n#aaaaa
fo@t@a1laaaa
namea7$a9a1a
3zn27#!$q!2%
*f0&6!*!36js
5&60h@3f8mr%
$5p*o#^5^e84
i@a&r4!^1h09
^r!97#47$!*@
!@!s%9&7$9hn
q@ub%$r2^^8*
1w*k&0l$#333
2%7p@%c&2!&1
%t%51!#tk0@*
%n3#1f$2$s!3
$1!vl7!*4b3f
@rh^%5^@%6g2
^006hi^@fr23
68m11^&!^@92
Average score: 3.99365234375
Score distribution: {0: 0, 1: 1, 2: 1, 3: 8, 4: 2038}
Average crack time (seconds): 93.85370313604392903349773857
{'average_strength': 0.4987833083522382, 'weak%': 0.0, 'strong%': 0.0}
73.04417789672422
{'avg_entropy_bits': 73.04417789672422, 'min': 62.039100017307746, 'max': 73.04955409500407}



## No special characters and no digits


In [23]:

# Only the upper- and lower-case penalties are active (WpD=0.0, WpS=0.0).
constraintsGeneratedPasswords = generate_passwords(
    WpU=1.0, WpL=1.0, WpD=0.0, WpS=0.0,
    batch=num_passwords, steps=2000, T=12, H_min_bits=3.0,
)

eval_func_metrics(constraintsGeneratedPasswords)


sBmmeraaaaaa
dOagonaaaaaa
fooEballaaaa
nUmeaaaaaaaa
xYRgCMXsqqby
fOnxvdLSJsJA
QDnCRslDeWoU
vDmkbwuLHDkJ
FQntcrImWKkj
WHurWafWBkrT
WTBkKhaUAskV
QOylRcyXcBfY
alNzqCgRxSCV
OwqlFDbzdqEA
tEasOyyEAjNH
NbyFfTGLKejK
OoBBhecPDrdm
DGGBLkbJPddk
RxblFYlGfXBa
QbqbWnWPdpdl
Average score: 3.990234375
Score distribution: {0: 0, 1: 0, 2: 4, 3: 12, 4: 2032}
Average crack time (seconds): 94.907177591399365234375
{'average_strength': 0.5161081391720453, 'weak%': 0.0, 'strong%': 0.0}
68.40973718278993
{'avg_entropy_bits': 68.40973718278993, 'min': 68.4052766176931, 'max': 71.45035572464249}



## No special characters, no digits, and no lower-case letters


In [24]:


# Only the upper-case penalty is active (WpL=0.0, WpD=0.0, WpS=0.0).
constraintsGeneratedPasswords = generate_passwords(
    WpU=1.0, WpL=0.0, WpD=0.0, WpS=0.0,
    batch=num_passwords, steps=2000, T=12, H_min_bits=3.0,
)

eval_func_metrics(constraintsGeneratedPasswords)


sBmmeraaaaaa
dOagonaaaaaa
fooEballaaaa
nUmeaaaaaaaa
CDQDGROVSHAZ
GSAMTPFFRRJC
KSGKRSJWQEKV
ILSMDVKXIUAT
INVAQSWPYTBN
OJFBIESPKGCP
BZDIPKNUXPAV
TBBCCZNMZHFO
SRXMSDQYAZMK
WEESCEVNFYUA
OQGMIIWQCBKW
XNOFNPKRTLYO
OGCQTCOATPAG
JGJRDGOZOGPX
ABGGFWWAVKTM
QUJILMOWVYWA
Average score: 3.9912109375
Score distribution: {0: 0, 1: 0, 2: 4, 3: 10, 4: 2034}
Average crack time (seconds): 88.77400157576362305131589209
{'average_strength': 0.4876855668168611, 'weak%': 0.0, 'strong%': 0.0}
56.50348718278915
{'avg_entropy_bits': 56.50348718278915, 'min': 56.405276617693104, 'max': 71.45035572464249}
