In [2]:
import pandas as pd
import numpy as np
import math as m
from collections import Counter

## Generate Dataframe of Keyboard Characters

In [3]:
# Keep ASCII codes 33-122 ('!' through 'z') except the punctuation listed below.
excluded_symbols = "`.,?-_[]{}<>()|'\"\\/"
characters = "".join(
    chr(code) for code in range(33, 123) if chr(code) not in excluded_symbols
)
# Space-delimited copy of the kept characters: every character is followed by one space.
separated_characters = "".join(symbol + ' ' for symbol in characters)
# Splitting on the delimiter leaves a single trailing empty string, which is dropped.
character_list = separated_characters.split(' ')[:-1]
print(character_list)

['!', '#', '$', '%', '&', '*', '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [4]:
def give_type(character):
    """Label a string as 'Number', 'Lowercase', 'Uppercase' or 'Special'.

    The checks run in that order and the first match wins; a string that is
    neither numeric nor cased is reported as 'Special'.
    """
    if character.isnumeric():
        return 'Number'
    if character.islower():
        return 'Lowercase'
    return 'Uppercase' if character.isupper() else 'Special'

In [5]:
# One row per allowed character, labelled with the category give_type assigns to it.
character_type_df = pd.DataFrame({'Character': character_list})
character_type_df['Character Type'] = character_type_df['Character'].map(give_type)

In [6]:
character_type_df

Unnamed: 0,Character,Character Type
0,!,Special
1,#,Special
2,$,Special
3,%,Special
4,&,Special
...,...,...
69,v,Lowercase
70,w,Lowercase
71,x,Lowercase
72,y,Lowercase


In [7]:
character_type_df['Character Type'].value_counts()

Character Type
Lowercase    26
Uppercase    26
Special      12
Number       10
Name: count, dtype: int64

## Generate Matrix with the Layout of Characters on a Keyboard

In [8]:
# Keyboard layout as a 3-D array of single-character strings, shape (4, 12, 2):
#   axis 0 -> keyboard row (number row, then the q.., a.. and z.. letter rows),
#   axis 1 -> key position within the row, left to right,
#   axis 2 -> the two characters on that key, e.g. ['1', '!'] or ['q', 'Q'].
# NOTE(review): '?' appears to be a filler for slots whose character is not used
# (it is stripped from the character set earlier in the notebook) and for padding
# the shorter rows out to 12 keys -- confirm before looking up '?' in this array.
keyboard_matrix = np.array([[['1','!'], ['2','@'], ['3','#'], ['4','$'], ['5','%'], ['6','^'], ['7','&'], ['8','*'], ['9','?'], ['0','?'], ['?','?'], ['=','+']],
                 [['q','Q'], ['w','W'], ['e','E'], ['r','R'], ['t','T'], ['y','Y'], ['u','U'], ['i','I'], ['o','O'], ['p','P'], ['?','?'], ['?','?']],
                 [['a','A'], ['s','S'], ['d','D'], ['f','F'], ['g','G'], ['h','H'], ['j','J'], ['k','K'], ['l','L'], [';',':'], ['?','?'], ['?','?']],
                 [['z','Z'], ['x','X'], ['c','C'], ['v','V'], ['b','B'], ['n','N'], ['m','M'], ['?','?'], ['?','?'], ['?','?'], ['?','?'], ['?','?']]])

In [9]:
keyboard_matrix[0]

array([['1', '!'],
       ['2', '@'],
       ['3', '#'],
       ['4', '$'],
       ['5', '%'],
       ['6', '^'],
       ['7', '&'],
       ['8', '*'],
       ['9', '?'],
       ['0', '?'],
       ['?', '?'],
       ['=', '+']], dtype='<U1')

In [10]:
keyboard_matrix.shape

(4, 12, 2)

In [11]:
keyboard_matrix[np.where(keyboard_matrix == 'f')][0][0]

'f'

In [12]:
np.where(keyboard_matrix == 'f')[0][0].item()

2

In [13]:
np.where(keyboard_matrix == 'f')[0][0].item()+np.where(keyboard_matrix == 'f')[1][0].item()

5

In [14]:
96*95

9120

## Generate a Pandas Dataframe to Display the Manhattan Distance between Every Possible Pair of Characters

In [15]:
def make_matrix(list, array):
    """Tabulate the keyboard distance between every ordered pair of distinct characters.

    Parameters
    ----------
    list : sequence of str
        Characters to pair up. (The name shadows the builtin; it is kept so
        existing callers that pass it by keyword keep working.)
    array : numpy.ndarray
        Keyboard layout with at least two axes. Axis 0 is treated as the row and
        axis 1 as the column; any further axes are ignored for the distance.

    Returns
    -------
    pandas.DataFrame
        Columns 'Start Character', 'End Character' and 'Distance', one row per
        ordered pair (a, b) with a != b. 'Distance' is the Manhattan distance
        |row_a - row_b| + |col_a - col_b| between the first occurrence of each
        character in ``array``.

    Raises
    ------
    IndexError
        If a character that takes part in a pair does not occur in ``array``.
    """
    positions = {}

    def locate(symbol):
        # Scan the array once per distinct character instead of four times per
        # pair; the lookup stays lazy so errors surface exactly when a pair
        # actually needs the missing character.
        if symbol not in positions:
            hits = np.where(array == symbol)
            positions[symbol] = (hits[0][0].item(), hits[1][0].item())
        return positions[symbol]

    start_chars = []
    end_chars = []
    distances = []
    for char in list:
        for compchar in list:
            if char == compchar:
                continue
            start_row, start_col = locate(char)
            end_row, end_col = locate(compchar)
            start_chars.append(char)
            end_chars.append(compchar)
            distances.append(abs(start_row - end_row) + abs(start_col - end_col))
    return pd.DataFrame({'Start Character': start_chars, 'End Character': end_chars, 'Distance': distances})

In [16]:
character_distance_df = make_matrix(character_list, keyboard_matrix)

In [17]:
character_distance_df

Unnamed: 0,Start Character,End Character,Distance
0,!,#,2
1,!,$,3
2,!,%,4
3,!,&,6
4,!,*,7
...,...,...,...
5397,z,u,8
5398,z,v,3
5399,z,w,3
5400,z,x,1


In [18]:
character_distance_df[character_distance_df['Distance'] == 0]

Unnamed: 0,Start Character,End Character,Distance
7,!,1,0
82,#,3,0
156,$,4,0
230,%,5,0
305,&,7,0
...,...,...,...
5079,v,V,0
5153,w,W,0
5227,x,X,0
5301,y,Y,0


In [19]:
character_distance_df[(character_distance_df['Start Character'] == '+')&(character_distance_df['Distance'] == 3)]

Unnamed: 0,Start Character,End Character,Distance
453,+,9,3
473,+,P,3
500,+,p,3


In [20]:
character_distance_df[character_distance_df['End Character'] == '?']

Unnamed: 0,Start Character,End Character,Distance


In [21]:
character_distance_df[character_distance_df['Distance'] == 14]

Unnamed: 0,Start Character,End Character,Distance
483,+,Z,14
510,+,z,14
1432,=,Z,14
1459,=,z,14
3364,Z,+,14
3377,Z,=,14
5335,z,+,14
5348,z,=,14


In [49]:
# Left-join every (start, end) pair with the type of its end character.
character_info_df = pd.merge(
    character_distance_df,
    character_type_df,
    how='left',
    left_on='End Character',
    right_on='Character',
)

[OLD APPROACH]  
Approach for selecting first character will be a completely random choice and the next character after this seed will be narrowed down in 2 steps. The first step is narrowing down the options to a specific character type with weighted probabilities based on composition of previously selected password characters. The second step is narrowing down the options to a specific distance from the most recently selected character.

[Current Approach]  
Character types are selected one at a time to build a sequence. The 4 character types start with an equal chance of being selected. After a character type has been selected, its probability of being selected is reduced, and the amount it was reduced by is split evenly between the other 3 character types. This process continues until the desired number of character types has been selected for the generated sequence.

In [22]:
test = [m.floor(i/25)+1 for i in range(100)]

In [23]:
test

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4]

In [24]:
Counter(test)

Counter({1: 25, 2: 25, 3: 25, 4: 25})

In [25]:
np.random.shuffle(test)

In [26]:
test

[3,
 1,
 3,
 2,
 2,
 2,
 2,
 1,
 4,
 3,
 1,
 4,
 4,
 1,
 2,
 3,
 3,
 4,
 2,
 2,
 1,
 3,
 3,
 3,
 1,
 2,
 1,
 4,
 4,
 2,
 1,
 2,
 4,
 3,
 1,
 4,
 3,
 3,
 2,
 2,
 4,
 2,
 1,
 3,
 2,
 2,
 3,
 4,
 3,
 3,
 1,
 2,
 4,
 2,
 1,
 1,
 1,
 1,
 4,
 2,
 2,
 1,
 1,
 3,
 2,
 2,
 4,
 1,
 1,
 4,
 2,
 3,
 3,
 3,
 4,
 4,
 1,
 4,
 3,
 3,
 3,
 1,
 4,
 2,
 4,
 4,
 4,
 3,
 2,
 3,
 4,
 4,
 1,
 4,
 1,
 3,
 4,
 1,
 2,
 1]

In [27]:
rng = np.random.randint(1, 5)

In [28]:
list(Counter(test).keys())

[3, 1, 2, 4]

In [29]:
Counter(test)[4]

25

## Generating the Character Type Sequence that the Code Will Follow to Make Random Passwords

In [None]:
def generate_character_type_sequence(character_length):
    sequence = []
    start_weights = [0.3, 0.3, 0.2, 0.2]
    weight_deltas = [i/2 for i in start_weights]
    character_types = ["Uppercase", "Lowercase", "Number", "Special"]

    char = 0
    while char < character_length:
        while char >= character_length - 2:
            while len(set(sequence)) < 4:
                selected_type = np.random.choice(list({"Uppercase", "Lowercase", "Number", "Special"} - set(sequence))).item()
                sequence.append(selected_type)
                char += 1
            break

        selected_type = np.random.choice(character_types, p=start_weights).item()
        sequence.append(selected_type)

        for type in character_types:
            if type == selected_type:
                if start_weights[character_types.index(type)] - weight_deltas[character_types.index(selected_type)] < 0:
                    start_weights[character_types.index(type)] -= start_weights[character_types.index(type)] 
                else:
                    start_weights[character_types.index(type)] -= weight_deltas[character_types.index(selected_type)]
            else:
                if start_weights[character_types.index(selected_type)] - weight_deltas[character_types.index(selected_type)] < 0:
                    start_weights[character_types.index(type)] += start_weights[character_types.index(selected_type)]/3
                else:
                    start_weights[character_types.index(type)] += weight_deltas[character_types.index(selected_type)]/3
        char += 1
        start_weights = start_weights / np.sum(start_weights, dtype=np.float64)
        weight_deltas = [i/2 for i in start_weights]
    return sequence

In [31]:
print(generate_character_type_sequence(10))

['Lowercase', 'Special', 'Number', 'Special', 'Lowercase', 'Uppercase', 'Special', 'Number', 'Uppercase', 'Special']


In [200]:
def deprecated_generate_character_type_sequence(character_length):
    """Earlier, pool-based generator of a character-type sequence (kept for reference).

    Types are encoded as the integers 1-4 (see ``type_dict``) and drawn from a
    list that acts as the probability pool. After each draw the pool is
    re-weighted against the drawn type.

    Parameters
    ----------
    character_length : int
        Target number of entries.

    Returns
    -------
    list of int
        The drawn type numbers (``num_sequence``).
        NOTE(review): the translated names in ``sequence`` are built but never
        returned -- confirm which of the two was intended.
    """
    # Pool of 100 entries: 25 copies each of 1, 2, 3 and 4.
    prob_distribution = [m.floor(i/25)+1 for i in range(100)]
    num_sequence = []
    type_dict = {1: 'Number', 2: 'Lowercase', 3: 'Uppercase', 4: 'Special'}
    
    char = 0
    while char < character_length:
        
        # From the second-to-last slot onwards, first append every type number
        # that has not been drawn yet (random order), then leave this block.
        while char >= character_length-2:
            while len(set(num_sequence)) < 4:
                rng = np.random.choice(list({1,2,3,4}-set(num_sequence))).item()
                # while rng not in num_sequence:
                #     rng = prob_distribution[np.random.randint(0, 100)] 
                num_sequence.append(rng)
                char += 1
            break

        # The forced fill above may already have reached the target length.
        if char >= character_length:    
            break
        
        # Draw one entry of the pool by index.
        # NOTE(review): the index range is fixed at 0-99; this assumes the pool
        # still holds at least 100 entries -- confirm.
        rng = prob_distribution[np.random.randint(0, 100)]
        count = Counter(prob_distribution)
        
        # Re-weight: take up to 9 copies of the drawn value out of the pool and
        # add 3 copies of every other value that is currently in the pool.
        for i in list(count.keys()):
            if i == rng:
                if count[i] >= 9:
                    for j in range(9):
                        prob_distribution.remove(i)
                else:
                    for j in range(count[i]):
                        prob_distribution.remove(i)
            else:
                for j in range(3):
                    prob_distribution.append(i)
        
        # Trim the pool back to 100 entries by removing random values other
        # than the one just drawn.
        # NOTE(review): list.remove raises ValueError if `rng_excess` is no
        # longer present in the pool -- confirm this cannot happen.
        while len(prob_distribution) > 100:
            rng_excess = np.random.randint(1, 5)
            if rng_excess != rng:
                prob_distribution.remove(rng_excess)
        np.random.shuffle(prob_distribution)
        num_sequence.append(rng)
        char += 1
    
    # Human-readable names for the drawn numbers; computed but not returned.
    sequence = [type_dict[num] for num in num_sequence]
    return num_sequence

In [32]:
Counter(test)

Counter({3: 25, 1: 25, 2: 25, 4: 25})

In [106]:
len(set(test))

4

In [33]:
print(generate_character_type_sequence(16))

['Lowercase', 'Special', 'Special', 'Uppercase', 'Number', 'Number', 'Special', 'Uppercase', 'Number', 'Lowercase', 'Lowercase', 'Special', 'Lowercase', 'Special', 'Uppercase', 'Number']


In [34]:
print(sorted(generate_character_type_sequence(12)))

['Lowercase', 'Lowercase', 'Lowercase', 'Number', 'Number', 'Number', 'Special', 'Special', 'Special', 'Special', 'Uppercase', 'Uppercase']


In [76]:
print(rng)
Counter(test)

3


Counter({1: 40, 4: 28, 2: 16, 3: 16})

In [46]:
def make_random_password(sequence, character_length=None):
    """Build a random password whose characters follow a sequence of character types.

    The first character is drawn from the rows of the module-level
    ``character_type_df`` whose 'Character Type' equals ``sequence[0]``. Every
    later character is drawn from the rows of the module-level
    ``character_info_df`` that start at the previous character and whose
    'Character Type' equals the next entry of ``sequence``. The 'Distance'
    column is not consulted.

    Parameters
    ----------
    sequence : sequence of str
        Character types, one per password position. Entries beyond
        ``character_length`` are ignored.
    character_length : int, optional
        Number of characters to generate. Defaults to ``len(sequence)``.

    Returns
    -------
    str
        The generated password; the empty string when ``character_length <= 0``.

    Raises
    ------
    IndexError
        If ``sequence`` has fewer than ``character_length`` entries.
    ValueError
        Raised by ``np.random.choice`` when no candidate character matches.
    """
    if character_length is None:
        character_length = len(sequence)
    if character_length <= 0:
        # Nothing was requested: do not emit a stray first character.
        return ""
    if len(sequence) < character_length:
        raise IndexError(
            f"sequence has {len(sequence)} entries but character_length is {character_length}"
        )

    first_pool = character_type_df.loc[
        character_type_df['Character Type'] == sequence[0], 'Character'
    ]
    current_character = np.random.choice(first_pool)
    chosen = [current_character]

    for wanted_type in sequence[1:character_length]:
        candidates = character_info_df.loc[
            (character_info_df['Start Character'] == current_character)
            & (character_info_df['Character Type'] == wanted_type),
            'End Character',
        ]
        current_character = np.random.choice(candidates)
        chosen.append(current_character)
    return "".join(chosen)

In [42]:
character_type_df

Unnamed: 0,Character,Character Type
0,!,Special
1,#,Special
2,$,Special
3,%,Special
4,&,Special
...,...,...
69,v,Lowercase
70,w,Lowercase
71,x,Lowercase
72,y,Lowercase


In [43]:
character_distance_df

Unnamed: 0,Start Character,End Character,Distance
0,!,#,2
1,!,$,3
2,!,%,4
3,!,&,6
4,!,*,7
...,...,...,...
5397,z,u,8
5398,z,v,3
5399,z,w,3
5400,z,x,1


In [50]:
character_info_df

Unnamed: 0,Start Character,End Character,Distance,Character,Character Type
0,!,#,2,#,Special
1,!,$,3,$,Special
2,!,%,4,%,Special
3,!,&,6,&,Special
4,!,*,7,*,Special
...,...,...,...,...,...
5397,z,u,8,u,Lowercase
5398,z,v,3,v,Lowercase
5399,z,w,3,w,Lowercase
5400,z,x,1,x,Lowercase


In [55]:
make_random_password(generate_character_type_sequence(8), 8)

'cD6nR#;p'

In [64]:
make_random_password(generate_character_type_sequence(9), 9)

'A3kl&E^3H'

In [65]:
character_info_df['Character'].value_counts()

Character
#    73
$    73
%    73
&    73
*    73
     ..
w    73
x    73
y    73
z    73
!    73
Name: count, Length: 74, dtype: int64

In [66]:
check = character_info_df.copy().sort_values(by=['Start Character', 'Distance'])

In [67]:
check

Unnamed: 0,Start Character,End Character,Distance,Character,Character Type
7,!,1,0,1,Number
8,!,2,1,2,Number
19,!,@,1,@,Special
36,!,Q,1,Q,Uppercase
63,!,q,1,q,Lowercase
...,...,...,...,...,...
5365,z,P,11,P,Uppercase
5392,z,p,11,p,Lowercase
5336,z,0,12,0,Number
5335,z,+,14,+,Special


In [68]:
check['Distance'].value_counts()

Distance
3     840
4     808
2     736
5     692
6     568
1     462
7     440
8     328
9     220
10    124
0      72
11     60
12     28
13     16
14      8
Name: count, dtype: int64

In [71]:
check['Distance'].value_counts().keys()[0].item()

3

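Equation for the probability density function for the skew normal distribution, with location $\xi$ (written `E` in the plain-text version) and scale $\omega$ (written `w`):

$$f(x) = \frac{2}{\omega\sqrt{2\pi}}\, e^{-\frac{(x-\xi)^2}{2\omega^2}}$$

Note: the full skew normal density also multiplies this expression by $\Phi\!\left(\alpha\,\frac{x-\xi}{\omega}\right)$, where $\alpha$ is the shape parameter; that factor is not included here.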

In [None]:
def check_pdf(x, location, scale):
    """Evaluate 2/sqrt(2*pi) * e**(-(x-location)**2 / (2*scale**2)) / scale at ``x``."""
    coefficient = 2/m.sqrt(2*m.pi)
    exponent = -((x-location)**2)/(2*scale**2)
    return coefficient*m.e**exponent/scale

In [72]:
def pdf(x, location, scale):
    """Evaluate 0.8 * exp(-(x-location)**2 / (2*scale**2)) / scale at ``x``.

    The 0.8 factor is a rounded stand-in for 2/sqrt(2*pi) (about 0.798).
    """
    exponent = -((x-location)**2)/(2*scale**2)
    peak = 0.8*m.exp(exponent)
    return peak/scale

In [103]:
for i in range(1, 15):
    print(f"x={i}, pdf={pdf(i, 1, 5)}")

x=1, pdf=0.16
x=2, pdf=0.15683178772908085
x=3, pdf=0.14769861542186175
x=4, pdf=0.1336432338258035
x=5, pdf=0.11618384593179057
x=6, pdf=0.09704490555402136
x=7, pdf=0.07788036095359548
x=8, pdf=0.060049775816223935
x=9, pdf=0.04448596807251106
x=10, pdf=0.03166379185337835
x=11, pdf=0.021653645317858034
x=12, pdf=0.014227458793501815
x=13, pdf=0.008981562053461397
x=14, pdf=0.005447592757535895


In [74]:
def npdf(x):
    """Evaluate 0.4 * exp(-x**2 / 2) at ``x``.

    The 0.4 factor is a rounded stand-in for 1/sqrt(2*pi) (about 0.399).
    """
    exponent = -(x**2)/2
    return 0.4*m.exp(exponent)

In [76]:
for i in range(-15, 15):
    print(f"x={i}, npdf={npdf(i)}")

x=-15, npdf=5.545373174564683e-50
x=-14, npdf=1.0995140031640859e-43
x=-13, npdf=8.020035127846617e-38
x=-12, npdf=2.1520744640084554e-32
x=-11, npdf=2.1244368998716383e-27
x=-10, npdf=7.714999391855672e-23
x=-9, npdf=1.0307028436619925e-18
x=-8, npdf=5.065666219637671e-15
x=-7, npdf=9.158939382582212e-12
x=-6, npdf=6.091991897885052e-09
x=-5, npdf=1.4906612688314684e-06
x=-4, npdf=0.00013418505116100476
x=-3, npdf=0.004443598615296923
x=-2, npdf=0.054134113294645084
x=-1, npdf=0.2426122638850534
x=0, npdf=0.4
x=1, npdf=0.2426122638850534
x=2, npdf=0.054134113294645084
x=3, npdf=0.004443598615296923
x=4, npdf=0.00013418505116100476
x=5, npdf=1.4906612688314684e-06
x=6, npdf=6.091991897885052e-09
x=7, npdf=9.158939382582212e-12
x=8, npdf=5.065666219637671e-15
x=9, npdf=1.0307028436619925e-18
x=10, npdf=7.714999391855672e-23
x=11, npdf=2.1244368998716383e-27
x=12, npdf=2.1520744640084554e-32
x=13, npdf=8.020035127846617e-38
x=14, npdf=1.0995140031640859e-43


In [84]:
val = 0
for i in range(-4, 4):
    val += npdf(i)
print(val)

1.0025141366411519


In [104]:
val = 0
for i in range(1, 15):
    val += pdf(i, 1, 5)
    # print(val)
print(val)

1.075792544080624


In [105]:
skew_prob_check = [pdf(i, 1, 5) for i in range(1, 15)]

In [106]:
skew_prob_check

[0.16,
 0.15683178772908085,
 0.14769861542186175,
 0.1336432338258035,
 0.11618384593179057,
 0.09704490555402136,
 0.07788036095359548,
 0.060049775816223935,
 0.04448596807251106,
 0.03166379185337835,
 0.021653645317858034,
 0.014227458793501815,
 0.008981562053461397,
 0.005447592757535895]

In [107]:
norm_skew_prob_check = skew_prob_check/np.sum(skew_prob_check)

In [108]:
norm_skew_prob_check

array([0.14872756, 0.14578256, 0.13729284, 0.1242277 , 0.10799837,
       0.09020783, 0.07239348, 0.0558191 , 0.04135181, 0.02943299,
       0.02012809, 0.0132251 , 0.00834879, 0.00506379])

In [109]:
np.sum(norm_skew_prob_check)

np.float64(1.0)

In [None]:
scale = 1
location = 2
pdf = 