In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
import numpy as np
import pandas as pd
import datetime
import pickle

from collections import OrderedDict

from models import Generator, predict_many, predict_one
from data import load_data_from_pickle, translate, load_dataset, dump_txt_to_pickle

device = "cuda" if torch.cuda.is_available() else "cpu"
#print(torch.cuda.get_device_name(), "|", torch.cuda.is_available())

# Data Loading

In [3]:
# path = "Data/rockyou_processed.txt"
# dataset_name = "rockyou"

# t = datetime.datetime.now()
# filtered_lines, charmap, inv_charmap = load_dataset(path)
# dump_txt_to_pickle(path, dataset_name, test_size=0.1)
# print(datetime.datetime.now() - t)

In [9]:
dataset_name = "rockyou"
t = datetime.datetime.now()
test_data, charmap, inv_charmap = load_data_from_pickle(dataset_name, train_data=False, test_data=True)
print(datetime.datetime.now() - t)

0:00:01.108659


In [10]:
len(test_data), len(charmap)

(1191019, 627)

Comparison

In [11]:
test_data = pd.Series(translate(test_data, inv_charmap)).drop_duplicates()

In [12]:
test_data #pandas object

0          sg21840|||
1          3828900328
2          zouchan|||
3          1596611|||
4          20pasht10|
              ...    
1191014    0259ibpj||
1191015    9825362|||
1191016    malw84||||
1191017    880902||||
1191018    4793773|||
Length: 1191013, dtype: object

# Predictions

In [37]:
netG = Generator(charmap).to(device)
netG.load_state_dict(torch.load("Checkpoints/netG900005:35:43AM_12-03-20", map_location=torch.device(device)))

<All keys matched successfully>

In [38]:
t = datetime.datetime.now()
preds = predict_one(netG, inv_charmap, 100000) #56 seconds 100k on cpu
print(datetime.datetime.now() - t)

0:00:51.255951


In [39]:
t = datetime.datetime.now()
with open("Predictions/random_test.txt", 'w') as f:
    f.write("Passwords\n")
    for i in range(10000):
        for pred in preds:
            f.write(pred + "\n")
print(datetime.datetime.now() - t)

0:03:36.532074


In [40]:
!wc -l Predictions/random_test.txt
!ls -lh Predictions

 1000000001 Predictions/random_test.txt
total 22118408
-rw-r--r--  1 nvijayakumar  staff    11G Dec  5 14:33 random_test.txt


In [27]:
preds = pd.read_table("Predictions/random_test.txt", nrows=100001)['Passwords']

In [31]:
preds.drop_duplicates().isin(test_data).mean()

0.002225132283581931

In [36]:
1e8 / 100000001

0.9999999900000001

# Dask Test

In [6]:
import dask.dataframe as dd

In [13]:
predictions = dd.read_table("Predictions/random_test.txt")
predictions

Unnamed: 0_level_0,Passwords
npartitions=18,Unnamed: 1_level_1
,object
,...
...,...
,...
,...


In [14]:
unique_predictions = predictions['Passwords'].drop_duplicates()
preds_mask = unique_predictions.isin(test_data)

In [15]:
matched_preds = unique_predictions[preds_mask]

In [17]:
matched_preds

Dask Series Structure:
npartitions=1
    object
       ...
Name: Passwords, dtype: object
Dask Name: index, 61 tasks

In [18]:
preds_mask.mean().compute()

0.002225132283581931

In [100]:
unique_predictions

Dask Series Structure:
npartitions=1
    object
       ...
Name: Passwords, dtype: object
Dask Name: drop-duplicates-agg, 58 tasks

In [42]:
comp = test_data.isin(pd.Series(preds))
comp.mean()

0.0001754808721651233

In [37]:
test_data[comp]

274        11899|||||
3073       645416||||
3737       659305||||
4259       657705||||
6722       883634||||
              ...    
1171464    101444||||
1174135    namanis|||
1180811    tan1950|||
1187327    maris|||||
1188539    cates|||||
Length: 209, dtype: object

In [38]:
comp1 = pd.Series(preds).isin(test_data[comp])

In [41]:
pd.Series(preds)[comp1]

889      namanis|||
1703     tansen||||
1780     tan1950|||
1803     61930|||||
2465     sisis|||||
            ...    
98536    42504|||||
98598    nenita91||
99123    60599|||||
99177    823600||||
99262    jona05||||
Length: 264, dtype: object

In [15]:
preds

['Cn02fsT|||',
 'nrakfeT|||',
 '805ĐTIr095',
 'jontama1||',
 'sarinktI||',
 'џatnbe||||',
 'Jsjaÿ41|||',
 '548930||||',
 'imnenן@1||',
 'manisl||||',
 'teiiiĞn|||',
 'wrekl89865',
 'nak59881||',
 'n209834|||',
 'rieυ;14|||',
 'notnta;98|',
 'tinake||||',
 '547445||||',
 '180995440|',
 'TๆsLik||||',
 'tistnbenj|',
 'taks979|||',
 'wikeyĐ||||',
 '50105354||',
 'shE5jant||',
 'џanibe||||',
 'mastemnnta',
 'jeAf405е||',
 'џanbis|1||',
 '6821516|||',
 'iseeÃ;||||',
 '59184420||',
 '618494||||',
 'eej1550|||',
 '1851532cma',
 'thenikυе|1',
 'wiles10815',
 'jonnice1||',
 'wisonbe|||',
 'i5ъa5053||',
 'sъassnie||',
 '6055311115',
 '4518530|||',
 '80503|||||',
 'snrretk|1|',
 'atarimaе||',
 'kĞnnti009|',
 'HnEsl|||||',
 'sicnnat|||',
 'naktes||||',
 'џanniktυ||',
 'snantama||',
 '8055235Нj|',
 'sъosnie|||',
 'tnrakesx||',
 '69594449||',
 'ce10835|||',
 'saarnryke|',
 'wefs35||||',
 'noatbat9||',
 '5sEnna;595',
 'ritbe|||||',
 'x12L32||||',
 'sisene||||',
 '84898916|6',
 '6135353949',
 'cankE33|

In [None]:
checkpoints_to_predict = OrderedDict()
checkpoints_to_predict['1'] = "Checkpoints/netG103:55:21AM_12-03-20"
checkpoints_to_predict['1000'] = "Checkpoints/netG103:55:21AM_12-03-20"
checkpoints_to_predict['3000'] = "Checkpoints/netG103:55:21AM_12-03-20"
checkpoints_to_predict['5000'] = "Checkpoints/netG103:55:21AM_12-03-20"
checkpoints_to_predict['7000'] = "Checkpoints/netG103:55:21AM_12-03-20"
checkpoints_to_predict['9000'] = "Checkpoints/netG900005:35:43AM_12-03-20"

In [None]:
for key, value in checkpoints_to_predict.items():
    netG = Generator(charmap).to(device)
    netG.load_state_dict(torch.load(value))
    print(predict_one(netG, 10))

# Old

In [None]:
netG1 = Generator(charmap).to(device)
netG1.load_state_dict(torch.load("/home/nvijayakumar/gcp-gan/Checkpoints/netG103:55:21AM_12-03-20"))

netG1000 = Generator(charmap).to(device)
netG1000.load_state_dict(torch.load("/home/nvijayakumar/gcp-gan/Checkpoints/netG100004:06:51AM_12-03-20"))

netG3000 = Generator(charmap).to(device)
netG3000.load_state_dict(torch.load("/home/nvijayakumar/gcp-gan/Checkpoints/netG300004:29:10AM_12-03-20"))

netG5000 = Generator(charmap).to(device)
netG5000.load_state_dict(torch.load("/home/nvijayakumar/gcp-gan/Checkpoints/netG500004:51:18AM_12-03-20"))

netG7000 = Generator(charmap).to(device)
netG7000.load_state_dict(torch.load("/home/nvijayakumar/gcp-gan/Checkpoints/netG700005:13:29AM_12-03-20"))

netG9000 = Generator(charmap).to(device)
netG9000.load_state_dict(torch.load("/home/nvijayakumar/gcp-gan/Checkpoints/netG900005:35:43AM_12-03-20"))

In [None]:
preds = predict_many([netG1, netG1000, netG3000, netG5000, netG7000, netG9000])
iters = ["1", "1000", "3000", "5000", "7000", "9000"]
for i, pred in zip(iters, preds):
    print(f"{i} iterations: {pred}")

In [None]:
predict_one(netG1000, inv_charmap, 100)