In [None]:
from IPython.display import display, HTML
import matplotlib.pyplot as plt
from scipy import signal
import pandas as pd
import numpy as np
import sqlite3
import cv2
import os

In [None]:
# --- Directories and files ---------------------------------------------------
IMAGES_DIR = "images"
MASKED_IMAGES_DIR = "masked_images"
BACKEND_DIR = "backend"
DB_FILE = "db.sqlite"

# --- Plaintext password alphabet ---------------------------------------------
# = (0-9) + (a-z, A-Z) + special characters
PASSWORD_CHARACTERS = sum((10, 52, 32))

# --- Image parameters --------------------------------------------------------
IMAGE_SIZE = 256
NUMBER_OF_IMAGES = 2

# presumably the smallest and largest value of the radius "r" - TODO confirm
MIN_R, MAX_R = 3, 30

# Two points on the same image whose summed absolute coordinate difference is
# below this value are counted as a single point by unique_points().
SAME_POINT_TOLERANCE = 10

In [None]:
# Report the size of the plaintext password alphabet defined in the config cell.
print(f"Plaintext character size: {PASSWORD_CHARACTERS}")

In [None]:
# Preview the alpha channel (4th channel) of one masked image, first as a
# picture and then as raw values. The file is decoded once and reused.
sample_path = os.path.join(os.curdir, MASKED_IMAGES_DIR, "83990_no_background.png")
sample_image = cv2.imread(sample_path, cv2.IMREAD_UNCHANGED)
if sample_image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {sample_path}")
sample_alpha = np.squeeze(sample_image[:, :, 3])
plt.imshow(sample_alpha)
print(sample_alpha)

In [None]:
# Build one binary mask per file in MASKED_IMAGES_DIR, keyed by the file name up
# to its first ".". A mask value is 1 where the image's alpha value is below 255
# and 0 where it is exactly 255. image_attributes collects the text after the
# last "_" of every mask name.
image_masks = {}
image_attributes = set()
masked_dir = os.path.join(os.curdir, MASKED_IMAGES_DIR)
# sorted() makes the processing order independent of the file system.
for image in sorted(os.listdir(masked_dir)):
    pixels = cv2.imread(os.path.join(masked_dir, image), cv2.IMREAD_UNCHANGED)
    # cv2.imread returns None for anything it cannot decode (e.g. hidden files),
    # and the alpha lookup below needs a 4th channel.
    if pixels is None or pixels.ndim != 3 or pixels.shape[2] < 4:
        print(f"Skipping {image}: not a readable image with an alpha channel")
        continue
    name = image.split(".")[0]
    image_masks[name] = np.where(np.squeeze(pixels[:, :, 3]) < 255, 1, 0)
    image_attributes.add(name.split("_")[-1])

In [None]:
# Inspect the binary mask of one image: raw values first, then as a picture.
sample_mask = image_masks["83990_no_background"]
print(sample_mask)
plt.imshow(sample_mask)

In [None]:
# sqlite3.connect() silently creates a new, empty database when the file does
# not exist, which would only surface later as a "no such table" error.
# Fail early with a clear message instead.
if not os.path.isfile(DB_FILE):
    raise FileNotFoundError(f"Database file not found: {DB_FILE}")
results = sqlite3.connect(DB_FILE)

In [None]:
# Load each table of the database into its own DataFrame.
users = pd.read_sql_query(sql="SELECT * FROM user;", con=results)
passwords = pd.read_sql_query(sql="SELECT * FROM passwords;", con=results)
attempts = pd.read_sql_query(sql="SELECT * FROM attempts;", con=results)

In [None]:
def extract_images(row):
    """Return the distinct image names used by a password, padded to two entries.

    The "password" value is read as a comma-separated list of points; the first
    whitespace-separated token of each point is taken as its image name.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with a "password" entry.

    Returns:
        A list of the distinct image names in order of first appearance, padded
        with NaN up to two entries. When the password is missing, a list holding
        the missing value twice. If more than two distinct names occur the list
        is longer than two (no truncation is performed).
    """
    password = row["password"]
    if pd.isnull(password):
        # Propagate the missing value unchanged into both image slots.
        return [password, password]

    # Blank entries (e.g. produced by a trailing comma) carry no image name.
    names = [point.split()[0] for point in password.split(",") if point.strip()]
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # is reproducible; iterating a set of strings is not stable between runs.
    images = list(dict.fromkeys(names))
    return images + [np.nan] * (2 - len(images))

In [None]:
def extract_types(row):
    """Classify the images of a row as "Natural", "Geometric" or "Both".

    An image name that contains at least one digit is labelled natural; a name
    without any digit is labelled geometric.

    Args:
        row: mapping-like object with "image1" and "image2" entries.

    Returns:
        "Natural" or "Geometric" when every present image is of that kind,
        "Both" when the two images differ in kind, and NaN when "image1" is
        missing.
    """
    first, second = row["image1"], row["image2"]
    if pd.isnull(first):
        return np.nan

    def has_digit(name):
        return any(character.isdigit() for character in name)

    # One flag per image that is present: True means "natural".
    natural_flags = [has_digit(first)]
    if not pd.isnull(second):
        natural_flags.append(has_digit(second))

    if all(natural_flags):
        return "Natural"
    if not any(natural_flags):
        return "Geometric"
    return "Both"

In [None]:
def check_points(row):
    """List the mask attributes hit by the points of a password.

    Each point is split on whitespace into an image name and two integers. For
    every entry of the module-level image_masks dict whose key contains the
    image name (extension removed), the mask is looked up at those two integers;
    a value above 0 adds the text after the last "_" of the key to the result.

    Args:
        row: mapping-like object with a "password" entry.

    Returns:
        The hit attributes, sorted and joined with ", ", or NaN when the
        password is missing or no mask was hit.
    """
    password = row["password"]
    if pd.isnull(password):
        return np.nan

    hit_attributes = set()
    for entry in password.split(","):
        fields = tuple(entry.split())
        image_stem = fields[0].split(".")[0]
        # NOTE(review): this is a substring match, so a short image name also
        # matches masks of longer names that contain it - confirm this is fine.
        for mask_name, mask in image_masks.items():
            if image_stem not in mask_name:
                continue
            # NOTE(review): the mask is indexed as [fields[1], fields[2]];
            # confirm that this matches the coordinate order of the password.
            if mask[int(fields[1]), int(fields[2])] > 0:
                hit_attributes.add(mask_name.split("_")[-1])

    if not hit_attributes:
        return np.nan
    return ", ".join(sorted(hit_attributes))

In [None]:
def unique_points(row, tolerance=None):
    """Count the points of a password that are distinct within a tolerance.

    Points are processed in order, separately per image name. A point is kept
    unless the sum of absolute coordinate differences to an already kept point
    on the same image is below the tolerance.

    Args:
        row: mapping-like object with a "password" entry holding comma-separated
            points of the form "<image> <int> <int>".
        tolerance: distance below which two points count as the same point.
            Defaults to the module-level SAME_POINT_TOLERANCE.

    Returns:
        The number of kept points over all images; 0 for a missing password.
    """
    password = row["password"]
    if pd.isnull(password):
        return 0
    if tolerance is None:
        tolerance = SAME_POINT_TOLERANCE

    kept_by_image = {}
    for entry in password.split(","):
        fields = entry.split()
        # Parse the coordinates once instead of on every comparison.
        image_name, first, second = fields[0], int(fields[1]), int(fields[2])
        kept = kept_by_image.setdefault(image_name, [])
        # The first point of an image is always kept because `kept` is empty.
        if not any(abs(first - a) + abs(second - b) < tolerance for a, b in kept):
            kept.append((first, second))

    return sum(len(kept) for kept in kept_by_image.values())

In [None]:
# Left-join users with their attempts (on "id") and then with their passwords
# (on "id" and "r"), order the rows by username and r, and derive the
# per-password feature columns.
user_attempts = pd.merge(users, attempts, on="id", how="left")
full_df = (
    pd.merge(user_attempts, passwords, on=["id", "r"], how="left")
    .sort_values(by=["username", "r"])
    .reset_index(drop=True)
)

# Number of comma-separated points; a missing password stays missing.
full_df["password_length"] = full_df.apply(
    lambda row: row["password"] if pd.isnull(row["password"]) else len(row["password"].split(",")),
    axis=1,
)
full_df[["image1", "image2"]] = full_df.apply(extract_images, axis=1, result_type="expand")
full_df["image_types"] = full_df.apply(extract_types, axis=1)
full_df["natural_characteristics"] = full_df.apply(check_points, axis=1)
full_df["unique_points"] = full_df.apply(unique_points, axis=1)

In [None]:
# Count the distinct values of the "id" column (missing ids, if any, count once).
print(f"Number of participants: {full_df['id'].nunique(dropna=False)}")

In [None]:
# Rows with a non-missing "password" value.
print(f"Number of passwords: {full_df['password'].count()}")

In [None]:
# Mean of the "password_length" column.
average_password_length = full_df["password_length"].mean()
print(f"Average password length: {average_password_length}")

In [None]:
# Render the complete frame as an HTML table.
full_table_html = full_df.to_html()
display(HTML(full_table_html))

In [None]:
# Usernames that occur in exactly one row are treated as participants who did
# not complete the survey.
appears_once = ~full_df.duplicated("username", keep=False)
not_finished = full_df.loc[appears_once, "id"].unique()
total_participants = full_df["username"].nunique(dropna=False)
# Guard against an empty frame and print an actual percentage, as the label says.
unfinished_share = len(not_finished) / total_participants if total_participants else float("nan")
print(f"Percent of people that did not complete the survey: {len(not_finished)}/{total_participants} = {unfinished_share:.2%}")

In [None]:
# Group all rows by the value of "r".
r_grouped = full_df.groupby(by="r")

In [None]:
# Per-r summary: group size, total successes / total attempts, mean attempts
# and total successes.
average_success = r_grouped.apply(lambda df: pd.Series({"Sample Size": len(df.index),
                                                        "Success Rate": df["successes"].sum() / df["attempts"].sum(),
                                                        "Average Attempts": df["attempts"].mean(),
                                                        "Passwords Made": df["successes"].sum()}))
display(HTML(average_success.to_html()))
# DataFrame.plot opens its own figure unless it is given an axes, so a bare
# plt.figure() beforehand would only leave an empty extra figure behind.
fig, ax = plt.subplots()
average_success.plot(y="Success Rate", use_index=True, ax=ax)
ax.set_title("Success Rate of Different Radial Distances")
plt.show()

In [None]:
# Keep only rows whose "successes" value is not 0, then group them by "r".
has_success = full_df["successes"].ne(0)
made_password_r_group = full_df.loc[has_success].groupby("r")

In [None]:
# Same per-r summary as for all rows, restricted to rows with successes != 0.
average_success_of_successful = made_password_r_group.apply(lambda df: pd.Series({"Sample Size": len(df.index),
                                                        "Success Rate": df["successes"].sum() / df["attempts"].sum(),
                                                        "Average Attempts": df["attempts"].mean(),
                                                        "Passwords Made": df["successes"].sum()}))
display(HTML(average_success_of_successful.to_html()))
# DataFrame.plot opens its own figure unless it is given an axes, so a bare
# plt.figure() beforehand would only leave an empty extra figure behind.
fig, ax = plt.subplots()
average_success_of_successful.plot(y="Success Rate", use_index=True, ax=ax)
ax.set_title("Success Rate from Successful of Different Radial Distances")
plt.show()

In [None]:
# Count rows that use exactly one image versus two images and show the counts
# as a bar chart, each bar annotated with its value.
has_first_image = full_df["image1"].notna()
has_second_image = full_df["image2"].notna()
one_image = int((has_first_image & ~has_second_image).sum())
two_images = int((has_first_image & has_second_image).sum())

fig, ax = plt.subplots()
ax.bar(["One Image", "Two Images"], [one_image, two_images])
ax.set_title("Number of Images Actually Utilized")
for position, count in enumerate((one_image, two_images)):
    # The label sits at half the bar height.
    ax.text(position, count / 2, f"{count}")
plt.show()

In [None]:
# Count rows per "image_types" label and show the counts as a bar chart, each
# bar annotated with its value.
image_type_labels = full_df["image_types"]
just_geometric = int((image_type_labels == "Geometric").sum())
just_natural = int((image_type_labels == "Natural").sum())
both_types = int((image_type_labels == "Both").sum())
type_counts = [just_geometric, just_natural, both_types]

fig, ax = plt.subplots()
ax.bar(["Just Geometric", "Just Natural", "Both Image Types"], type_counts)
ax.set_title("Image Types Used")
for position, count in enumerate(type_counts):
    # The label sits at half the bar height.
    ax.text(position, count / 2, f"{count}")
plt.show()

In [None]:
# Group all rows by the pair ("r", "image_types").
group_r_image_type = full_df.groupby(["r", "image_types"])

In [None]:
def _summarize_type_group(df):
    """Summarize one (r, image_types) group: size, success rate, mean attempts, successes."""
    total_successes = df["successes"].sum()
    return pd.Series({
        "Sample Size": len(df.index),
        "Success Rate": total_successes / df["attempts"].sum(),
        "Average Attempts": df["attempts"].mean(),
        "Passwords Made": total_successes,
    })


average_success_by_type = group_r_image_type.apply(_summarize_type_group)

print(average_success_by_type.sort_index())

In [None]:
# Snapshot of the mask names (the keys of image_masks) as a list.
masked_names = list(image_masks)

In [None]:
# Make every background mask exclusive: wherever another mask of the same image
# is set (> 0), the background mask is cleared to 0.
background_names = [mask for mask in masked_names if "background" in mask]
for background_mask in background_names:
    # The text before the first "_" identifies the image.
    image_name = background_mask.split("_")[0]
    sibling_masks = (mask for mask in masked_names if image_name in mask and "background" not in mask)
    for image_mask in sibling_masks:
        sibling_is_set = image_masks[image_mask] > 0
        image_masks[background_mask] = np.where(sibling_is_set, 0, image_masks[background_mask])

In [None]:
# Inspect one background mask again: as a picture and as raw values.
corrected_sample_mask = image_masks["83990_no_background"]
plt.imshow(corrected_sample_mask)
print(corrected_sample_mask)

In [None]:
# "natural_characteristics" is first derived when full_df is built, but the
# background masks in image_masks are made exclusive only afterwards. Refresh
# the column so the counts below reflect the current masks on a top-to-bottom run.
full_df["natural_characteristics"] = full_df.apply(check_points, axis=1)

# astype(object) keeps the .str accessor usable even when every value is NaN.
characteristics = full_df["natural_characteristics"].astype(object)

# Number of rows whose characteristics string contains each attribute.
# sorted() gives the resulting dict (and any plot of it) a reproducible order.
image_attribute_counts = {}
for attribute in sorted(image_attributes):
    contains_attribute = characteristics.str.contains(attribute, na=False, regex=False)
    image_attribute_counts[attribute.title()] = int(contains_attribute.sum())

# Rows whose image types are "Both" or "Natural".
uses_natural_image = full_df["image_types"].isin(["Both", "Natural"])
image_attribute_counts["Total Natural Image Passwords"] = int(uses_natural_image.sum())

In [None]:
# Bar chart of the attribute counts, with slightly rotated x labels.
attribute_labels = list(image_attribute_counts.keys())
attribute_values = list(image_attribute_counts.values())

fig, ax = plt.subplots()
ax.bar(attribute_labels, attribute_values)
ax.set_title("Attributes in Natural Images")
plt.xticks(rotation=15)
plt.show()

In [None]:
# Group all rows by the value of "password_length".
length_group = full_df.groupby(by="password_length")

In [None]:
# Per password length: group size and mean / standard deviation / max / min of
# the "unique_points" column.
average_unique_points = length_group.apply(lambda df: pd.Series({"Sample Size": len(df.index),
                                                        "Average Unique Points": df["unique_points"].mean(),
                                                        "Standard Deviation of Unique Points": df["unique_points"].std(),
                                                        "Max Unique Points": df["unique_points"].max(),
                                                        "Min Unique Points": df["unique_points"].min()}))

display(HTML(average_unique_points.sort_index().to_html()))
# DataFrame.plot opens its own figure unless it is given an axes, so a bare
# plt.figure() beforehand would only leave an empty extra figure behind.
fig, ax = plt.subplots()
average_unique_points.plot(y="Average Unique Points", use_index=True, ax=ax)
ax.set_title("Number of Unique Points per Password")
ax.set_xlabel("Password Length")
ax.set_ylabel("Unique Points in Password")
plt.show()