In [50]:
import pandas as pd
import numpy as np
import matplotlib as plt

In [2]:
# Load the dataset from dataUser2.csv (path is relative to the working directory) into a DataFrame.
df = pd.read_csv('dataUser2.csv')

In [3]:
# Frequency of each VALUE among the rows whose ID_INPUT is 4.
df.loc[df['ID_INPUT'] == 4, 'VALUE'].value_counts()

chrome.exe                   147
explorer.exe                  90
VsDebugConsole.exe            63
Unable To Open Process        46
DB Browser for SQLite.exe     40
devenv.exe                    35
Messenger.exe                 27
Zoom.exe                      23
ApplicationFrameHost.exe      22
msedge.exe                    18
SearchHost.exe                14
cmd.exe                       14
Teams.exe                      8
ShellExperienceHost.exe        5
Spotify.exe                    5
CredentialUIBroker.exe         2
MoNotificationUx.exe           2
Git-2.39.1-64-bit.tmp          1
Docker Desktop.exe             1
OneDrive.exe                   1
python-3.11.1-amd64.exe        1
Name: VALUE, dtype: int64

# Emission Probability

Emission_probability = {
"chrome.exe": {"google doc": P("google doc" | "chrome.exe"), "google drive": P("google drive" | "chrome.exe"), ...}
}

P(google doc | chrome.exe) = P(google doc, chrome.exe) / P(chrome.exe)

In [70]:
# Add a "date" column holding the first 10 characters of each MEASUREMENT_TIME
# string (assumes the timestamp text starts with the date, e.g. "2023-01-19" -- TODO confirm).
df = df.assign(date=df["MEASUREMENT_TIME"].astype(str).str[:10])

In [72]:
def preproc_before_emission(df, date="2023-01-19"):
    """Extract one day's executable series and app series from the log.

    Rows are first restricted to those whose ``date`` column equals ``date``.
    The ``VALUE`` entries with ``ID_INPUT == 4`` form the executables series and
    those with ``ID_INPUT == 3`` form the apps series. Each series is
    re-indexed from 0 so entries can be compared by position.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``date``, ``ID_INPUT`` and ``VALUE``.
    date : str
        Value to match against the ``date`` column.

    Returns
    -------
    tuple
        ``(executables, apps)`` as two pandas Series.
    """
    same_day = df.loc[df["date"] == date]

    def values_for(input_id):
        # Select VALUE for one ID_INPUT and discard the original row labels.
        picked = same_day.loc[same_day["ID_INPUT"] == input_id, "VALUE"]
        return picked.reset_index(drop=True)

    return (values_for(4), values_for(3))

In [74]:
def find_exe_prob(executables, exe_name):
    """Estimate the marginal probability of an executable from its frequency.

    Ex: P(chrome.exe) = (#chrome.exe) / (all exe's)

    Parameters
    ----------
    executables : pandas.Series or numpy.ndarray
        Observed executable names, one entry per observation.
    exe_name : str
        Executable whose relative frequency is wanted.

    Returns
    -------
    float
        Share of entries equal to ``exe_name``; ``0.0`` when ``executables``
        is empty (instead of dividing by zero).
    """
    total = len(executables)
    if total == 0:
        # No observations at all: treat the probability as 0 rather than raising.
        return 0.0
    # Vectorized count of matches; avoids iterating the Series in Python.
    matches = (executables == exe_name).sum()
    return matches / total

In [73]:
# No date argument is passed, so preproc_before_emission falls back to its default date.
executables, apps = preproc_before_emission(df)

In [91]:
# Sanity check on the dataset: share of the executable entries that are chrome.exe.
print(find_exe_prob(executables, "chrome.exe"))
# Other executables that can be spot-checked the same way:
# print(find_exe_prob(executables, "explorer.exe"))
# print(find_exe_prob(executables, "msedge.exe"))

0.29537366548042704


In [89]:
def find_joint_prob(executables, apps, from_exe, to_app):
    """Estimate the joint probability of an executable and an app.

    P(A,B) = (# times A and B appear together) / (# all entries)

    "Together" means at the same position: entry ``i`` of ``executables``
    equals ``from_exe`` and entry ``i`` of ``apps`` equals ``to_app``.

    Parameters
    ----------
    executables : pandas.Series or numpy.ndarray
        Observed executable names.
    apps : pandas.Series or numpy.ndarray
        Observed app names, compared position by position with
        ``executables``.
    from_exe : str
        Executable name to match.
    to_app : str
        App name to match.

    Returns
    -------
    float
        Number of co-occurring positions divided by ``len(executables)``;
        ``0.0`` when ``executables`` is empty (instead of dividing by zero).
    """
    total = len(executables)
    if total == 0:
        # Nothing observed: report 0 rather than raising ZeroDivisionError.
        return 0.0
    # Only positions present in both inputs can co-occur, so compare the
    # common prefix; this keeps the result well-defined if lengths differ.
    overlap = min(total, len(apps))
    exe_hits = np.asarray(executables == from_exe)[:overlap]
    app_hits = np.asarray(apps == to_app)[:overlap]
    co_appear = int(np.count_nonzero(exe_hits & app_hits))
    return co_appear / total

In [90]:
def find_emission_prob(executables, apps, from_exe, to_app):
    """Estimate the emission probability of an app given an executable.

    P(to_app | from_exe) = P(from_exe, to_app) / P(from_exe)

    Parameters
    ----------
    executables : pandas.Series or numpy.ndarray
        Observed executable names.
    apps : pandas.Series or numpy.ndarray
        Observed app names, compared position by position with
        ``executables``.
    from_exe : str
        Conditioning executable.
    to_app : str
        App whose conditional probability is wanted.

    Returns
    -------
    float
        The ratio of the joint probability to the executable's marginal
        probability; ``0.0`` when the marginal probability is 0, i.e.
        ``from_exe`` was never observed.
    """
    emission_denom = find_exe_prob(executables, from_exe)
    if emission_denom == 0:
        # from_exe never occurs: the conditional is undefined, so report 0
        # instead of producing nan or dividing by zero.
        return 0.0
    emission_numer = find_joint_prob(executables, apps, from_exe, to_app)
    return emission_numer / emission_denom

In [94]:
# Example: emission probabilities for 19 Jan 2023.
# Builds emission_prob[exe][app] = P(app | exe) for every pair of distinct
# executable and app values seen on that day.
executables, apps = preproc_before_emission(df, "2023-01-19")
unique_exes = executables.unique()
unique_apps = apps.unique()
emission_prob = {
    ex: {
        app: find_emission_prob(executables, apps, ex, app)
        for app in unique_apps
    }
    for ex in unique_exes
}

In [95]:
# Display the nested mapping: executable -> {app -> P(app | executable)}.
emission_prob

{'VsDebugConsole.exe': {'esrv.exe': 1.0,
  'Foreground - Microsoft Visual Studio': 0.0,
  'How to Configure a C/C++ Project in VS - Google Docs - Google Chrome': 0.0,
  'Messenger': 0.0,
  'sdk': 0.0,
  'Public -- 2022-2023 (CCG DCA UCSD-HDSI Capstone) | Microsoft Teams': 0.0,
  'Search': 0.0,
  'Administrator: Command Prompt': 0.0,
  'Installation - pip documentation v22.3.1 - Google Chrome': 0.0,
  'Missing String.': 0.0,
  'Program Manager': 0.0,
  'Python Release Python 3.11.1 | Python.org - Google Chrome': 0.0,
  'Python 3.11.1 (64-bit) Setup': 0.0,
  'How to install Jupyter Notebook on Windows? - GeeksforGeeks - Google Chrome': 0.0,
  'windows - Python command not working in command prompt - Stack Overflow - Google Chrome': 0.0,
  'System Properties': 0.0,
  'Edit environment variable': 0.0,
  'Administrator: Command Prompt - py': 0.0,
  'New Tab - Google Chrome': 0.0,
  'Opening Jupyter Notebook - Google Chrome': 0.0,
  'Home Page - Select or create a notebook - Google Chrome': 