In [32]:
import pandas as pd
import matplotlib as plt
from sklearn.model_selection import train_test_split
from collections import defaultdict

In [13]:
# Read dataUser2.csv into a DataFrame; the path is relative to the current
# working directory, so the notebook must be run from the folder holding the file.
df = pd.read_csv('dataUser2.csv')

In [14]:
def get_clean_data(df, input_id=4):
    """Return the rows of ``df`` whose ``ID_INPUT`` equals ``input_id``.

    The ``'Unnamed: 0'`` column is removed when present and the result is
    re-indexed from 0. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``ID_INPUT`` column.
    input_id : optional
        Value of ``ID_INPUT`` to keep. Defaults to 4, the value this notebook
        has always filtered on (its meaning is not documented here; verify
        against the data source).

    Returns
    -------
    pandas.DataFrame
        Filtered copy with a fresh ``RangeIndex``.
    """
    selected = df[df['ID_INPUT'] == input_id]
    # 'Unnamed: 0' is presumably an index column saved along with the CSV;
    # errors='ignore' keeps this working for files written without it.
    without_saved_index = selected.drop(columns=['Unnamed: 0'], errors='ignore')
    return without_saved_index.reset_index(drop=True)

In [15]:
def get_unique_states(df):
    """Return the distinct ``VALUE`` entries among rows with ``ID_INPUT == 4``.

    The result is the array produced by ``Series.unique()``, i.e. values are
    listed in order of first appearance.
    """
    is_selected = df['ID_INPUT'] == 4
    selected_values = df.loc[is_selected, 'VALUE']
    return selected_values.unique()

In [16]:
def get_conditional_prob(df):
    """Return the relative frequency of each ``VALUE`` among ``ID_INPUT == 4`` rows.

    Each count from ``value_counts()`` is divided by the number of selected
    rows, so the result is a Series indexed by value. Rows whose ``VALUE`` is
    missing are excluded from the counts but still included in the divisor.
    """
    selected_values = df.loc[df['ID_INPUT'] == 4, 'VALUE']
    occurrences = selected_values.value_counts()
    n_selected = len(selected_values)
    return occurrences / n_selected

In [17]:
def get_all_pairs(df):
    """Return every pair of consecutive ``VALUE`` entries, in row order.

    For rows ``v0, v1, ..., vn`` the result is
    ``[(v0, v1), (v1, v2), ..., (vn-1, vn)]``. A frame with fewer than two
    rows yields an empty list.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``VALUE`` column; rows are taken in positional order.

    Returns
    -------
    list of tuple
        ``len(df) - 1`` two-element tuples (or ``[]``).
    """
    # Pull the column out once instead of building a row Series per index;
    # this also avoids the dtype upcasting that row-wise access can cause.
    values = df['VALUE'].tolist()
    return list(zip(values, values[1:]))

In [18]:
def split_train_test(pairs, test_size=0.3, random_state=None):
    """Randomly split ``(current, next)`` pairs into training and test parts.

    Parameters
    ----------
    pairs : sequence of 2-item sequences
        Element 0 of each pair becomes a feature, element 1 its target.
    test_size : float or int, optional
        Forwarded to ``train_test_split``; defaults to 0.3.
    random_state : int or None, optional
        Forwarded to ``train_test_split``. The default ``None`` keeps the
        original behaviour (a different shuffle on every call); pass an
        integer to make the split reproducible across runs.

    Returns
    -------
    list
        ``[X_train, y_train, X_test, y_test]`` -- note that this order differs
        from the order ``train_test_split`` itself returns.
    """
    X = [pair[0] for pair in pairs]
    y = [pair[1] for pair in pairs]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return [X_train, y_train, X_test, y_test]

In [36]:
def get_pair_frequency(X, y):
    """Count how many times each ``(X[i], y[i])`` combination occurs.

    Returns a ``defaultdict(int)`` keyed by the pair tuple. ``y`` is indexed
    by position, so it must be at least as long as ``X``.
    """
    counts = defaultdict(int)
    for position, current in enumerate(X):
        counts[(current, y[position])] += 1
    return counts

In [79]:
def get_transitional_probability(pair_freq, X):
    """Convert pair counts into per-source relative frequencies.

    For every ``(source, target)`` key in ``pair_freq`` the result holds
    ``pair_freq[(source, target)] / (number of times source occurs in X)``.

    Parameters
    ----------
    pair_freq : mapping
        Maps ``(source, target)`` tuples to their occurrence counts.
    X : iterable
        The source items the counts were taken from.

    Returns
    -------
    collections.defaultdict
        ``defaultdict(int)`` keyed by the same tuples as ``pair_freq``.

    Raises
    ------
    ZeroDivisionError
        If a pair's source never occurs in ``X``.
    """
    # Tally each source once up front rather than re-scanning X for every pair.
    source_totals = defaultdict(int)
    for source in X:
        source_totals[source] += 1

    transitional_prob = defaultdict(int)
    for pair, count in pair_freq.items():
        transitional_prob[pair] = count / source_totals[pair[0]]
    return transitional_prob

In [80]:
def predict_HMM(df):
    """Print the transition frequencies estimated from a training split of ``df``.

    Steps: filter/clean the frame, build consecutive ``VALUE`` pairs, split
    them into train/test parts, count the training pairs and turn the counts
    into per-source relative frequencies. One line is printed per pair.

    Despite its name, this function currently makes no prediction, does not
    use the test split, and returns ``None``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame as accepted by ``get_clean_data``.
    """
    cleaned = get_clean_data(df)
    all_pairs = get_all_pairs(cleaned)

    # Only the training part is consumed; the held-out part is not evaluated yet.
    X_tr, y_tr, _X_test, _y_test = split_train_test(all_pairs)
    pair_freq = get_pair_frequency(X_tr, y_tr)
    transitional_prob = get_transitional_probability(pair_freq, X_tr)

    for pair, probability in transitional_prob.items():
        print(pair, '    ', probability, '     ')

In [78]:
# NOTE(review): the output recorded under this cell (one nested defaultdict per
# application) does not match what the current predict_HMM prints (a tuple key
# followed by a float). The cell's execution count is lower than that of the
# present definitions, so this output appears to come from an earlier version;
# re-run the notebook from a fresh kernel to refresh it.
predict_HMM(df)

chrome.exe      defaultdict(<class 'int'>, {'msteams.exe': 0.006134969325153374})      
VsDebugConsole.exe      defaultdict(<class 'int'>, {'Teams.exe': 0.06060606060606061})      
devenv.exe      defaultdict(<class 'int'>, {'Docker Desktop.exe': 0.03125})      
LockApp.exe      defaultdict(<class 'int'>, {'chrome.exe': 1.0})      
SearchHost.exe      defaultdict(<class 'int'>, {'cmd.exe': 0.10526315789473684})      
Spotify.exe      defaultdict(<class 'int'>, {'explorer.exe': 0.3333333333333333})      
Teams.exe      defaultdict(<class 'int'>, {'Docker Desktop.exe': 0.03333333333333333})      
explorer.exe      defaultdict(<class 'int'>, {'msedge.exe': 0.015384615384615385})      
Messenger.exe      defaultdict(<class 'int'>, {'DB Browser for SQLite.exe': 0.017543859649122806})      
ApplicationFrameHost.exe      defaultdict(<class 'int'>, {'Messenger.exe': 0.047619047619047616})      
DB Browser for SQLite.exe      defaultdict(<class 'int'>, {'DB Browser for SQLite.exe': 0.0192307692

In [81]:
# Prints one line per (current, next) pair seen in the training split, followed
# by its estimated transition frequency. The split is not seeded, so the exact
# numbers are expected to change between runs.
predict_HMM(df)

('explorer.exe', 'chrome.exe')      0.22321428571428573      
('DB Browser for SQLite.exe', 'explorer.exe')      0.6981132075471698      
('chrome.exe', 'Messenger.exe')      0.19886363636363635      
('Teams.exe', 'chrome.exe')      0.21428571428571427      
('chrome.exe', 'SearchHost.exe')      0.056818181818181816      
('VsDebugConsole.exe', 'explorer.exe')      0.17567567567567569      
('chrome.exe', 'explorer.exe')      0.1875      
('Acrobat.exe', 'explorer.exe')      1.0      
('VsDebugConsole.exe', 'VsDebugConsole.exe')      0.1891891891891892      
('Messenger.exe', 'Teams.exe')      0.14516129032258066      
('ApplicationFrameHost.exe', 'chrome.exe')      0.38095238095238093      
('msedge.exe', 'MoNotificationUx.exe')      0.15384615384615385      
('cmd.exe', 'cmd.exe')      0.13333333333333333      
('Zoom.exe', 'ApplicationFrameHost.exe')      0.0625      
('SearchHost.exe', 'ApplicationFrameHost.exe')      0.25      
('Messenger.exe', 'chrome.exe')      0.5645161290322