In [1]:
from Parse_Utils import parse_logs
from EDA_Utils import *
from Session_Utils import *

In [2]:
raw_logs = """[2022-01-01 08:15:30] 123 login success viewer
[2022-01-01 08:20:45] 123 logout viewer
[2022-01-01 09:05:12] 123 view_page article_1 viewer
[2022-01-01 09:10:30] 789 login success admin,editor
[2022-01-01 09:30:00] 456 edit_profile editor
[2022-01-01 10:00:00] 456 view_page article_2 editor
[2022-01-01 10:30:00] 789 view_page article_1 admin
[2022-01-01 10:45:22] 123 logout viewer
[2022-01-01 11:05:30] 789 logout admin
[2022-01-01 11:15:00] 456 view_page article_2 editor
[2022-01-01 11:20:45] 123 login success viewer
[2022-01-01 12:30:00] 456 logout editor
[2022-01-01 12:45:22] 789 edit_profile admin
[2022-01-01 13:15:30] 123 view_page article_3 viewer
[2022-01-01 13:30:45] 789 view_page article_4 admin
[2022-01-01 14:10:00] 123 logout viewer
[2022-01-01 14:20:00] 123 login success admin
[2022-01-01 15:00:00] 123 view_page article_4 admin
[2022-01-01 15:30:00] 789 view_page article_5 admin
[2022-01-01 16:00:00] 123 logout admin
[2022-01-01 16:30:00] 456 edit_profile editor
[2022-01-01 17:00:00] 789 edit_profile admin
[2022-01-01 18:00:00] 456 logout editor
[2022-01-01 18:15:00] 789 logout admin
"""

In [3]:
try:
    parsed_logs = parse_logs(logs= raw_logs)
except Exception as e:
    print(f"Log parsing failed completely: {e}")
    parsed_logs = []
parsed_logs

[{'timestamp': datetime.datetime(2022, 1, 1, 8, 15, 30),
  'user_id': '123',
  'activity': 'login',
  'activity_detail': 'success',
  'roles': 'viewer'},
 {'timestamp': datetime.datetime(2022, 1, 1, 8, 20, 45),
  'user_id': '123',
  'activity': 'logout',
  'activity_detail': '',
  'roles': 'viewer'},
 {'timestamp': datetime.datetime(2022, 1, 1, 9, 5, 12),
  'user_id': '123',
  'activity': 'view_page',
  'activity_detail': 'article_1',
  'roles': 'viewer'},
 {'timestamp': datetime.datetime(2022, 1, 1, 9, 10, 30),
  'user_id': '789',
  'activity': 'login',
  'activity_detail': 'success',
  'roles': ['admin', 'editor']},
 {'timestamp': datetime.datetime(2022, 1, 1, 9, 30),
  'user_id': '456',
  'activity': 'edit_profile',
  'activity_detail': '',
  'roles': 'editor'},
 {'timestamp': datetime.datetime(2022, 1, 1, 10, 0),
  'user_id': '456',
  'activity': 'view_page',
  'activity_detail': 'article_2',
  'roles': 'editor'},
 {'timestamp': datetime.datetime(2022, 1, 1, 10, 30),
  'user_id': '

In [4]:
#Active users
users_per_log = get_all_users(parsed_logs)
active_users = set(users_per_log)
print(f"Active Users are: {active_users}")

Active Users are: {'123', '789', '456'}


How many activities did each user perform in the log data?

In [5]:
#Activity count per user
for user in active_users:
    activities_per_user =  get_activities_per_user(parsed_logs,user)
    print(f"User {user}: {len(activities_per_user)}")

User 123: 10
User 789: 8
User 456: 6


Can you provide a breakdown of the activities for user ID 123?

In [6]:
#Sequential activity log of user_id 123
all_logs_123 = get_all_logs_per_user(parsed_logs,"123")
formatted_logs(all_logs_123, "123")

log breakdown of user 123:
User successfully logged in as viewer - 2022-01-01 08:15:30
User logged out as viewer - 2022-01-01 08:20:45
User viewed page  article_1 - 2022-01-01 09:05:12
User logged out as viewer - 2022-01-01 10:45:22
User successfully logged in as viewer - 2022-01-01 11:20:45
User viewed page  article_3 - 2022-01-01 13:15:30
User logged out as viewer - 2022-01-01 14:10:00
User successfully logged in as admin - 2022-01-01 14:20:00
User viewed page  article_4 - 2022-01-01 15:00:00
User logged out as admin - 2022-01-01 16:00:00


What is the frequency of each action type across all users in the log data?

In [7]:
from collections import Counter

In [8]:
#Activity frequency
all_activities = get_all_activities(parsed_logs)
activity_counts = Counter(all_activities)
for item, count in activity_counts.items():
    print(f"{item}: {count}")

login: 4
logout: 8
view_page: 8
edit_profile: 4


What is total time frame of the given logs?

In [12]:
total_log_timeframe  = get_all_logs_timeframe(parsed_logs)
print(f"The time duration of given logs is : {total_log_timeframe['start']} to {total_log_timeframe['end']}")
print(f"{total_log_timeframe['days']} days")
print(f"{total_log_timeframe['hours']} hours")
print(f"{total_log_timeframe['minutes']} mins")
print(f"{total_log_timeframe['seconds']} seconds") 

The time duration of given logs is : 2022-01-01 08:15:30 to 2022-01-01 18:15:00
0 days
9 hours
59 mins
30 seconds


List the users along with the number of sessions each user had?

For each user:
1. Retrieve all activites associated with the user.
2. Identify individual sessions by traceing login and logout events

Rules for session identification.

| Scenario                       | Session Start     | Session End                 | Notes                             |
| ------------------------------ | ----------------- | --------------------------- | --------------------------------- |
| Login → ... → Logout           | `login`           | `logout`                    | Normal session                    |
| Login → ... → (no logout)      | `login`           | next `login` or end of logs | End current session implicitly    |
| Login → ... → Login            | new `login`       | ends previous session       | Treat as new session              |
| (no login) → ... → Logout      | start of sequence | `logout`                    | Best-effort session               |
| Actions only (no login/logout) | first activity    | last activity               | Fallback session                  |
| Logout without login           | N/A               | ignore or handle gracefully | No session start → may be ignored |


In [10]:
# get all activites per user
for user in active_users:
    activities_per_user =  get_activities_per_user(parsed_logs,user)
    user_sessions = identify_sessions(activities_per_user)
    print(f"Number of sessions for user {user} = {len(user_sessions)}")


Number of sessions for user 123 = 3
Number of sessions for user 789 = 2
Number of sessions for user 456 = 2


For each user session, what was the most frequently performed action type?

In [11]:
for user in active_users:
    activities_per_user =  get_activities_per_user(parsed_logs,user)
    user_sessions = identify_sessions(activities_per_user)
    for session_id,activities in user_sessions.items():
        # Count frequencies
        counter = Counter(activities)
        # Find the max frequency
        max_count = max(counter.values())
        # Get all actions that have this max frequency
        most_frequent_activity = [action for action, count in counter.items() if count == max_count]
        print(f"The most frequent activity in session {session_id} for user {user} is {most_frequent_activity}")


The most frequent activity in session 1 for user 123 is ['view_page']
The most frequent activity in session 2 for user 123 is ['view_page']
The most frequent activity in session 3 for user 123 is ['view_page']
The most frequent activity in session 1 for user 789 is ['view_page']
The most frequent activity in session 2 for user 789 is ['edit_profile', 'view_page']
The most frequent activity in session 1 for user 456 is ['view_page']
The most frequent activity in session 2 for user 456 is ['edit_profile']
