In [3]:
import pandas as pd
import numpy as np


In [4]:
# File path to the CSV file
file_path = "logs_cleaned_data.csv"

# Read the CSV without assuming a header
df = pd.read_csv(file_path)

# Assuming that the "Message" is in a specific column (adjust the index as needed)
# For example, let's assume column 1 (index 0) contains the messages
df.columns = ["Column_1", "Message", "Column_3"]  # Rename columns for clarity

# Define a function to categorize messages based on keywords
def categorize_status(message):
    message_lower = str(message).lower()
    if "error" in message_lower:
        return "critical"
    elif "warning" in message_lower:
        return "warning"
    elif "up" in message_lower:
        return "up"
    elif "down" in message_lower:
        return "down"
    elif "available" in message_lower:
        return "available"
    elif "denied" in message_lower:
        return "denied"
    elif "critical" in message_lower:
        return "critical"
    elif "started" in message_lower:
        return "started"
    else:
        return "unknown"

# Apply the function to create a new "Status Code" feature
df['Status Code'] = df['Message'].apply(categorize_status)

# Display the updated data with the new feature for verification
df1=df[['Message', 'Status Code']]

df1.head()

Unnamed: 0,Message,Status Code
0,Agent 192.168.112.57 became available,available
1,Component DNS Server on Application NOC Infra ...,critical
2,Component Error and Warning Events: SMS Server...,critical
3,Sohar-Alreef - DIALUPTOHO Up 192.168.6.1,up
4,Error polling cloud volume prdz1rpabot1-osdisk...,critical


In [5]:

df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1559 entries, 0 to 1558
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Message      1559 non-null   object
 1   Status Code  1559 non-null   object
dtypes: object(2)
memory usage: 24.5+ KB


In [6]:
# Get distinct values in the "Status Code" column
df1['Status Code'].unique()



      dtype=object)

In [7]:
# Define a mapping of status codes to clusters
status_to_cluster = {
    'unknown': 4,         # Cluster 4: Unknown States
    'available': 1,       # Cluster 1: Normal Operations
    'critical': 3,        # Cluster 3: Critical Issues
    'up': 1,              # Cluster 1: Normal Operations
    'down': 3,            # Cluster 3: Critical Issues
    'warning': 2          # Cluster 2: Warnings
}

# Apply the mapping to create a new "Cluster" column
df1['Cluster'] = df['Status Code'].map(status_to_cluster)

# Display the updated DataFrame with clusters for verification
df1[['Status Code', 'Cluster']].head(10)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['Cluster'] = df['Status Code'].map(status_to_cluster)


Unnamed: 0,Status Code,Cluster
0,available,1
1,critical,3
2,critical,3
3,up,1
4,critical,3
5,critical,3
6,critical,3
7,critical,3
8,critical,3
9,critical,3


In [8]:

cluster_summary = df1.groupby('Cluster')['Status Code']
cluster_summary.value_counts()

Cluster  Status Code
1        up              127
         available         8
3        critical       1309
         down             13
4        unknown          71
Name: count, dtype: int64

In [9]:
# Sort the DataFrame by the "Cluster" column
df1_sorted = df1.sort_values(by='Cluster')

In [10]:
df1_sorted.head()

Unnamed: 0,Message,Status Code,Cluster
0,Agent 192.168.112.57 became available,available,1
1215,"Application ""NOC Infra Critical Active Directo...",up,1
236,"Component ""Output Cache Total Hits (MAFVendor ...",up,1
512,Component Page Reads/sec on Application PRODSW...,up,1
904,"Component ""Active Directory Domain Services"" f...",up,1


In [11]:
cluster_summary = df1_sorted.groupby('Cluster').agg({
    'Message': lambda x: ' | '.join(x),  # Combine messages with a separator
    'Status Code': lambda x: ', '.join(set(x))  # Unique status codes
}).reset_index()

In [12]:
for index, row in cluster_summary.iterrows():
    status_code = row['Status Code']
    print(f"Status Code: {status_code}")
    print(f"Messages: {row['Message']}")
    print()

Status Code: available, up
Messages: Agent 192.168.112.57 became available | Application "NOC Infra Critical Active Directory Service Monitoring" on node "ISNOCPRDC04" is up 10.255.12.12 | Component "Output Cache Total Hits (MAFVendor AppPool)" for application "Microsoft IIS" on node "mafweb01" is up 192.168.253.11 | Component Page Reads/sec on Application PRODSWSQL on Node ISNOCPRODSWSQL is Up 10.255.17.25 | Component "Active Directory Domain Services" for application "NOC Infra - Active Directory Domain Services Monitoring" on node "ISNOCPRDC01" is up 10.255.12.11 | Application "NOC Infra - Active Directory Domain Services Monitoring" on node "ISNOCPRDC01" is up 10.255.12.11 | Component "Metadata Cache Misses (MAFVendor AppPool)" for application "Microsoft IIS" on node "mafweb01" is up 192.168.253.11 | Node Greens-60F is Up. Customer Name: Choithrams Server Role: Greens Store Firewall Server Location: UAE - Retail Stores 10.168.110.100 | Component "DNS Server" for application "NOC In

In [14]:
# Filter rows with specific status codes: warning, down, critical
filtered_df = df1[df1['Status Code'].isin(['warning', 'down', 'critical'])]

# Sort the filtered DataFrame by the "Cluster" column
filtered_df_sorted = filtered_df.sort_values(by='Cluster')

# Group by Cluster and summarize
cluster_summary = filtered_df_sorted.groupby('Cluster').agg({
    'Message': lambda x: ' | '.join(x),  # Combine messages with a separator
    'Status Code': lambda x: ', '.join(set(x))  # Unique status codes
}).reset_index()

In [15]:
# Print the summary with status code as the header
for index, row in cluster_summary.iterrows():
    status_code = row['Status Code']
    print(f"Status Code: {status_code}")
    print(f"Messages: {row['Message']}")
    print()


Status Code: down, critical

