# Security Log Analysis with Machine Learning

**Project by:**
- Molla Samser (Founder)
- Rima Khatun (Designer & Tester)

**Organization:** RSK World  
**Website:** https://rskworld.in  
**Contact:** help@rskworld.in | +91 93305 39277  
**Address:** Nutanhat, Mongolkote, Purba Burdwan, West Bengal, India, 713147

---

## Overview

This notebook demonstrates an automated security log analysis system that:
- Parses and preprocesses security logs
- Extracts meaningful features
- Detects anomalies using machine learning
- Classifies security incidents
- Generates visualizations and reports


In [None]:
# Import required libraries
# Third-party data and plotting stack, plus the stdlib `warnings` module.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this suppresses every warning category for the rest of the
# session, including deprecation notices raised by the libraries above.
warnings.filterwarnings('ignore')

# Import project modules
# Project-local modules; they must be importable from the notebook's working
# directory. Each one supplies the class used by one of the numbered sections.
from log_parser import LogParser, generate_sample_logs
from feature_extractor import FeatureExtractor
from anomaly_detector import AnomalyDetector
from incident_classifier import IncidentClassifier
from visualizer import SecurityLogVisualizer
# NOTE(review): `config` is not referenced in the cells visible in this
# section — confirm whether it is imported for side effects or is unused.
import config

# Set visualization style
# Global plotting defaults: seaborn "whitegrid" theme and a 12x6 inch default
# figure size for every matplotlib figure created afterwards.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Banner confirming the imports above succeeded, followed by project contact info.
print("✓ Libraries imported successfully")
print("\nSecurity Log Analysis System - RSK World")
print("Contact: help@rskworld.in | +91 93305 39277")


## 1. Data Loading and Preprocessing


In [None]:
# One path shared by the generator and the parser so the two cannot drift apart.
sample_log_path = 'data/security_logs.csv'

# Produce 1000 synthetic entries (presumably written to sample_log_path —
# confirm against log_parser.generate_sample_logs).
print("Generating sample security logs...")
generate_sample_logs(n_samples=1000, output_path=sample_log_path)

# Parse that file into `df`, the frame the feature-extraction cell consumes.
parser = LogParser()
df = parser.parse_csv_logs(sample_log_path)

# Summarize what was loaded: entry count, shape, then the column names.
entry_count = len(df)
print(f"\n✓ Loaded {entry_count} log entries")
print(f"\nDataset shape: {df.shape}")
all_columns = df.columns.tolist()
print(f"\nColumns: {', '.join(all_columns)}")


In [None]:
# Display first few rows
# `df.head()` must stay the last expression of the cell: a notebook renders
# the value of a cell's final expression, which is what shows the preview.
# (Assumes `df` is a pandas DataFrame — confirm against LogParser.parse_csv_logs.)
print("Sample log entries:")
df.head()


## 2. Feature Extraction


In [None]:
# Build the extractor and run it over the parsed logs.
extractor = FeatureExtractor()
print("Extracting features from logs...")
df_features = extractor.extract_all_features(df)

# prepare_features_for_ml returns a pair; only its first element (the
# feature matrix X) is used by this notebook, the second is discarded.
X, _ = extractor.prepare_features_for_ml(df_features)

# Report how many feature columns were produced, then list them 1-based.
feature_total = len(X.columns)
print(f"\n✓ Extracted {feature_total} features")
print(f"\nFeature columns ({feature_total}):")
for position, feature_name in enumerate(X.columns, start=1):
    print(f"{position}. {feature_name}")


## 3. Anomaly Detection


In [None]:
# Isolation Forest detector. NOTE(review): contamination=0.1 is presumably the
# expected share of anomalous entries — confirm against AnomalyDetector.
detector = AnomalyDetector(method='isolation_forest', contamination=0.1)

# Run detection over the feature frame, restricted to the ML feature columns.
print("Detecting anomalies using Isolation Forest...")
ml_feature_names = X.columns.tolist()
df_with_anomalies = detector.detect_anomalies(df_features, ml_feature_names)

# Aggregate the per-row results into a summary mapping.
anomaly_summary = detector.get_anomaly_summary(df_with_anomalies)

print("\n✓ Anomaly Detection Complete")
# The three plain counts share one output format, so print them from a table
# of (caption, summary key) pairs; the rate needs its own two-decimal format.
summary_rows = (
    ("\nTotal Logs", 'total_logs'),
    ("Anomalies Detected", 'anomalies'),
    ("Normal Logs", 'normal'),
)
for caption, summary_key in summary_rows:
    print(f"{caption}: {anomaly_summary[summary_key]}")
print(f"Anomaly Rate: {anomaly_summary['anomaly_percentage']:.2f}%")


## 4. Incident Classification


In [None]:
# Build the classifier and label the anomaly-annotated logs, using the same
# ML feature columns that were passed to the anomaly detector.
classifier = IncidentClassifier()
print("Classifying security incidents...")
ml_feature_names = X.columns.tolist()
df_classified = classifier.classify_incidents(df_with_anomalies, ml_feature_names)

# Aggregate the classification results into a summary mapping.
incident_summary = classifier.get_incident_summary(df_classified)

print("\n✓ Incident Classification Complete")
print(f"\nTotal Incidents: {incident_summary['total_incidents']}")
print("\nIncident Breakdown:")
# One line per incident type: its count plus its share to two decimals.
# The share is looked up by the same key in 'incident_percentages'.
counts_by_type = incident_summary['incident_counts']
for kind, occurrences in counts_by_type.items():
    print(f"  {kind}: {occurrences} ({incident_summary['incident_percentages'][kind]:.2f}%)")


## 5. Generate Complete Report

**Project by:** Molla Samser (Founder) & Rima Khatun (Designer & Tester)  
**Organization:** RSK World | https://rskworld.in  
**Contact:** help@rskworld.in | +91 93305 39277


In [None]:
# Visualizer configured with 'reports' as its output directory.
visualizer = SecurityLogVisualizer(output_dir='reports')

print("Generating comprehensive visualizations...")
# Plots driven directly by the classified log frame.
visualizer.plot_anomaly_distribution(df_classified)
visualizer.plot_incident_classification(df_classified)
visualizer.plot_time_series_analysis(df_classified)

# The feature-importance plot is fed from the classifier, not from the frame.
feature_importance = classifier.get_feature_importance()
visualizer.plot_feature_importance(feature_importance)

visualizer.plot_network_analysis(df_classified)

# Text report combining the classified logs with both summaries computed in
# the earlier cells; the result is kept in `report` for further use.
report = visualizer.generate_summary_report(df_classified, anomaly_summary, incident_summary)

print("\n✓ All visualizations and reports generated successfully")
print("\nLocation: ./reports/")
