# Azure Ransomware Detection Lab

## Step 1: Import Python Packages

In [None]:
!pip install azure-identity azure-mgmt-loganalytics azure-monitor-query azure-mgmt-subscription &>/dev/null

from azure.identity import DeviceCodeCredential                   # Needed to authenticate with Azure
from azure.mgmt.loganalytics import LogAnalyticsManagementClient  # Allows interaction with Log Analytics
from azure.mgmt.subscription import SubscriptionClient            # Allows us to get the subscription ID
from azure.monitor.query import LogsQueryClient                   # Used to issue queries to Log Analytics
from datetime import timedelta, datetime                          # Time conversions
import pandas as pd                                               # Used to manipulate data

## Step 2: Authenticate with Azure

In [None]:
# Create a device-code credential and sign in explicitly before building any client.
# NOTE(review): authenticate() presumably prompts interactively with a device code - confirm.
credential = DeviceCodeCredential()
credential.authenticate()
# Both clients below share the same credential object.
subscriptions_client = SubscriptionClient(credential)  # Used in Step 3 to look up the subscription ID
query_client = LogsQueryClient(credential)             # Used in Step 3 to query the Log Analytics workspace

## Step 3: Get Data from Log Analytics Workspace

In [None]:
# Get first subscription; fail with a clear message instead of a NameError
# further down when the signed-in account can see no subscriptions.
subscriptions = subscriptions_client.subscriptions.list()
subscription = next(iter(subscriptions), None)
if subscription is None:
    raise RuntimeError('No Azure subscription is visible to the authenticated account')
subscription_id = subscription.subscription_id

# Get workspace_id
# Reset it first so that re-running this cell can never silently reuse a
# stale value left over from an earlier run.
workspace_name = 'sherlocklaw'
workspace_id = None
log_analytics_client = LogAnalyticsManagementClient(credential, subscription_id)
workspaces = log_analytics_client.workspaces.list()
for workspace in workspaces:
    if workspace.name == workspace_name:
        workspace_id = workspace.customer_id
if workspace_id is None:
    raise RuntimeError(
        f'Log Analytics workspace {workspace_name!r} was not found in subscription {subscription_id}'
    )

# Get all StorageBlobLogs data within the last day
query = """
StorageBlobLogs
"""
response = query_client.query_workspace(workspace_id, query, timespan=timedelta(days=1))

# A fully successful result exposes `tables`. A partially successful result
# is expected to expose `partial_data` / `partial_error` instead, so fall back
# to those rather than failing with an AttributeError.
data = getattr(response, 'tables', None)
if data is None:
    print(f"Warning: query only partially succeeded: {getattr(response, 'partial_error', None)}")
    data = getattr(response, 'partial_data', None) or []
if not data:
    raise RuntimeError('The StorageBlobLogs query returned no tables')

# Build the DataFrame (when several tables are returned, the last one wins)
for table in data:
    df = pd.DataFrame(data=table.rows, columns=table.columns)

# Show full cell contents (long URIs / object keys) instead of truncating them
pd.set_option('display.max_colwidth', None)
df

## Step 4: Detect Ransomware

In [None]:
# Columns of the DataFrame returned by analyze_storagebloblogs_data, in order
_RESULT_COLUMNS = ['OriginalFile', 'AccountName', 'DownloadedAt', 'UploadedFiles', 'Deleted']


def _events_for_operation(df, operation_name):
    """Return a copy of the rows for one OperationName with TimeGenerated parsed."""
    events = df[df['OperationName'] == operation_name].copy()
    events['TimeGenerated'] = pd.to_datetime(events['TimeGenerated'])
    return events


def analyze_storagebloblogs_data(df):
    """Find blobs that were downloaded, re-uploaded under a longer name, and deleted.

    For every GetBlob record the function looks, within the same AccountName,
    for:

    * PutBlob records at or after the download whose ObjectKey starts with,
      but does not end with, the downloaded ObjectKey (for example
      ``report.docx`` -> ``report.docx.enc``), and
    * DeleteBlob records at or after the download for exactly the downloaded
      ObjectKey.

    A result row is produced only when both kinds of record exist.

    Args:
        df: DataFrame with the columns OperationName, ObjectKey, AccountName
            and TimeGenerated (any format ``pd.to_datetime`` accepts).

    Returns:
        DataFrame with the columns OriginalFile, AccountName, DownloadedAt,
        UploadedFiles (list of matching PutBlob keys) and Deleted (always
        True). The columns are present even when nothing was detected.
    """
    # Nothing to analyse; also covers a completely column-less DataFrame
    if df.empty:
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    # Split the log into one DataFrame per operation of interest
    get_objects = _events_for_operation(df, 'GetBlob')
    put_objects = _events_for_operation(df, 'PutBlob')
    delete_objects = _events_for_operation(df, 'DeleteBlob')

    # A detection needs at least one upload and one delete, so stop early
    if put_objects.empty or delete_objects.empty:
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    results = []

    # Iterate through every GetBlob record
    for _, get_row in get_objects.iterrows():
        file_key = get_row['ObjectKey']
        account_name = get_row['AccountName']
        event_time = get_row['TimeGenerated']

        # A missing key cannot be matched and would make the string
        # comparisons below raise, so skip such records
        if not isinstance(file_key, str):
            continue

        # Uploads by the same account, at or after the download, whose name
        # extends the downloaded key. na=False keeps the masks boolean when a
        # PutBlob row has no ObjectKey; otherwise `~` fails on the NaN entries.
        put_keys = put_objects['ObjectKey'].str
        put_candidates = put_objects[
            (put_objects['AccountName'] == account_name) &
            (put_keys.startswith(file_key, na=False)) &
            (~put_keys.endswith(file_key, na=False)) &
            (put_objects['TimeGenerated'] >= event_time)
        ]

        # Deletes by the same account of exactly the downloaded key, at or
        # after the download
        delete_candidates = delete_objects[
            (delete_objects['AccountName'] == account_name) &
            (delete_objects['ObjectKey'] == file_key) &
            (delete_objects['TimeGenerated'] >= event_time)
        ]

        # Report only when the blob was both re-uploaded and deleted
        if not put_candidates.empty and not delete_candidates.empty:
            results.append({
                'OriginalFile': file_key,
                'AccountName': account_name,
                'DownloadedAt': event_time,
                'UploadedFiles': put_candidates['ObjectKey'].tolist(),
                'Deleted': True,
            })

    # Passing the columns keeps the schema stable when results is empty
    return pd.DataFrame(results, columns=_RESULT_COLUMNS)

# Run the detection on the StorageBlobLogs DataFrame (df) loaded in Step 3;
# as the last expression of the cell, the returned DataFrame is the cell's value.
analyze_storagebloblogs_data(df)