# Top Storage Blob Uploaders

## Step 1: Import Python Packages

In [None]:
!pip install azure-identity azure-mgmt-loganalytics azure-monitor-query azure-mgmt-subscription &>/dev/null

from azure.identity import DeviceCodeCredential                   # Needed to authenticate with Azure
from azure.mgmt.loganalytics import LogAnalyticsManagementClient  # Allows interaction with Log Analytics
from azure.mgmt.subscription import SubscriptionClient            # Allows us to get the subscription ID
from azure.monitor.query import LogsQueryClient                   # Used to issue queries to Log Analytics
from datetime import timedelta, datetime                          # Time conversions
import pandas as pd                                               # Used to manipulate data

## Step 2: Authenticate with Azure

In [None]:
# Sign in interactively with the device-code flow; authenticate() is called
# up front so the login prompt appears in this cell rather than on first use.
credential = DeviceCodeCredential()
credential.authenticate()

# Build the clients that the later cells share, all backed by the same credential.
query_client = LogsQueryClient(credential)                 # issues Log Analytics queries
subscriptions_client = SubscriptionClient(credential)      # enumerates subscriptions

## Step 3: Query Log Analytics for PutBlob Operations

In [None]:
# Use the first subscription visible to the signed-in account.
# Fail with a clear message instead of a NameError when there is none.
subscription = next(iter(subscriptions_client.subscriptions.list()), None)
if subscription is None:
    raise RuntimeError('No Azure subscription is visible to the authenticated account')
subscription_id = subscription.subscription_id

# Resolve the workspace (customer) ID of the target Log Analytics workspace.
workspace_name = 'sherlocklaw'
log_analytics_client = LogAnalyticsManagementClient(credential, subscription_id)
workspace_id = next(
    (
        workspace.customer_id
        for workspace in log_analytics_client.workspaces.list()
        if workspace.name == workspace_name
    ),
    None,
)
if workspace_id is None:
    raise RuntimeError(
        f"Log Analytics workspace '{workspace_name}' was not found in subscription {subscription_id}"
    )

# Get all PutBlob operations recorded in StorageBlobLogs within the last day
query = """
StorageBlobLogs
| where OperationName == "PutBlob"
"""
response = query_client.query_workspace(workspace_id, query, timespan=timedelta(days=1))

# A partially successful query exposes its tables as `partial_data` (with the
# failure in `partial_error`) rather than `tables`; use whatever came back.
data = getattr(response, 'tables', None)
if data is None:
    data = response.partial_data
    print(f'Warning: query returned partial results: {response.partial_error}')

# This single-statement query yields one result table; fall back to an empty
# frame so `df` is always defined even if the service returned no tables.
if data:
    table = data[0]
    df = pd.DataFrame(data=table.rows, columns=table.columns)
else:
    df = pd.DataFrame()

pd.set_option('display.max_colwidth', None)  # show full cell contents (e.g. long URIs)
df

## Step 4: Detection 1: Rank Top Uploaders by Upload Occurrences

In [None]:
# CallerIpAddress arrives as "<ip>:<port>". Split it only once, so re-running
# this cell does not fail on (or clobber) the already-split columns.
if 'CallerTcpPort' not in df.columns:
    # Split on the LAST colon only, so an address that itself contains colons
    # (e.g. a bracketed IPv6 literal) keeps its address part intact.
    # reindex guarantees both columns exist even when the frame is empty or
    # no row contains a port.
    ip_and_port = (
        df['CallerIpAddress']
        .str.rsplit(':', n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    df['CallerIpAddress'] = ip_and_port[0]
    df['CallerTcpPort'] = ip_and_port[1]

# value_counts() sorts descending, so the top uploaders come first.
counts = df['CallerIpAddress'].value_counts().reset_index()
counts.columns = ['source','occurrences']
counts

## Step 5: Detection 2: Rank Top Uploaders by Sum of Uploaded File Sizes

In [None]:
# Total uploaded bytes per source IP, largest first. groupby alone orders the
# result by IP, which is not a ranking, so sort on the summed size explicitly.
# NOTE: relies on Step 4 having stripped the port from CallerIpAddress;
# otherwise totals are per "<ip>:<port>" pair.
sum_by_ip = (
    df.groupby('CallerIpAddress')['RequestBodySize']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)
sum_by_ip.columns = ['source', 'totalDataIn']
sum_by_ip