# Ransomware Detection Lab

## Step 1: Import Python Packages

In [None]:
from datetime import datetime    # DateTime package for time conversions
from io import StringIO          # Wraps JSON text in a file-like object for pandas
import json                      # Needed to JSONify data for pandas
import time                      # Used to wait for query to finish

import boto3                     # Amazon Python SDK
import pandas as pd              # Used to manipulate data

## Step 2: Query for Downloaded Files

In [None]:
# Query window: the 24 hours ending now, expressed as epoch seconds
end_time = datetime.now().timestamp()
start_time = end_time - 86400

# Create CloudWatch Logs client and start a Logs Insights query for S3 events
client = boto3.client('logs')
response = client.start_query(
    queryLanguage='CWLI',
    logGroupName='/baker221b/cloudtrail',
    startTime=int(start_time),
    endTime=int(end_time),
    queryString='fields @message | filter eventSource == "s3.amazonaws.com"'
)
query_id = response['queryId']

# Poll until the query finishes. A fixed sleep followed by a single read can
# return partial (or no) rows while the query is still Scheduled/Running.
poll_deadline = time.monotonic() + 300
while True:
    response = client.get_query_results(queryId=query_id)
    status = response['status']
    if status == 'Complete':
        break
    if status in ('Failed', 'Cancelled', 'Timeout', 'Unknown'):
        raise RuntimeError(
            f'CloudWatch Logs query {query_id} ended with status {status}'
        )
    if time.monotonic() >= poll_deadline:
        raise TimeoutError(
            f'CloudWatch Logs query {query_id} did not complete in time'
        )
    time.sleep(1)
results = response['results']

# Each result row is a list of {'field': ..., 'value': ...} entries; select
# @message by name instead of relying on it being the first entry.
messages = [
    entry['value']
    for row in results
    for entry in row
    if entry['field'] == '@message'
]

# Shove data into a pandas DataFrame
data = [json.loads(message) for message in messages]
# read_json expects a path or file-like object; passing a literal JSON string
# is deprecated, so wrap the text in StringIO.
df = pd.read_json(StringIO(json.dumps(data)), orient='records')

# Flatten the nested request/user dictionaries into top-level columns. Only
# expand columns that exist so an empty query result does not raise KeyError.
expanded_columns = [
    pd.json_normalize(df[column])
    for column in ('requestParameters', 'userIdentity')
    if column in df
]
df = pd.concat([df, *expanded_columns], axis=1)
df

## Step 3: Detect Ransomware

In [None]:
# Timestamp layout of the eventTime strings parsed below
_EVENT_TIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'

# Column order of the returned DataFrame (also used when nothing is found)
_RESULT_COLUMNS = [
    'OriginalFile',
    'BucketName',
    'DownloadedAt',
    'UploadedFiles',
    'Deleted',
]


def _events_named(df, event_name):
    """Return a copy of the rows whose eventName matches, with eventTime parsed."""
    events = df[df['eventName'] == event_name].copy()
    events['eventTime'] = events['eventTime'].apply(
        lambda stamp: datetime.strptime(stamp, _EVENT_TIME_FORMAT)
    )
    return events


def analyze_cloudtrail_data(df):
    """Find objects that were downloaded, re-uploaded under a new name, and deleted.

    For every GetObject event, the function looks in the same bucket for:

    * a PutObject at or after the download whose key starts with the
      downloaded key but does not end with it (e.g. ``file.txt`` ->
      ``file.txt.enc``), and
    * a DeleteObject at or after the download for exactly the downloaded key.

    A row is reported only when both are present. Matching is done on bucket
    name and key only; the identity of the caller is not compared.

    Args:
        df: DataFrame with at least the columns ``eventName``, ``eventTime``
            (strings formatted like ``2024-01-01T10:00:00Z``), ``bucketName``
            and ``key``.

    Returns:
        A DataFrame with the columns ``OriginalFile``, ``BucketName``,
        ``DownloadedAt``, ``UploadedFiles`` (list of uploaded keys) and
        ``Deleted``. It is empty, but keeps these columns, when nothing
        matches.
    """
    get_objects = _events_named(df, 'GetObject')
    put_objects = _events_named(df, 'PutObject')
    delete_objects = _events_named(df, 'DeleteObject')

    results = []

    for _, get_row in get_objects.iterrows():
        file_key = get_row['key']
        # A download without a usable key cannot be matched against uploads;
        # the string comparisons below would raise on a missing value.
        if not isinstance(file_key, str):
            continue
        bucket_name = get_row['bucketName']
        event_time = get_row['eventTime']

        # Uploads to the same bucket, at or after the download, whose key
        # extends the downloaded key. na=False treats rows with a missing key
        # as non-matching so the masks stay boolean and `~` is valid.
        put_keys = put_objects['key'].str
        put_candidates = put_objects[
            (put_objects['bucketName'] == bucket_name) &
            (~put_keys.endswith(file_key, na=False)) &
            (put_keys.startswith(file_key, na=False)) &
            (put_objects['eventTime'] >= event_time)
        ]

        # Deletions of exactly the downloaded key, in the same bucket, at or
        # after the download.
        delete_candidates = delete_objects[
            (delete_objects['bucketName'] == bucket_name) &
            (delete_objects['key'] == file_key) &
            (delete_objects['eventTime'] >= event_time)
        ]

        # Report only when the file was both re-uploaded and deleted
        if not put_candidates.empty and not delete_candidates.empty:
            results.append({
                'OriginalFile': file_key,
                'BucketName': bucket_name,
                'DownloadedAt': event_time,
                'UploadedFiles': list(put_candidates['key']),
                'Deleted': True,
            })

    return pd.DataFrame(results, columns=_RESULT_COLUMNS)
# Run the detection over the `df` DataFrame assembled in the Step 2 cell. The
# call is left as a bare expression, presumably so the notebook renders the
# returned DataFrame as the cell output.
analyze_cloudtrail_data(df)