In [2]:
import pandas as pd
import numpy as np
import boto3
import json

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import fetch_20newsgroups
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import preprocessing
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin

# Use cases

Potential ideas:

 * You want to create a side project and leave it out there without paying server costs
 * You are an indie hacker and want to sell something, maybe an API, you want scalability but no baseline costs
 * You are a DS and need to deploy a model internally and creating a Python server is not allowed
 * You are a MLE and want to create a system where DSs just uploads a pickle file and you have an endpoint ready


When not to use it:

 * When creating Flask / FastAPI applications is allowed, within budget
 * When there is infrastructure to scale webservers (e.g. Kubernetes)

In [3]:
dataset = fetch_20newsgroups()

In [18]:
print(dataset.DESCR[:1086])

.. _20newsgroups_dataset:

The 20 newsgroups text dataset
------------------------------

The 20 newsgroups dataset comprises around 18000 newsgroups posts on
20 topics split in two subsets: one for training (or development)
and the other one for testing (or for performance evaluation). The split
between the train and test set is based upon a messages posted before
and after a specific date.

This module contains two loaders. The first one,
:func:`sklearn.datasets.fetch_20newsgroups`,
returns a list of the raw texts that can be fed to text feature
extractors such as :class:`~sklearn.feature_extraction.text.CountVectorizer`
with custom parameters so as to extract feature vectors.
The second one, :func:`sklearn.datasets.fetch_20newsgroups_vectorized`,
returns ready-to-use features, i.e., it is not necessary to use a feature
extractor.

**Data Set Characteristics:**

    Classes                     20
    Samples total            18846
    Dimensionality               1
    Features      

In [47]:
X = dataset.data
y = np.array([dataset.target_names[i] for i in dataset.target])

In [74]:
class InverseLabelEncoder(TransformerMixin, BaseEstimator):
    def __init__(self, le):
        super().__init__()
        self.le = le
                 
    def fit(self, _, y):
        return y
                 
    def predict(self, _, y):
        return self.le.inverse_transform(y)

In [75]:
tfidf = TfidfVectorizer(stop_words='english', lowercase=True, max_features=1000)
lr = LogisticRegression(C=5e1, solver='lbfgs', multi_class='multinomial', random_state=42, n_jobs=4)
le = preprocessing.LabelEncoder()
le.fit(y)

inv_le = InverseLabelEncoder(le)

In [80]:
pipe = Pipeline([('tfidf', tfidf), ('lr', lr)])

In [81]:
model = pipe.fit(X, le.fit_transform(y))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [83]:
def predict(X, pipe, le):
    return le.inverse_transform(pipe.predict(X))

In [84]:
predict(X, pipe, le)

array(['rec.autos', 'comp.sys.mac.hardware', 'comp.sys.mac.hardware', ...,
       'comp.sys.ibm.pc.hardware', 'comp.graphics', 'rec.motorcycles'],
      dtype='<U24')

In [88]:
eli5.show_weights(lr)

NameError: name 'eli5' is not defined

# Creating IAM role

First, make sure if you have AWS credentials in place. For example, you can install the `aws` command line and run aws configure.

In [2]:
iam_client = boto3.client('iam')

In [3]:
# Specify the role name and trust policy for the Lambda service
role_name = 'lambda-execution-role'
trust_policy = {
    'Version': '2012-10-17',
    'Statement': [
        {
            'Effect': 'Allow',
            'Principal': {'Service': 'lambda.amazonaws.com'},
            'Action': 'sts:AssumeRole'
        }
    ]
}

In [4]:
# Create the role
response = iam_client.create_role(
    RoleName=role_name,
    AssumeRolePolicyDocument=json.dumps(trust_policy),
    Description='Execution role for Lambda function',
)

In [5]:
response

{'Role': {'Path': '/',
  'RoleName': 'lambda-execution-role',
  'RoleId': 'AROAU6CNRNLCNUSYPFME5',
  'Arn': 'arn:aws:iam::339465038532:role/lambda-execution-role',
  'CreateDate': datetime.datetime(2023, 5, 1, 13, 38, 57, tzinfo=tzutc()),
  'AssumeRolePolicyDocument': {'Version': '2012-10-17',
   'Statement': [{'Effect': 'Allow',
     'Principal': {'Service': 'lambda.amazonaws.com'},
     'Action': 'sts:AssumeRole'}]}},
 'ResponseMetadata': {'RequestId': 'b6180fb8-a694-4c6f-9578-1cebd7458a29',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'b6180fb8-a694-4c6f-9578-1cebd7458a29',
   'content-type': 'text/xml',
   'content-length': '796',
   'date': 'Mon, 01 May 2023 13:38:56 GMT'},
  'RetryAttempts': 0}}

In [6]:
# Get the role ARN
role_arn = response['Role']['Arn']

# Attach the AWSLambdaBasicExecutionRole policy
policy_arn = 'arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole'
iam_client.attach_role_policy(
    RoleName=role_name,
    PolicyArn=policy_arn
)

print(f'Role created: {role_arn}')

Role created: arn:aws:iam::339465038532:role/lambda-execution-role


# Setting up Lambda function

In [7]:
lambda_client = boto3.client('lambda')

In [8]:
lambda_file = "my_lambda_function.py"

with open(lambda_file, "w") as f:
    f.write("import json\n\n")
    f.write("def lambda_handler(event, context):\n")
    f.write("    name = event.get('name', 'World')\n")
    f.write("    message = f'Hello, {name}!'\n")
    f.write("    return {\n")
    f.write("        'statusCode': 200,\n")
    f.write("        'body': json.dumps({\n")
    f.write("            'message': message\n")
    f.write("        })\n")
    f.write("    }\n")

In [9]:
import zipfile

zip_file = 'my_lambda_function.zip'

with zipfile.ZipFile(zip_file, 'w') as z:
    z.write(lambda_file)

In [None]:
# Read the zipped deployment package
with open(zip_file, 'rb') as f:
    deployment_package = f.read()

In [11]:
function_name = 'lambda-function-ML-model'

# Create the Lambda function
response = lambda_client.create_function(
    FunctionName=function_name,
    Runtime='python3.10',
    Role=role_arn,
    Handler='my_lambda_function.lambda_handler',
    Code={
        'ZipFile': deployment_package
    },
    Description='A simple Lambda function',
    Timeout=10,
    MemorySize=128,
    Publish=True,
)


In [12]:
print(json.dumps(response, indent=2))


{
  "ResponseMetadata": {
    "RequestId": "93fd38f8-1af3-46a1-8c0c-cb20e2fc4239",
    "HTTPStatusCode": 201,
    "HTTPHeaders": {
      "date": "Mon, 01 May 2023 13:42:31 GMT",
      "content-type": "application/json",
      "content-length": "1249",
      "connection": "keep-alive",
      "x-amzn-requestid": "93fd38f8-1af3-46a1-8c0c-cb20e2fc4239"
    },
    "RetryAttempts": 0
  },
  "FunctionName": "lambda-function-ML-model",
  "FunctionArn": "arn:aws:lambda:eu-west-1:339465038532:function:lambda-function-ML-model",
  "Runtime": "python3.8",
  "Role": "arn:aws:iam::339465038532:role/lambda-execution-role",
  "Handler": "my_lambda_function.lambda_handler",
  "CodeSize": 376,
  "Description": "A simple Lambda function",
  "Timeout": 10,
  "MemorySize": 128,
  "LastModified": "2023-05-01T13:42:31.515+0000",
  "CodeSha256": "HIJBq+/2eWi17MClpN0sP+jXlCuDOT2m8oNhTzD/AZI=",
  "Version": "1",
  "TracingConfig": {
    "Mode": "PassThrough"
  },
  "RevisionId": "628648dc-d31c-405b-a708-f69248a

In [13]:
# Prepare the event to pass to the Lambda function
event = {
    'name': 'John'
}

# Invoke the Lambda function
response = lambda_client.invoke(
    FunctionName=function_name,
    InvocationType='RequestResponse',
    LogType='Tail',
    Payload=json.dumps(event)
)

# Get the response from the Lambda function
result = json.loads(response['Payload'].read())

print(result)

{'statusCode': 200, 'body': '{"message": "Hello, John!"}'}


# Setting up API gateway

# Testing

# FastAPI comparison