![](img/330-banner.png)

### Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.pipeline import Pipeline, make_pipeline

#### Python Installations
```
pip install Flask
pip install Flask-WTF
pip install joblib
```

### Building a model 

In [2]:
df = pd.read_csv("data/menstrual_data.csv")
# df = df.dropna(inplace=True)
df.head()
# sample_df = sample_df.rename(
#     columns={"cleaned_hm": "moment", "ground_truth_category": "target"}
# )
# sample_df.head()

Unnamed: 0,ClientID,CycleNumber,Group,CycleWithPeakorNot,ReproductiveCategory,LengthofCycle,MeanCycleLength,EstimatedDayofOvulation,LengthofLutealPhase,FirstDayofHigh,...,Method,Prevmethod,Methoddate,Whychart,Nextpreg,NextpregM,Spousesame,SpousesameM,Timeattemptpreg,BMI
0,nfp8122,1,0,1,0,29,27.33,17,12,12.0,...,9.0,,,2.0,7.0,7.0,1.0,1.0,0.0,21.254724111867
1,nfp8122,2,0,1,0,27,,15,12,13.0,...,,,,,,,,,,
2,nfp8122,3,0,1,0,29,,15,14,,...,,,,,,,,,,
3,nfp8122,4,0,1,0,27,,15,12,13.0,...,,,,,,,,,,
4,nfp8122,5,0,1,0,28,,16,12,12.0,...,,,,,,,,,,


In [3]:
df["ClientID"].value_counts()

ClientID
nfp8122    45
nfp8328    33
nfp8323    33
nfp8289    32
nfp8066    31
           ..
nfp8247     1
nfp8229     1
nfp8049     1
nfp8200     1
nfp8284     1
Name: count, Length: 159, dtype: int64

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1665 entries, 0 to 1664
Data columns (total 80 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   ClientID                    1665 non-null   object
 1   CycleNumber                 1665 non-null   int64 
 2   Group                       1665 non-null   int64 
 3   CycleWithPeakorNot          1665 non-null   int64 
 4   ReproductiveCategory        1665 non-null   int64 
 5   LengthofCycle               1665 non-null   int64 
 6   MeanCycleLength             1665 non-null   object
 7   EstimatedDayofOvulation     1665 non-null   object
 8   LengthofLutealPhase         1665 non-null   object
 9   FirstDayofHigh              1665 non-null   object
 10  TotalNumberofHighDays       1665 non-null   object
 11  TotalHighPostPeak           1665 non-null   object
 12  TotalNumberofPeakDays       1665 non-null   object
 13  TotalDaysofFertility        1665 non-null   obje

In [5]:
# df['MeanCycleLength'] = df['MeanCycleLength'].replace('', np.nan)
num_feats = ['LengthofCycle', 'LengthofLutealPhase', 'FirstDayofHigh', 'TotalNumberofHighDays', 'TotalNumberofPeakDays','LengthofMenses', 'TotalMensesScore', 'Age', 'Height', 'Weight', 'Numberpreg', 'Abortions', 'BMI', 'MensesScoreDayOne', 'MensesScoreDayTwo', 'MensesScoreDayThree',
       'MensesScoreDayFour', 'MensesScoreDayFive', 'MensesScoreDaySix'] # impute menses scores with 0

for col in num_feats:
    df[col] = pd.to_numeric(df[col], errors='coerce')
# df['MeanCycleLength'] = pd.to_numeric(df['MeanCycleLength'], errors='coerce')

df['EstimatedDayofOvulation'] = pd.to_numeric(df['EstimatedDayofOvulation'], errors='coerce')


# df['MeanCycleLength'] = df['MeanCycleLength'].astype(float)
# df.astype({'MeanCycleLength': 'float'}).dtypes


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1665 entries, 0 to 1664
Data columns (total 80 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   ClientID                    1665 non-null   object 
 1   CycleNumber                 1665 non-null   int64  
 2   Group                       1665 non-null   int64  
 3   CycleWithPeakorNot          1665 non-null   int64  
 4   ReproductiveCategory        1665 non-null   int64  
 5   LengthofCycle               1665 non-null   int64  
 6   MeanCycleLength             1665 non-null   object 
 7   EstimatedDayofOvulation     1515 non-null   float64
 8   LengthofLutealPhase         1514 non-null   float64
 9   FirstDayofHigh              1407 non-null   float64
 10  TotalNumberofHighDays       1653 non-null   float64
 11  TotalHighPostPeak           1665 non-null   object 
 12  TotalNumberofPeakDays       1649 non-null   float64
 13  TotalDaysofFertility        1665 

In [7]:
df.describe(include="all")

Unnamed: 0,ClientID,CycleNumber,Group,CycleWithPeakorNot,ReproductiveCategory,LengthofCycle,MeanCycleLength,EstimatedDayofOvulation,LengthofLutealPhase,FirstDayofHigh,...,Method,Prevmethod,Methoddate,Whychart,Nextpreg,NextpregM,Spousesame,SpousesameM,Timeattemptpreg,BMI
count,1665,1665.0,1665.0,1665.0,1665.0,1665.0,1665.0,1515.0,1514.0,1407.0,...,1665.0,1665.0,1665.0,1665.0,1665.0,1665.0,1665.0,1665.0,1665.0,131.0
unique,159,,,,,,105.0,,,,...,8.0,9.0,16.0,6.0,8.0,8.0,4.0,3.0,2.0,
top,nfp8122,,,,,,,,,,...,,,,,,,,,,
freq,45,,,,,,1524.0,,,,...,1523.0,1635.0,1633.0,1523.0,1523.0,1524.0,1523.0,1523.0,1523.0,
mean,,8.040841,0.382583,0.912312,0.055255,29.299099,,15.963036,13.270806,11.761905,...,,,,,,,,,,25.206891
std,,6.593686,0.486164,0.282925,0.479706,3.887932,,3.548488,2.671602,3.253758,...,,,,,,,,,,5.541611
min,,1.0,0.0,0.0,0.0,18.0,,6.0,1.0,5.0,...,,,,,,,,,,16.826657
25%,,3.0,0.0,1.0,0.0,27.0,,14.0,12.0,10.0,...,,,,,,,,,,21.086579
50%,,7.0,0.0,1.0,0.0,29.0,,15.0,13.0,11.0,...,,,,,,,,,,24.138503
75%,,11.0,1.0,1.0,0.0,31.0,,18.0,14.0,13.0,...,,,,,,,,,,27.933911


In [8]:
# df = df.sort_values(by=['ClientID', 'CycleNumber'])

# df.groupby(['ClientID']).ffill()
# df['Height'] = df.groupby(['ClientID']).ffill()['Height']
# df['Weight'] = df.groupby(['ClientID']).ffill()['Weight']
# df['BMI'] = df.groupby(['ClientID']).ffill()['BMI']
# df['Age'] = df.groupby(['ClientID']).ffill()['Age']
# df
df = df.groupby('ClientID').ffill()

In [9]:
df.rename(columns={"EstimatedDayofOvulation": "target"}, inplace=True)
df.dropna(subset=["target"], inplace=True)
train_df, test_df = train_test_split(df, test_size=0.10, random_state=123)
train_df.head()

Unnamed: 0,CycleNumber,Group,CycleWithPeakorNot,ReproductiveCategory,LengthofCycle,MeanCycleLength,target,LengthofLutealPhase,FirstDayofHigh,TotalNumberofHighDays,...,Method,Prevmethod,Methoddate,Whychart,Nextpreg,NextpregM,Spousesame,SpousesameM,Timeattemptpreg,BMI
192,8,1,1,0,31,,16.0,15.0,15.0,1.0,...,,,,,,,,,,30.110859
137,1,0,1,1,33,32.6,23.0,10.0,18.0,10.0,...,9.0,,,3.0,0.0,0.0,1.0,1.0,0.0,18.775006
12,13,0,1,0,27,,15.0,12.0,10.0,5.0,...,,,,,,,,,,21.254724
1601,26,0,1,0,36,,21.0,15.0,14.0,7.0,...,,,,,,,,,,
481,10,1,1,0,27,,13.0,14.0,10.0,3.0,...,,,,,,,,,,22.860302


In [10]:
train_df.shape

(1483, 79)

In [11]:
test_df.shape

(165, 79)

### Target Class
Our target class is EstimatedDayofOvulation. We will use the other dataset features to predict the estimated ovulation day for an app user.

In [12]:
X_train = train_df.drop(columns=["target"])
y_train = train_df["target"]

X_test = test_df.drop(columns=["target"])
y_test = test_df["target"]

In [13]:
y_train.isna().sum()

0

In [14]:
X_train.columns

Index(['CycleNumber', 'Group', 'CycleWithPeakorNot', 'ReproductiveCategory',
       'LengthofCycle', 'MeanCycleLength', 'LengthofLutealPhase',
       'FirstDayofHigh', 'TotalNumberofHighDays', 'TotalHighPostPeak',
       'TotalNumberofPeakDays', 'TotalDaysofFertility',
       'TotalFertilityFormula', 'LengthofMenses', 'MeanMensesLength',
       'MensesScoreDayOne', 'MensesScoreDayTwo', 'MensesScoreDayThree',
       'MensesScoreDayFour', 'MensesScoreDayFive', 'MensesScoreDaySix',
       'MensesScoreDaySeven', 'MensesScoreDayEight', 'MensesScoreDayNine',
       'MensesScoreDayTen', 'MensesScoreDay11', 'MensesScoreDay12',
       'MensesScoreDay13', 'MensesScoreDay14', 'MensesScoreDay15',
       'TotalMensesScore', 'MeanBleedingIntensity',
       'NumberofDaysofIntercourse', 'IntercourseInFertileWindow',
       'UnusualBleeding', 'PhasesBleeding', 'IntercourseDuringUnusBleed',
       'Age', 'AgeM', 'Maristatus', 'MaristatusM', 'Yearsmarried', 'Wedding',
       'Religion', 'ReligionM', 'Eth

### Feature Transformation

We will use simple imputer and standard scaler to transform our data.

In [15]:
num_feats = ['LengthofCycle', 'LengthofLutealPhase', 'FirstDayofHigh', 'TotalNumberofHighDays', 'TotalNumberofPeakDays','LengthofMenses', 'TotalMensesScore', 'Age', 'Height', 'Weight', 'Numberpreg', 'Abortions', 'BMI', 'MensesScoreDayOne', 'MensesScoreDayTwo', 'MensesScoreDayThree',
       'MensesScoreDayFour', 'MensesScoreDayFive', 'MensesScoreDaySix'] # impute menses scores with 0

# passthrough_feats = ['Group', 'CycleWithPeakorNot']
# drop the rest

# height, weight, BMI, age need to carry down
# 19 columns 

In [16]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer

num_transformer = make_pipeline(SimpleImputer(strategy="median"), StandardScaler())

In [17]:
preprocessor = make_column_transformer(
    (num_transformer, num_feats),
    # ("passthrough", passthrough_feats),
    # other features are dropped
)

In [18]:
X_train.shape

(1483, 78)

In [19]:
# X_train = preprocessor.fit_transform(X_train)
# X_test = preprocessor.transform(X_test)
# X_train_df = pd.DataFrame(X_train, columns=num_feats)

In [20]:
# X_train[0]

In [21]:
y_train

192     16.0
137     23.0
12      15.0
1601    21.0
481     13.0
        ... 
1136    14.0
1362    12.0
1423    18.0
1405    16.0
1551    14.0
Name: target, Length: 1483, dtype: float64

In [22]:
from sklearn.linear_model import Ridge

pipe_lr = make_pipeline(preprocessor, Ridge())

In [23]:
pipe_lr.fit(X_train, y_train);

In [24]:
pipe_lr.score(X_train, y_train)

0.8598035729844141

In [25]:
pipe_lr.score(X_test, y_test)

0.9413972573033073

### Saving the model 

- If we want to deploy a model, we need to save it. 
- We are using `joblib` for that. 

In [26]:
import joblib

with open("web_api/ovulationpredictor.joblib", "wb") as f:
    joblib.dump(pipe_lr, f)
with open("web_application/ovulationpredictor.joblib", "wb") as f:
    joblib.dump(pipe_lr, f)

We'll define a function that accepts input data as a dictionary and returns a prediction:

### Loading our saved model 

Let's write a function to get predictions. 

In [29]:
def return_prediction(model, input_list):   
    input = pd.DataFrame(input_list, index=[0])
    prediction = model.predict(input)[0]
    return prediction

In [30]:
from array import array

model = joblib.load("web_api/ovulationpredictor.joblib") # load the saved model

input_list = { # sample data
 'LengthofCycle': 0.3228787189273703,
 'LengthofLutealPhase': 0.9539014718391354,
 'FirstDayofHigh': 0.8783249921503523,
 'TotalNumberofHighDays': 0.6267111465085908,
 'TotalNumberofPeakDays': 0.9608764899313866,
 'LengthofMenses': 0.4591566374790159,
 'TotalMensesScore': 0.6019266855267724,
 'Age': 0.5527174199241642,
 'Height': 0.7752654666120737,
 'Weight': 0.5712515184134752,
 'Numberpreg': 0.8577488445619815,
 'Abortions': 0.1092300656164632,
 'BMI': 0.085970773511727,
 'MensesScoreDayOne': 0.31597228575643843,
 'MensesScoreDayTwo': 0.3943271147273022,
 'MensesScoreDayThree': 0.8378335499479616,
 'MensesScoreDayFour': 0.7607489669348005,
 'MensesScoreDayFive': 0.40170033015553275,
 'MensesScoreDaySix': 0.8547654462351196
}

# input_list = {'LengthofCycle': 26, 'LengthofLutealPhase': 5, 'FirstDayofHigh': 7,'TotalNumberofHighDays': 12, 'TotalNumberofPeakDays': 8, 'LengthofMenses': 7,'TotalMensesScore': 8, 'Age': 15, 'Height': 20, 'Weight': 15, 'Numberpreg': 0,'Abortions': 1, 'BMI': 27, 'MensesScoreDayOne': 1, 'MensesScoreDayTwo': 2,'MensesScoreDayThree', 'MensesScoreDayFour', 'MensesScoreDayFive',\n       'MensesScoreDaySix'],\n  }
return_prediction(model, input_list)

1.6754392644317733

This function appears in the `app.py` that we'll be using shortly.

### (Optional) Setting up a directory structure and environment

- We need a specific directory structure to help us easily deploy our machine learning model. 
- This is already set up in this repo.

```
├── web_api
│   └── moment_predictor.joblib  # this is the machine learning model we have built locally
│   └── app.py  # the file that defines our flask API
│   └── Procfile  # required to help start flask app
│   └── requirements.txt  # file containing required packages
│   
└── web_application
    └── moment_predictor.joblib  # this is the machine learning model we have built locally
    └── app.py  # the file that defines our flask API
    └── Procfile  # required to help start flask app
    └── requirements.txt  # file containing required packages
    └── templates  # contains HTML templates to help us build the web application
    │   └── style.css  # css template to be used in web application
    └── static  # this subdirectory contains CSS style sheets
        └── home.html  # html template to be used in web application
        └── prediction.html  # html template to be used in web application
```

### Model deployment

We have two options for deploying our moment prediction model. We can:

1. Build a web application (app) with a HTML user-interface that interacts directly with our model.
2. Develop a RESTful (REST stands for REpresentational State Transfer) web API that accepts HTTP requests in the form of input data and returns a prediction.

We'll explore both options below.

### Building and deploying a web app

|      | on localhost (my laptop) | on server (the interwebs) |
|------|--------------------------|--------------------------|
| app  |        ❓      |                          |
| API  |                          |                          |


- Flask can create entire web applications.
- We need to link our code to some html and css to create our web application.
- We will use Flask to create a html form, accept data submitted to the form, and return a prediction using the submitted data. 
- Again, I won't go into too much detail here, but we can open up `web_application/` and take a quick look.

- If you want to learn more about Flask, see:
  - [Flask tutorial video series by Corey Schafer](https://www.youtube.com/playlist?list=PL-osiE80TeTs4UjLw5MM6OjgkjFeUxCYH)
  - [Flask docs](https://flask.palletsprojects.com/en/1.1.x/)
  - [Flask tutorial by Miguel Grinberg](https://blog.miguelgrinberg.com/post/the-flask-mega-tutorial-part-i-hello-world)


- Let's try `web_application/app.py` that handles this part.
- We can open it up here in Jupyter and take a look.
- If we run `python app.py` we'll bring it to life.

### Web app on local server

1. Go to the terminal. 
2. Navigate to the `web_application` directory. 
3. Run the following to make the app alive: `python app.py`. 
    - If you get an error, you may need to install those extra packages and make sure you have the environment loaded.
4. Now you should be able to access the app at: `http://127.0.0.1:5000/` or `http://localhost:5000/`. 

### Web app on a real server

- If you want people to use your app/model, you would probably want to put it on a real server and not your laptop so that it's live all the time. 

|      | on localhost (my laptop) | on server (the interwebs)|
|------|--------------------------|--------------------------|
| app  |              ✅            |       ❓       |
| API  |                          |                          |

- We'll use [Render](https://render.com/) for this. 


### Render set-up (I already did these):

1. Go to [render](https://dashboard.render.com/), log-in
2. Click on "New" and create a web service.  

![](img/render-new-app.png)
<!-- <img src="img/render-new-app.png" width="600"> -->

### Demo of creating a new web service
- Create an associated GitHub repository. 
- I've put the contents from `web_application` folder [in this repository](https://github.com/kvarada/ml-deployment-example).
- Then you connect that repository to your new web service in render. 

![](img/github-to-render.png)
- Once you fill in all the information, it'll create an environment for you using `requirement.txt` and if everything goes well, your web service will be live.
- You'll see a link at the top which you can share with your friends. 

- Try it out for your own machine learning applications! 
- This is nice! If you develop a model and you want your friends to try it out without installing anything on their local computers, you can do this.  

### API on the localhost 

- Often you want other people to be able to use your models in their applications.
- We can do this by creating an **API**. 
- If you don't know what an API is, that's OK.
  - For our purposes, it's something that exists at a particular address, that can accept information and return information.
  - Sort of like a function but not Python-specific and potentially accessible by anyone on the internet.

|      | on localhost (my laptop) | on server (the interwebs) |
|------|--------------------------|--------------------------|
| app  |              ✅            |        ✅       |
| API  |       ❓      |                          |




- Go to the terminal. 
- Navigate to `web_api` folder in this repo.
- Run the following to make the api alive: `python app.py`

(Note that for more complex applications, you may choose to containerize everything in a Docker container to deploy to render).

### Sending a request to the API

- We have a RESTful (REST stands for REpresentational State Transfer) web API that accepts HTTP requests in the form of input data and returns a prediction.
- Now you can send requests to the API and get predictions. 

In [None]:
!curl -d '{"text":"The students will be submitting their last homework assignment soon!"}' -H "Content-Type: application/json" -X POST http://localhost:5000/predict

- `curl` (stands for client URL) is a tool for transferring data using various network protocols.

Here's what each part does in the command: 

- `-d` $\rightarrow$ allows you to send a POST request with the specified data.
- `'{"text":"The students will be submitting their last homework assignment soon!"}'` $\rightarrow$ the JSON data you're sending.
- `-H` $\rightarrow$ specifies a custom header, in this case, Content-Type: application/json to tell the server you're sending JSON.
- `-X POST` $\rightarrow$ explicitly sets the HTTP method to POST.
- `http://localhost:5000/predict` $\rightarrow$ the URL where you're sending the request.

Make sure you have `app.py` running at port 5000 when you execute the curl command. 

Okay, so we have a working API running on localhost, but we don't want to host this service on my laptop!

### Deploying the API on a server

- We now want to deploy it on a "real" server so others can send it requests. 
- We will use render to deploy our app but you could also use other services such as AWS.

|      | on localhost (my laptop) | on server (the interwebs) |
|------|--------------------------|--------------------------|
| app  |              ✅            |        ✅       |
| API  |       ✅      |                  ❓        |



Follow the instructions [here](https://render.com/docs/api) to try the API on server. 

### Discussion

- There are many ways to deploy a model; a RESTful API is very common and convenient. 
- As you can see, a simple deployment is fairly straightward. 
- However, there may be other considerations such as:
  - Privacy/security
  - Scaling to large number of requests
  - Error handling
  - Real-time / speed
  - Low-resource environments (e.g. edge computing)
  - etc.

```{note}
The order of steps is approximate, and some steps may need to be repeated during prototyping, experimentation, and as needed over time. 
```

<br><br>