### Import all the libraries

In [None]:

import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from ucimlrepo import fetch_ucirepo 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import mlflow
import mlflow.tensorflow
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
import mlflow.pyfunc

   X  Y month  day  FFMC   DMC     DC  ISI  temp  RH  wind  rain  area
0  7  5   mar  fri  86.2  26.2   94.3  5.1   8.2  51   6.7   0.0   0.0
1  7  4   oct  tue  90.6  35.4  669.1  6.7  18.0  33   0.9   0.0   0.0
2  7  4   oct  sat  90.6  43.7  686.9  6.7  14.6  33   1.3   0.0   0.0
3  8  6   mar  fri  91.7  33.3   77.5  9.0   8.3  97   4.0   0.2   0.0
4  8  6   mar  sun  89.3  51.3  102.2  9.6  11.4  99   1.8   0.0   0.0
after area log transformation: 
0      0.009950
1      0.009950
2      0.009950
3      0.009950
4      0.009950
         ...   
195    2.304583
196    2.391511
197    2.403335
198    2.419479
199    2.426571
Name: area, Length: 200, dtype: float64
          X         Y      FFMC       DMC        DC       ISI      temp  \
0  1.008313  0.569860 -0.805959 -1.323326 -1.830477 -0.860946 -1.842640   
1  1.008313 -0.244001 -0.008102 -1.179541  0.488891 -0.509688 -0.153278   
2  1.008313 -0.244001 -0.008102 -1.049822  0.560715 -0.509688 -0.739383   
3  1.440925  1.383722  0.1

### Function to create a Neural Network Model

In [28]:
def create_model(input_dim):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=input_dim))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    return model

### Download the dataset from its repo

In [None]:
# Fetch the dataset
forest_fires = fetch_ucirepo(id=162)
forestFiresDataframe = pd.DataFrame(forest_fires.data['features'])
forestFiresDataframe['area'] = forest_fires.data['targets']
print(forestFiresDataframe.head())

### Define the categorical and numerical features

In [None]:
categoricalFeatures = ['month', 'day']
numericalFeatures = ['X', 'Y', 'FFMC', 'DMC', 'DC', 'ISI', 'temp', 'RH']

### Handle missing values 

In [None]:
# Although I noticed none in this dataset but it is a good practice to check for missing values :)
imputer = SimpleImputer(strategy='most_frequent')
forestFiresDataframe[categoricalFeatures] = imputer.fit_transform(forestFiresDataframe[categoricalFeatures])
forestFiresDataframe[numericalFeatures] = imputer.fit_transform(forestFiresDataframe[numericalFeatures])

### Log transformation for target variable i.e. area

In [None]:
# Area is skewed towards 0, so we apply a log transformation to it to make it more normally distributed
forestFiresDataframe['area'] = forestFiresDataframe['area'].replace(0,1.01).fillna(1.01).apply(lambda x: np.log(x))
print("After area log transformation: ")
print(forestFiresDataframe['area'].head(200))

### Encode categorical variables

In [None]:
# Apply one-hot encoding to categorical features, and scale numerical features
# We'll end up with a total of 27 columns after one-hot encoding
encoder = OneHotEncoder(handle_unknown='ignore')
encodedCategoricalFeatures = encoder.fit_transform(forestFiresDataframe[categoricalFeatures]).toarray()

# Scale numerical features
scaler = StandardScaler()
scaledNumericalFeatures = scaler.fit_transform(forestFiresDataframe[numericalFeatures])

# Combine the processed features
processedData = np.hstack((scaledNumericalFeatures, encodedCategoricalFeatures))


### Convert to Dataframe

In [None]:

# Convert to DataFrame for better readability
processedDataframe = pd.DataFrame(processedData, columns=numericalFeatures + encoder.get_feature_names_out(categoricalFeatures).tolist())
print(processedDataframe.head())


### Divide features and target variable into separate variables

In [None]:

X = processedDataframe
y = forestFiresDataframe['area']


### Split data into taining and testing

In [None]:

# Split into training and test sets
featuresTrain, featuresTest, targetsTrain, targetsTest = train_test_split(X, y, test_size=0.2, random_state=None)

### Create an MLFlow experiment

In [None]:
# We set this experiment before running the model, so mlflow can log the model to this experiment
mlflow.set_experiment("neural_network_regression")

<Experiment: artifact_location='file:///c:/Users/sajid/Desktop/Flow/mlruns/895373400277021307', creation_time=1734545721621, experiment_id='895373400277021307', last_update_time=1734545721621, lifecycle_stage='active', name='neural_network_regression', tags={}>

### Define model parameters

In [30]:
input_dim = featuresTrain.shape[1]
epochs = 50
batch_size = 32
learning_rate = 0.001

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 24ms/step - loss: 3.0879 - val_loss: 2.0703
Epoch 2/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2.5141 - val_loss: 2.0452
Epoch 3/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.8785 - val_loss: 2.1032
Epoch 4/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.9966 - val_loss: 2.1197
Epoch 5/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2.0353 - val_loss: 2.1476
Epoch 6/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 1.9232 - val_loss: 2.1300
Epoch 7/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.8916 - val_loss: 2.1411
Epoch 8/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.7858 - val_loss: 2.1619
Epoch 9/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



### Compile and train neural network model while logging everything with MLFlow

In [None]:

# End any active MLflow run, faced some issues with this in the past so better end the previous run
mlflow.end_run()

# Start an MLflow run
with mlflow.start_run():
    # Log parameters
    mlflow.log_param("input_dim", input_dim)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("learning_rate", learning_rate)

    # Create and compile the model
    model = create_model(input_dim)
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss=MeanSquaredError())

    # Train the model
    history = model.fit(featuresTrain, targetsTrain, epochs=epochs, batch_size=batch_size, validation_data=(featuresTest, targetsTest))

    # Log the model
    mlflow.tensorflow.log_model(model, "model")

    # Log metrics
    for epoch in range(epochs):
        mlflow.log_metric("loss", history.history['loss'][epoch], step=epoch)
        mlflow.log_metric("val_loss", history.history['val_loss'][epoch], step=epoch)

### Make a test prediction on the logged model

In [None]:
model_uri = 'runs:/157958cbd0e64c819a98fc9693844cd8/model'

# Load the model
model = mlflow.pyfunc.load_model(model_uri)

# Define an input example
INPUT_EXAMPLE = featuresTest.iloc[0].to_dict()

# Convert the input example to a DataFrame
input_df = pd.DataFrame([INPUT_EXAMPLE])

# Validate the serving payload works on the model
prediction = model.predict(input_df)
print(prediction)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
          0
0  0.458066


### Just a test prediction on the logged model

In [None]:
INPUT_EXAMPLE = featuresTest.iloc[0].to_dict()

# Convert the input example to a DataFrame
input_df = pd.DataFrame([INPUT_EXAMPLE])

# Validate the serving payload works on the model
prediction = model.predict(input_df)
print(prediction)

### Make predictions from the logged model, on the whole test set

In [None]:
logged_model = 'runs:/157958cbd0e64c819a98fc9693844cd8/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
loaded_model.predict(featuresTest)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


Unnamed: 0,0
393,0.458066
196,0.841729
493,0.139625
289,0.659365
166,0.824134
...,...
241,-0.178453
42,0.549066
5,1.350644
124,1.394855


### Just another experiment with mlflow to log predictions file as an artifact on the current run of the model

In [None]:
logged_model = 'runs:/157958cbd0e64c819a98fc9693844cd8/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pd
predictions = loaded_model.predict(featuresTest)

# Save predictions to a CSV file
predictions_df = predictions.rename(columns={0: "predictions"})
predictions_df.to_csv("predictions.csv", index=False)

# Log the predictions file
mlflow.log_artifact("predictions.csv")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


### So far, we're done with the training and logging of our model with MLFlow. We've trained our Neural Network and Logged it using MLFlow. The model metrics etc and its artifacts could be observed on the MLFlow UI as in the attached screenshots.

### Next Step is the deployment of this logged model which I have done using Streamlit. In streamlit code we'll use the run id of the model from this current run to get predictions from this trained model using unseen data (That unseen data is being retrieved from the user using the streamlit interface). Streamlit then communicates with the logged model and get prediction of forest fire area from that model using the input features which were entered by the user on the streamlit UI.

### After deployment on localhost, I have tried various methods for cloud deployment. The first one being Streamlit's own cloud. 


### Method to deploy on streamlit cloud is:
1. Create an account on streamlit.io
2. Push your whole code repo alongwith mlflow runs directory, to GitHub
3. Click on Create app button on the top right of the streamlit.io page
4. Now select Deploy a public app from Github
5. Now just follow the directions to connect your github repo to streamlit
6. After some time of being in the oven the app will be deployed to streamlit cloud
7. You'll get a URL for the app, in previous steps while connecting github repo you could even set a custom URL like I did
8. Just copy and paste the URL in any browser and you can see your app running and you can interact with your model using the streamlit interface and get predictions on fresh unseen data.

Here's the link to my streamlit app for this model: https://ds24-as2-c24muhna.streamlit.app/

### Method to deploy using Ngrok
First of all I don't prefer this method since it's an old school thing for just demonstration purposes, has no real industry value. I did it back in my Bachelors project for demonstration of my CNN model by making it interact with a mobile app.
Here are the steps for ngrok
1. Install ngrok on your system
2. Navigate to the ngrok directory and activate it
3. Save or export your model as an h5 file
4. Ngrok will expose a port on your system to be accessible through internet via an ip that ngrok provides you. This IP is kind of temporary since it'll be valid as long as your system or ngrok doesn't halt.
5. You need to configure a Flask API as well with ngrok, Flask will get predictions from the model while ngrok will serve those results over the IP:port that you exposed
6. You can catch the results from the requests and show them anywhere like in a web app or a mobile app 

### Deploy on cloud using Docker image, GitHub Actions CI/CD and Azure Cloud
1. Create a ci/cd pipleline, it's a yml file in the .github/workflows directory. The script in this file connects the docker hub image and the azure cloud and github is the mediator between them.
2. As soon as we push a new code update, github will use this pipeline to build and deploy the docker image to Azure Cloud
3. Make sure to properly configure your Azure Cloud credentials like clientid, clientsecret, dockerhub login id and password etc on GitHub repository's settings under Secrets option
4. Now if all your configurations were correct then any new push to your github repo will trigger this ci/cd pipeline and your model will be built into docker image and deployed on Azure cloud 

### Please refer to the PDF file for screenshots of different steps in MLFlow UI and Deployment of Model for predictions on fresh unseen data.