In [2]:
!pip install mlflow



In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import mlflow
import mlflow.sklearn

# MLflow tracking
mlflow.set_experiment('titanic_survival_prediction')

# URL to the Titanic dataset
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"

# Read data and preprocess
data = pd.read_csv(url)
data = data[['Survived', 'Pclass', 'Sex', 'Age', 'Fare']]
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1})
data = data.dropna()

# Split data
X = data.drop('Survived', axis=1)
y = data['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

with mlflow.start_run():
    # Train the model
    model = LogisticRegression()
    model.fit(X_train, y_train)
    
    # Predict and calculate accuracy
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    
    # Log parameters, metrics, and model
    mlflow.log_param("model_type", "LogisticRegression")
    mlflow.log_metric("accuracy", accuracy)
    model_path = "model"
    mlflow.sklearn.log_model(model, model_path)
    
    # After logging the model, register it
    run_id = mlflow.active_run().info.run_id
    model_uri = f"runs:/{run_id}/{model_path}"
    model_details = mlflow.register_model(model_uri, "TitanicSurvivalModel")
    
    model_name = "TitanicSurvivalModel"
    
    client = mlflow.tracking.MlflowClient()
    client.transition_model_version_stage(
        name=model_name,
        version=model_details.version,
        stage="Staging",
    )
    
    
    
    print('Model training completed. Accuracy:', accuracy)


Model training completed. Accuracy: 0.7552447552447552


Registered model 'TitanicSurvivalModel' already exists. Creating a new version of this model...
Created version '3' of model 'TitanicSurvivalModel'.
  client.transition_model_version_stage(


In [4]:
from flask import Flask, request, jsonify
import mlflow.pyfunc

# Load model from MLflow model registry
model_name = "TitanicSurvivalModel"
model_version = 1  
model = mlflow.pyfunc.load_model(model_uri=f"models:/{model_name}/{model_version}")

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json
    df = pd.DataFrame(data, index=[0])
    prediction = model.predict(df)
    return jsonify({'prediction': int(prediction[0])})

if __name__ == '__main__':
    app.run(debug=True, port=5001)



 * Serving Flask app '__main__'
 * Debug mode: on


Address already in use
Port 5001 is in use by another program. Either identify and stop that program, or start the server with a different port.


AssertionError: 

In [7]:
test_data = pd.read_csv('test.csv')
gender_submission = pd.read_csv('gender_submission.csv')
merged_data = pd.merge(test_data, gender_submission, on='PassengerId', how='inner')
print(merged_data.head())
merged_data.to_csv('merged_test_data.csv', index=False)

   PassengerId  Pclass                                          Name     Sex  \
0          892       3                              Kelly, Mr. James    male   
1          893       3              Wilkes, Mrs. James (Ellen Needs)  female   
2          894       2                     Myles, Mr. Thomas Francis    male   
3          895       3                              Wirz, Mr. Albert    male   
4          896       3  Hirvonen, Mrs. Alexander (Helga E Lindqvist)  female   

    Age  SibSp  Parch   Ticket     Fare Cabin Embarked  Survived  
0  34.5      0      0   330911   7.8292   NaN        Q         0  
1  47.0      1      0   363272   7.0000   NaN        S         1  
2  62.0      0      0   240276   9.6875   NaN        Q         0  
3  27.0      0      0   315154   8.6625   NaN        S         0  
4  22.0      1      1  3101298  12.2875   NaN        S         1  


In [8]:

new_data_url = 'merged_test_data.csv'
new_data = pd.read_csv(new_data_url)
new_data['Sex'] = new_data['Sex'].map({'male': 0, 'female': 1})
new_data = new_data[['Survived', 'Pclass', 'Sex', 'Age', 'Fare']].dropna()

X_new = new_data.drop('Survived', axis=1)
y_new = new_data['Survived']


In [9]:
with mlflow.start_run():
 
    model_updated = LogisticRegression()
    model_updated.fit(X_new, y_new)

    predictions_updated = model_updated.predict(X_test)
    accuracy_updated = accuracy_score(y_test, predictions_updated)

    mlflow.log_param("model_type", "LogisticRegression_updated")
    mlflow.log_metric("accuracy_updated", accuracy_updated)
    updated_model_path = "model_updated"
    mlflow.sklearn.log_model(model_updated, updated_model_path)

    run_id_updated = mlflow.active_run().info.run_id
    model_uri_updated = f"runs:/{run_id_updated}/{updated_model_path}"
    model_details_updated = mlflow.register_model(model_uri_updated, "TitanicSurvivalModelUpdated")
    
    print('Updated model training completed. New Accuracy:', accuracy_updated)


Updated model training completed. New Accuracy: 0.7342657342657343


Successfully registered model 'TitanicSurvivalModelUpdated'.
Created version '1' of model 'TitanicSurvivalModelUpdated'.


In [11]:
client.transition_model_version_stage(
    name=model_name,
    version=model_details.version,
    stage="Production",
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1709518416199, current_stage='Production', description=None, last_updated_timestamp=1709518518906, name='TitanicSurvivalModel', run_id='1bedc7fb063f4ad081435cf296117d75', run_link=None, source='file:///Users/ryliu/Documents/09GitRep/mlworkflow/mlruns/377329108053326363/1bedc7fb063f4ad081435cf296117d75/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>

In [12]:
previous_version = 1  

client.transition_model_version_stage(
    name=model_name,
    version=previous_version,
    stage="Production",
)

  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1709513233549, current_stage='Production', description=None, last_updated_timestamp=1709518546683, name='TitanicSurvivalModel', run_id='1dd6fb5f9d7045c7b257915a08515527', run_link=None, source='file:///Users/ryliu/Documents/09GitRep/mlworkflow/mlruns/377329108053326363/1dd6fb5f9d7045c7b257915a08515527/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=1>

In [13]:
client.transition_model_version_stage(
    name=model_name,
    version=model_details.version,
    stage="Archived",
)


  client.transition_model_version_stage(


<ModelVersion: aliases=[], creation_timestamp=1709518416199, current_stage='Archived', description=None, last_updated_timestamp=1709518561753, name='TitanicSurvivalModel', run_id='1bedc7fb063f4ad081435cf296117d75', run_link=None, source='file:///Users/ryliu/Documents/09GitRep/mlworkflow/mlruns/377329108053326363/1bedc7fb063f4ad081435cf296117d75/artifacts/model', status='READY', status_message=None, tags={}, user_id=None, version=3>