In [1]:

import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import boto3
import pandas as pd
from dotenv import load_dotenv
import os
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score
import warnings
import numpy as np
from mlflow.models.signature import infer_signature
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [2]:
load_dotenv()

access_key = os.getenv("AWS_ACCESS_KEY_ID_CLIENT")
secret_key = os.getenv("AWS_SECRET_ACCESS_KEY_CLIENT")
s3_url = os.getenv("MLFLOW_S3_ENDPOINT_URL_CLIENT")
tracker_url = os.getenv("MLFLOW_URL_CLIENT")
os.environ["MLFLOW_S3_ENDPOINT_URL"] = os.getenv("MLFLOW_S3_ENDPOINT_URL_CLIENT")
os.environ["MLFLOW_TRACKING_USERNAME"] = os.getenv("MLFLOW_ADMIN_USERNAME_CLIENT")
os.environ["MLFLOW_TRACKING_PASSWORD"] = os.getenv("MLFLOW_ADMIN_PASSWORD_CLIENT")
os.environ["MLFLOW_S3_IGNORE_TLS"] = "true"
os.environ["MLFLOW_ENABLE_SYSTEM_METRICS_LOGGING"] = "true"
bucket_name = "dataset"
object_name = "StressLevelDataset.csv"

In [3]:
print(s3_url)
print(tracker_url)
print(access_key)
print(secret_key)

https://localhost
http://localhost
admin
irul12345


In [4]:
s3 = boto3.client(
    's3',
    endpoint_url=s3_url,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    verify=False
)

In [5]:
try:
    # Attempt to list buckets
    response = s3.list_buckets()
    
    # If successful, print bucket names
    print("Connected successfully! Buckets available:")
    for bucket in response['Buckets']:
        print(f"- {bucket['Name']}")
except Exception as e:
    # Print any connection errors
    print("Connection error:", e)


Connected successfully! Buckets available:
- dataset
- mlflow


In [6]:
try:
    response = s3.get_object(Bucket=bucket_name, Key=object_name)
    print(response)
    dataset_content = response.get('Body')
    print(dataset_content)
    # Load the dataset into a DataFrame
    df = pd.read_csv(dataset_content)
    print("Dataset loaded successfully:")
    print(df.head())
except Exception as e:
    print("Error fetching dataset from MinIO:", e)


{'ResponseMetadata': {'RequestId': '181472FC5B1C9A73', 'HostId': 'b675f5c02385af78c69266a96b22038bea3ecd7423814977a7267fd68ac2bbeb', 'HTTPStatusCode': 200, 'HTTPHeaders': {'accept-ranges': 'bytes', 'content-length': '48717', 'content-type': 'text/csv', 'etag': '"5cfd21529296fcb45db83f2598b2c124"', 'last-modified': 'Wed, 25 Dec 2024 14:40:14 GMT', 'server': 'MinIO', 'strict-transport-security': 'max-age=31536000; includeSubDomains', 'vary': 'Origin, Accept-Encoding', 'x-amz-id-2': 'b675f5c02385af78c69266a96b22038bea3ecd7423814977a7267fd68ac2bbeb', 'x-amz-request-id': '181472FC5B1C9A73', 'x-content-type-options': 'nosniff', 'x-ratelimit-limit': '1575', 'x-ratelimit-remaining': '1575', 'x-xss-protection': '1; mode=block', 'x-amz-version-id': '51392e35-b078-4edc-8605-9d75a4b28960', 'date': 'Wed, 25 Dec 2024 14:49:44 GMT'}, 'RetryAttempts': 0}, 'AcceptRanges': 'bytes', 'LastModified': datetime.datetime(2024, 12, 25, 14, 40, 14, tzinfo=tzutc()), 'ContentLength': 48717, 'ETag': '"5cfd21529296

In [7]:
df.describe()

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
count,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,...,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0
mean,11.063636,17.777273,0.492727,12.555455,2.508182,2.181818,2.66,2.753636,2.649091,2.518182,...,2.772727,2.772727,2.621818,2.648182,2.649091,1.881818,2.734545,2.767273,2.617273,0.996364
std,6.117558,8.944599,0.500175,7.727008,1.409356,0.833575,1.548383,1.400713,1.328127,1.119208,...,1.433761,1.414594,1.315781,1.384579,1.529375,1.047826,1.425265,1.417562,1.530958,0.821673
min,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,11.0,0.0,6.0,1.0,1.0,1.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,0.0
50%,11.0,19.0,0.0,12.0,3.0,2.0,2.5,3.0,3.0,2.0,...,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.5,3.0,1.0
75%,16.0,26.0,1.0,19.0,3.0,3.0,4.0,4.0,3.0,3.0,...,4.0,4.0,3.0,4.0,4.0,3.0,4.0,4.0,4.0,2.0
max,21.0,30.0,1.0,27.0,5.0,3.0,5.0,5.0,5.0,5.0,...,5.0,5.0,5.0,5.0,5.0,3.0,5.0,5.0,5.0,2.0


In [8]:
X = df.drop(columns=['stress_level'])  # Features
y = df['stress_level'] 

In [9]:
experiment_name = "Stress_checker"
mlflow.set_tracking_uri(tracker_url)
mlflow.set_experiment(experiment_name)

2024/12/25 21:49:45 INFO mlflow.tracking.fluent: Experiment with name 'Stress_checker' does not exist. Creating a new experiment.


<Experiment: artifact_location='s3://mlflow/1', creation_time=1735138185327, experiment_id='1', last_update_time=1735138185327, lifecycle_stage='active', name='Stress_checker', tags={}>

In [10]:
X = df.drop(columns=['stress_level'])  # Features
y = df['stress_level'] 

In [11]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [12]:
from mlflow.models.signature import infer_signature
import mlflow.xgboost
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier

# Model parameters
n_estimators = 100
max_depth = 5
random_state = 42

with mlflow.start_run():
    # Log parameters
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("random_state", random_state)

    # Initialize and train the model
    model = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate the model
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    mlflow.log_metric("accuracy", accuracy)

    # Define input example and signature
    input_example = X_test.iloc[:1].fillna(X_test.mean())
    signature = infer_signature(X_test, model.predict(X_test))

    # Log the model with MLflow
    mlflow.xgboost.log_model(
        model,
        artifact_path="model",
        input_example=input_example,
        signature=signature
    )

    # Print run information
    run_id = mlflow.active_run().info.run_id
    print(f"Run ID: {run_id}")
    print(f"Model accuracy: {accuracy}")


2024/12/25 21:49:46 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Run ID: 57a9c8cda65740c29cb71826e822201f
Model accuracy: 0.8681818181818182
🏃 View run flawless-duck-152 at: http://localhost/#/experiments/1/runs/57a9c8cda65740c29cb71826e822201f
🧪 View experiment at: http://localhost/#/experiments/1


2024/12/25 21:49:53 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/12/25 21:49:53 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


In [13]:
from mlflow.models.signature import infer_signature
import mlflow.xgboost
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

# Model parameters
n_estimators = 100
max_depth = 5
random_state = 42

with mlflow.start_run():
    # Log parameters
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("random_state", random_state)

    # Initialize and train the model
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=random_state)
    model.fit(X_train, y_train)

    # Evaluate the model
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    mlflow.log_metric("accuracy", accuracy)

    # Define input example and signature
    input_example = X_test.iloc[:1].fillna(X_test.mean())
    signature = infer_signature(X_test, model.predict(X_test))

    # Log the model with MLflow
    mlflow.sklearn.log_model(
        model,
        artifact_path="model",
        input_example=input_example,
        signature=signature
    )

    # Print run information
    run_id = mlflow.active_run().info.run_id
    print(f"Run ID: {run_id}")
    print(f"Model accuracy: {accuracy}")


2024/12/25 21:49:53 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.


Run ID: bdd839a4c5344945bd3ceab6589a31cc
Model accuracy: 0.8818181818181818
🏃 View run clumsy-conch-307 at: http://localhost/#/experiments/1/runs/bdd839a4c5344945bd3ceab6589a31cc
🧪 View experiment at: http://localhost/#/experiments/1


2024/12/25 21:49:58 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2024/12/25 21:49:58 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


In [14]:
import mlflow.pyfunc

model_uri = f"runs:/{run_id}/model"
model = mlflow.pyfunc.load_model(model_uri)

# Check if metadata and signature are available
if model.metadata:
    print("Model signature:", model.metadata.get_input_schema())
else:
    print("Model metadata is missing.")

  from .autonotebook import tqdm as notebook_tqdm
Downloading artifacts: 100%|██████████| 7/7 [00:00<00:00, 2708.75it/s]

Model signature: ['anxiety_level': long (required), 'self_esteem': long (required), 'mental_health_history': long (required), 'depression': long (required), 'headache': long (required), 'blood_pressure': long (required), 'sleep_quality': long (required), 'breathing_problem': long (required), 'noise_level': long (required), 'living_conditions': long (required), 'safety': long (required), 'basic_needs': long (required), 'academic_performance': long (required), 'study_load': long (required), 'teacher_student_relationship': long (required), 'future_career_concerns': long (required), 'social_support': long (required), 'peer_pressure': long (required), 'extracurricular_activities': long (required), 'bullying': long (required)]



