# MLOps Project - Data Exploration

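This notebook explores the Iris dataset used by the MLOps pipeline: it loads the data with the project's `load_iris_data` helper, reviews summary statistics and class counts, visualizes feature relationships, and logs basic dataset metadata to MLflow.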

In [None]:
import sys
# Put the parent directory on the import path so the `src` package imported
# below can be resolved (presumably the notebook sits one level below the
# project root -- verify).
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data.data_loader import load_iris_data
from src.config import RAW_DATA_DIR

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's 'whitegrid' style to the plots drawn in this notebook.
sns.set_style('whitegrid')

## 1. Load Data

In [None]:
# Pull the data in through the project loader, report its shape,
# and preview the leading rows.
df = load_iris_data()
print("Dataset shape: {}".format(df.shape))
df.head(n=5)

## 2. Data Statistics

In [None]:
# Descriptive statistics for df, using pandas' default describe() settings.
df.describe()

In [None]:
# Number of rows for each distinct value of the 'target_name' column.
df['target_name'].value_counts()

## 3. Visualization

In [None]:
# Pairplot of the numeric feature columns, colored by class name.
# Without an explicit `vars` list seaborn grids every numeric column of df.
# `target` looks like the encoded class label (same information as
# `target_name`) -- TODO confirm against load_iris_data -- so it is left out
# of the grid rather than plotted as if it were a feature.
pair_vars = [
    col
    for col in df.select_dtypes(include='number').columns
    if col not in ('target', 'target_name')
]
sns.pairplot(df, vars=pair_vars, hue='target_name', height=2.5)
# y slightly above 1 keeps the title clear of the top row of subplots.
plt.suptitle('Iris Dataset - Feature Relationships', y=1.02)
plt.show()

In [None]:
# Correlation heatmap over the feature columns only: the label columns
# ('target', 'target_name') are excluded before computing correlations.
label_cols = {'target', 'target_name'}
feature_cols = [name for name in df.columns if name not in label_cols]
corr = df[feature_cols].corr()

# Draw on an explicit Axes; center=0 puts the colormap midpoint at zero correlation.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr, annot=True, cmap='coolwarm', center=0, ax=ax)
ax.set_title('Feature Correlation Matrix')
plt.show()

## 4. MLflow Experiment Tracking Example

In [None]:
import os

import mlflow

# Tracking server: honor MLFLOW_TRACKING_URI when it is set (and non-empty) so
# the notebook can target a shared/remote server; otherwise fall back to the
# local default server.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI") or "http://localhost:5000"
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment("iris_exploration")

# NOTE: this cell relies on `df` from the data-loading cell and on
# `feature_cols` from the correlation-matrix cell; run those first.
with mlflow.start_run():
    # Log parameters
    mlflow.log_param("dataset", "iris")
    mlflow.log_param("samples", len(df))

    # Log metrics
    mlflow.log_metric("features", len(feature_cols))
    mlflow.log_metric("classes", df['target'].nunique())

    print("Metrics logged to MLflow!")