In [1]:
# Install dependencies from requirements.txt
!pip3 install -r requirements.txt



In [2]:
# Initialize Git and DVC repositories
!git init
!dvc init

[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /home/jupyter/week_4/.git/
Initialized DVC repository.

You can now commit the changes to git.

[31m+---------------------------------------------------------------------+
[0m[31m|[0m                                                                     [31m|[0m
[31m|[0m        DVC has enabled anonymous aggregate usage analytics.         [31m|[0m
[31m|[0m     Read the analytics documentation (and how to opt-out) here:     [31m|[0m
[31m|[0m             <[36mhtt

In [None]:
%%writefile .gitignore
.venv/
__pycache__/
*.pyc
.vscode/
.ipynb_checkpoints/
.env
gcloud-creds.json
.DS_Store

In [3]:
# Configure DVC to use Google Cloud Storage as remote storage
BUCKET_NAME="mlops-course-week1-001"
!dvc remote add -d gcp-store gs://$BUCKET_NAME/dvc_store

Setting 'gcp-store' as a default remote.
[0m

In [4]:
# Authenticate with Google Cloud
PATH_TO_CREDENTIALS="gcloud-creds.json"
!gcloud auth activate-service-account --key-file=$PATH_TO_CREDENTIALS

Activated service account credentials for: [github-actions-bot@buoyant-country-473106-a3.iam.gserviceaccount.com]


To take a quick anonymous survey, run:
  $ gcloud survey



In [5]:
# Commit Initial setup and DVC remote configuration to Git
!git add .dvc/config .gitignore requirements.txt setup.ipynb
!git commit -m "Initial commit: Setup project with DVC and Git"

[master (root-commit) 833bf99] Initial commit: Setup project with DVC and Git
 6 files changed, 543 insertions(+)
 create mode 100644 .dvc/.gitignore
 create mode 100644 .dvc/config
 create mode 100644 .dvcignore
 create mode 100644 .gitignore
 create mode 100644 requirements.txt
 create mode 100644 setup.ipynb


In [6]:
# Download dataset file
!wget -O data.csv "https://github.com/IITMBSMLOps/ga_resources/blob/week_1/data/raw/iris.csv?raw=true"

--2025-10-19 16:17:54--  https://github.com/IITMBSMLOps/ga_resources/blob/week_1/data/raw/iris.csv?raw=true
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/IITMBSMLOps/ga_resources/raw/refs/heads/week_1/data/raw/iris.csv [following]
--2025-10-19 16:17:55--  https://github.com/IITMBSMLOps/ga_resources/raw/refs/heads/week_1/data/raw/iris.csv
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/IITMBSMLOps/ga_resources/refs/heads/week_1/data/raw/iris.csv [following]
--2025-10-19 16:17:55--  https://raw.githubusercontent.com/IITMBSMLOps/ga_resources/refs/heads/week_1/data/raw/iris.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.github

In [7]:
# Train and save the model
!python3 train.py

Python version: 3.10.18 | packaged by conda-forge | (main, Jun  4 2025, 14:45:41) [GCC 13.3.0]
Data read successfully.
Data split into train and test sets.
Training Decision Tree model...
Model trained successfully.
The accuracy of the Decision Tree is 0.983
Saving the model to model.joblib...
Model saved successfully.


In [8]:
# Track the model files and artifacts via DVC
!dvc add data.csv model.joblib

# Track the code files via git
!git add data.csv.dvc model.joblib.dvc .gitignore
!git commit -m "feat: Track dataset and model with DVC"

 [?25l[32m⠋[0m Checking graph
  0% Adding...|                           | data.csv |0/2 [00:00<?,     ?file/s]
![A
Collecting files and computing hashes in data.csv     |0.00 [00:00,     ?file/s][A
                                                                                [A
![A
  0% Checking cache in '/home/jupyter/week_4/.dvc/cache/files/md5'| |0/? [00:00<[A
                                                                                [A
![A
  0%|          |Adding data.csv to cache              0/1 [00:00<?,     ?file/s][A
                                                                                [A
![A
  0%|          |Checking out /home/jupyter/week_4/data0/1 [00:00<?,    ?files/s][A
  0% Adding...|                       | model.joblib |0/2 [00:00<?,     ?file/s][A
![A
Collecting files and computing hashes in model.joblib |0.00 [00:00,     ?file/s][A
                                                                                [A
![A
  0% Checking ca

In [9]:
# Push files and artifacts to google cloud storage via DVC
!dvc push

Collecting                                            |2.00 [00:00,  108entry/s]
Pushing
![A
  0% Checking cache in 'mlops-course-week1-001/dvc_store/files/md5'| |0/? [00:00[A
 50% Querying cache in 'mlops-course-week1-001/dvc_store/files/md5'|▌|1/2 [00:01[A
Pushing                                                                         [A
Everything is up to date.
[0m

In [10]:
# Add Git remote and set main branch
!git remote add origin https://github.com/mayank160920/iris-mlops-ci-3.git
!git branch -M main
# git push -u origin main

In [11]:
# Create GitHub Actions workflow directory
!mkdir -p .github/workflows

In [None]:
%%writefile .github/workflows/ci.yml
name: MLOps CI Pipeline

on:
  push:
    branches: [dev]
  pull_request:
    branches: [main]

permissions:
  contents: write
  pull-requests: write

jobs:
  test-and-report:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'

      - name: Upgrade pip
        run: python -m pip install --upgrade pip

      - name: Install core dependencies
        run: |
          pip install -r requirements.txt

      - name: Run CML
        uses: iterative/setup-cml@v2

      - name: Authenticate with Google Cloud
        uses: google-github-actions/auth@v1
        with:
          credentials_json: '${{ secrets.GCP_SA_KEY }}'

      - name: Pull artifacts from DVC
        run: dvc pull

      - name: Run Pytest and Save Report
        run: pytest --maxfail=1 --disable-warnings -q > report.txt

      - name: Create CML report on Pull Request
        if: github.event_name == 'pull_request'
        env:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
        run: |
          echo "## ✅ MLOps CI Report" > cml_report.md
          echo "**Branch:** ${{ github.head_ref }}" >> cml_report.md
          echo "**Commit:** \`${{ github.sha }}\`" >> cml_report.md
          echo "" >> cml_report.md
          echo "### 🧪 Test Summary" >> cml_report.md
          cat report.txt >> cml_report.md
          echo "" >> cml_report.md
          echo "---" >> cml_report.md
          echo "_Generated automatically by CML_" >> cml_report.md
          cml comment create cml_report.md

Overwriting .github/workflows/ci.yml


In [None]:
# Commit tests, requirements, and CI workflow to Git and push to dev branch
!git add .
!git commit -m "feat: Add tests, requirements, and CI workflow"
!git checkout -b dev  # Create and switch to the dev branch

In [None]:
!git push -u origin dev