In [None]:
!rm -rf .git .dvc iris.csv.dvc artifacts __pycache__ .ipynb_checkpoints .gitignore dvc.yaml dvc.lock

In [None]:
!pip install -q dvc[gs] scikit-learn pandas joblib
!pip install -q --upgrade google-cloud-aiplatform

In [None]:
!git init
!dvc init
!git config user.email "tarunarora6029@email.com"
!git config user.name "tarunarora6029"

In [None]:
from google.cloud import aiplatform

PROJECT_ID = "verdant-nova-461606-f4"
LOCATION = "us-central1"
BUCKET_URI = "gs://ibdpractice"
DVC_REMOTE = "gcsremote"

aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=BUCKET_URI)
!dvc remote add -d {DVC_REMOTE} {BUCKET_URI}/dvcstore

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics

# Columns of iris.csv used as model inputs and as the label.
FEATURE_COLUMNS = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
TARGET_COLUMN = 'species'

df = pd.read_csv("iris.csv")

# Hold out 40% of the rows for evaluation; stratify on the label and fix the
# seed so the split is reproducible.
train, test = train_test_split(
    df,
    test_size=0.4,
    stratify=df[TARGET_COLUMN],
    random_state=42,
)

X_train, y_train = train[FEATURE_COLUMNS], train[TARGET_COLUMN]
X_test, y_test = test[FEATURE_COLUMNS], test[TARGET_COLUMN]

# Depth-limited decision tree with a fixed seed; fit() returns the estimator.
model = DecisionTreeClassifier(max_depth=3, random_state=1).fit(X_train, y_train)

# Evaluate on the held-out rows.
preds = model.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, preds))

In [None]:
import joblib
import os

# Serialise the fitted classifier to artifacts/model.joblib, creating the
# directory first if it does not exist yet.
ARTIFACT_DIR = "artifacts"
MODEL_PATH = f"{ARTIFACT_DIR}/model.joblib"

os.makedirs(ARTIFACT_DIR, exist_ok=True)
joblib.dump(model, MODEL_PATH)

In [None]:
# Put the dataset under DVC control, then append a negation pattern to
# .gitignore so Git never ignores the iris.csv.dvc file staged below.
!dvc add iris.csv
!echo '!iris.csv.dvc' >> .gitignore

# Do the same for the trained model; the negation pattern targets .dvc files
# under artifacts/.
!dvc add artifacts/model.joblib
!echo '!artifacts/**.dvc' >> .gitignore

# Commit only .gitignore and the two .dvc files to Git, then upload the
# DVC-tracked data with `dvc push` (the default remote is configured earlier
# in the notebook). Order matters: the .dvc files must exist before staging.
!git add .gitignore iris.csv.dvc artifacts/model.joblib.dvc
!git commit -m "Initial commit: iris + model"
!dvc push

In [None]:
!git add .
!git commit -m "Initial commit: cleaned repo with DVC tracking"
!git remote add origin git@github.com:tarunarora6029/practice.git
!git push -u origin main --force