In [1]:
"""
COLAB SETUP & VERIFICATION
Run this FIRST to ensure everything is configured correctly
"""

# ============================================================
# SECTION 1: GITHUB INTEGRATION
# ============================================================

print("="*60)
print("STEP 1: GitHub Setup")
print("="*60)

from google.colab import auth
from google.colab import drive
import os
import subprocess

# Mount Google Drive
drive.mount('/content/drive')
print("✓ Google Drive mounted")

# Authenticate the Colab session.
# NOTE: google.colab.auth signs in the *Google* account for this runtime;
# it does not log in to GitHub, so the message says what actually happened.
auth.authenticate_user()
print("✓ Google account authentication complete")

# Clone repository
YOUR_GITHUB_USERNAME = "samkiva"  # ← UPDATE THIS to clone your own fork
REPO_NAME = "AI-Tools-Assignment"

# Build the URL from the two settings above so that editing them actually
# changes which repository is cloned.
repo_url = f"https://github.com/{YOUR_GITHUB_USERNAME}/{REPO_NAME}.git"

# Always clone into an absolute path: on a re-run the working directory is
# already the repo, and a relative clone would create a nested copy.
repo_dir = f'/content/{REPO_NAME}'

if os.path.isdir(os.path.join(repo_dir, '.git')):
    # Cell was run before in this session; reuse the existing checkout.
    print(f"✓ Repository already present: {repo_dir}")
else:
    clone = subprocess.run(['git', 'clone', repo_url, repo_dir],
                           capture_output=True, text=True)
    if clone.returncode != 0:
        # Fail loudly with git's own error instead of continuing silently.
        raise RuntimeError(
            f"git clone failed for {repo_url}:\n{clone.stderr.strip()}"
        )
    print(f"✓ Repository cloned: {repo_url}")

os.chdir(repo_dir)
print(f"✓ Working directory: {os.getcwd()}")

# ============================================================
# SECTION 2: LIBRARY VERIFICATION
# ============================================================

print("\n" + "="*60, "STEP 2: Library Versions", "="*60, sep="\n")

import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn
import spacy
import matplotlib
import seaborn

# One line per library, in a fixed order, each read from the module's
# own __version__ attribute.
print("\n".join(
    f"✓ {label}: {lib.__version__}"
    for label, lib in (
        ("TensorFlow", tf),
        ("NumPy", np),
        ("Pandas", pd),
        ("Scikit-learn", sklearn),
        ("spaCy", spacy),
        ("Matplotlib", matplotlib),
        ("Seaborn", seaborn),
    )
))

# ============================================================
# SECTION 3: GPU VERIFICATION (CRITICAL FOR TASK 2)
# ============================================================

print("\n" + "="*60)
print("STEP 3: GPU Availability (CRITICAL)")
print("="*60)

# Ask TensorFlow which GPU devices it can see; an empty list means none.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"✓ GPU AVAILABLE: {len(gpus)} GPU(s) detected")
    for gpu in gpus:
        print(f"  → {gpu}")
else:
    print("⚠ NO GPU DETECTED!")
    print("  → Go to Runtime → Change runtime type → Hardware accelerator: GPU")

# Test computation on whatever device TensorFlow selects
@tf.function
def test_gpu():
    """Multiply two random 10000x10000 matrices and return the product.

    No device is pinned here, so TensorFlow chooses the placement itself.
    """
    a = tf.random.normal([10000, 10000])
    b = tf.random.normal([10000, 10000])
    return tf.matmul(a, b)

# Label the test with the device class that was actually detected, so a
# CPU-only runtime is never reported as a successful GPU computation.
device_label = "GPU" if gpus else "CPU"
print(f"\nTesting {device_label} performance...")
result = test_gpu()
print(f"✓ {device_label} computation successful: {result.shape}")

# ============================================================
# SECTION 4: STORAGE STRUCTURE
# ============================================================

print("\n" + "="*60)
print("STEP 4: Creating Project Structure")
print("="*60)

folders = [
    'notebooks',
    'data',
    'models',
    'outputs/visualizations',
    'outputs/predictions'
]

for folder in folders:
    os.makedirs(folder, exist_ok=True)
    print(f"✓ Created: {folder}/")

print("\nProject structure:")
!tree -L 2 || ls -la

# ============================================================
# SECTION 5: DOWNLOAD SPACY MODEL
# ============================================================

print("\n" + "="*60)
print("STEP 5: Download spaCy Model")
print("="*60)

import subprocess
import sys

try:
    # Reuse the model if it is already installed instead of downloading
    # it again on every run.
    nlp = spacy.load('en_core_web_sm')
except OSError:
    # spaCy raises OSError when the model package cannot be found.
    # sys.executable guarantees the download runs in this kernel's
    # interpreter rather than whichever `python` is first on PATH.
    download = subprocess.run(
        [sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'],
        capture_output=True, text=True)
    if download.returncode != 0:
        # Surface the real failure instead of a later, vaguer load error.
        raise RuntimeError(
            f"spaCy model download failed:\n{download.stderr.strip()}"
        )
    nlp = spacy.load('en_core_web_sm')

print(f"✓ spaCy model loaded: {nlp.meta['name']}")

# ============================================================
# SECTION 6: TEST DATA ACCESS
# ============================================================

print("\n" + "="*60, "STEP 6: Test Dataset Loading", "="*60, sep="\n")

from sklearn.datasets import load_iris
from tensorflow.keras.datasets import mnist

# Iris, via scikit-learn's loader
iris = load_iris()
print("✓ Iris dataset loaded: {}".format(iris.data.shape))

# MNIST, via Keras; load_data() hands back the train and test pairs
(X_train, y_train), (X_test, y_test) = mnist.load_data()
print("✓ MNIST dataset loaded: {}".format(X_train.shape))

# ============================================================
# SECTION 7: SAVE SETUP CONFIRMATION
# ============================================================

print("\n" + "="*60)
print("STEP 7: Verification Complete!")
print("="*60)

# Report the GPU state detected in STEP 3 rather than always claiming
# success.
gpu_status = "✓ CONFIRMED" if gpus else "⚠ NOT DETECTED"

# This must be an f-string: without the prefix the
# {YOUR_GITHUB_USERNAME}/{REPO_NAME} placeholders are printed literally.
print(f"""
✅ READY TO START!

Your Colab environment is configured for:
  • Task 1: Scikit-learn (CPU, instant)
  • Task 2: TensorFlow CNN (GPU, fast training)
  • Task 3: spaCy NLP (CPU, instant)

NEXT STEPS:
  1. Update YOUR_GITHUB_USERNAME above
  2. Run this cell to initialize
  3. Move to Task 1, 2, or 3 notebooks
  4. After each task, commit to GitHub:
     !git add -A
     !git commit -m "Completed Task X"
     !git push

GitHub Integration:
  • Changes auto-saved to {YOUR_GITHUB_USERNAME}/{REPO_NAME}
  • Easy access from any device
  • Version control for debugging

GPU Status: {gpu_status}
Libraries: ✓ VERIFIED
Datasets: ✓ ACCESSIBLE
""")

STEP 1: GitHub Setup
Mounted at /content/drive
✓ Google Drive mounted
✓ GitHub authentication complete
✓ Repository cloned: https://github.com/samkiva/AI-Tools-Assignment.git
✓ Working directory: /content/AI-Tools-Assignment

STEP 2: Library Versions
✓ TensorFlow: 2.19.0
✓ NumPy: 2.0.2
✓ Pandas: 2.2.2
✓ Scikit-learn: 1.6.1
✓ spaCy: 3.8.7
✓ Matplotlib: 3.10.0
✓ Seaborn: 0.13.2

STEP 3: GPU Availability (CRITICAL)
✓ GPU AVAILABLE: 1 GPU(s) detected
  → PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')

Testing GPU performance...
✓ GPU computation successful: (10000, 10000)

STEP 4: Creating Project Structure
✓ Created: notebooks/
✓ Created: data/
✓ Created: models/
✓ Created: outputs/visualizations/
✓ Created: outputs/predictions/

Project structure:
/bin/bash: line 1: tree: command not found
total 36
drwxr-xr-x 7 root root 4096 Oct 27 05:48 .
drwxr-xr-x 1 root root 4096 Oct 27 05:48 ..
drwxr-xr-x 2 root root 4096 Oct 27 05:48 data
drwxr-xr-x 8 root root 4096 Oct 27 05:48 