# 🎯 Lecture 1: Python Fundamentals for ML\n\n## 📋 Overview\n\n**Instructor:** Ho-min Park\n**Email:** homin.park@ghent.ac.kr\n\nPython programming essentials for Machine Learning\n\n## 📚 Table of Contents\n1. Python Basics & Data Types\n2. Functions & Parameters\n3. Classes & OOP\n4. Essential ML Libraries\n5. File I/O\n6. Memory & Performance\n7. GPU Operations\n8. Remote Computing\n9. ML Workflow Project\n10. Best Practices

In [None]:
import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport sys\nimport time\nimport warnings\nwarnings.filterwarnings('ignore')\n\nprint('✅ Libraries loaded!')

---\n## Practice 1: Python Basics & Data Types\n\n### Learning Objectives\n- Understand data types for ML\n- Compare memory usage\n- NumPy vs Python lists

In [None]:
# Four built-in types: an int, a float, a str and a list
x, y = 42, 3.14
name, data = 'ML', [1, 2, 3]

# Show each value next to the type Python assigned to it
for label, value in (('Integer', x), ('Float', y), ('String', name), ('List', data)):
    print(f'{label}: {value}, type: {type(value)}')

In [None]:
# Same element count, three different precisions
size = 1000

arr_int8 = np.random.randint(low=0, high=100, size=size, dtype=np.int8)
arr_float16 = np.random.randn(size).astype(np.float16)
arr_float32 = np.random.randn(size).astype(np.float32)

# nbytes is the size of the element data: count * bytes-per-element
for label, arr in (('INT8:', arr_int8), ('FLOAT16:', arr_float16), ('FLOAT32:', arr_float32)):
    print(f'{label:<8} {arr.nbytes} bytes')

fp16_saving = (1 - arr_float16.nbytes / arr_float32.nbytes) * 100
print(f'\nMemory savings FP32→FP16: {fp16_saving:.1f}%')

---\n## Practice 2: Functions & Parameters\n\n### Learning Objectives\n- Define functions\n- Use parameters and return values\n- Understand `*args` and `**kwargs`

In [None]:
# Basic function
def calculate_accuracy(correct, total):
    """Return the share of correct predictions as a percentage.

    Args:
        correct: Number of correct predictions.
        total: Total number of predictions.

    Returns:
        ``correct / total`` scaled by 100.

    Raises:
        ZeroDivisionError: If ``total`` is zero.
    """
    fraction = correct / total
    return fraction * 100


acc = calculate_accuracy(correct=85, total=100)
print(f'Accuracy: {acc:.2f}%')

In [None]:
# Default parameters
def train_model(epochs=10, lr=0.001, batch_size=32):
    """Announce a (simulated) training run and return a summary string.

    Args:
        epochs: Number of epochs to report.
        lr: Learning rate to report.
        batch_size: Batch size to report.

    Returns:
        The string ``'Trained for <epochs> epochs'``.
    """
    settings = f'epochs={epochs}, lr={lr}, batch_size={batch_size}'
    print(f'Training: {settings}')
    summary = f'Trained for {epochs} epochs'
    return summary


# Three ways to call it: all defaults, positional overrides, keyword overrides
result1 = train_model()
result2 = train_model(20, 0.01)
result3 = train_model(epochs=50, lr=0.0001)

In [None]:
# *args and **kwargs
def flexible_function(*args, **kwargs):
    """Print whatever positional and keyword arguments were passed in.

    Args:
        *args: Any positional arguments; collected into a tuple.
        **kwargs: Any keyword arguments; collected into a dict.
    """
    print(f'Positional args: {args}')
    print(f'Keyword args: {kwargs}')


flexible_function(1, 2, 3, name='ResNet', layers=50)

---\n## Practice 3: Classes & OOP\n\n### Learning Objectives\n- Create classes\n- Use `__init__` and `self`\n- Implement methods\n- Understand inheritance

In [None]:
# Simple class
class SimpleModel:
    """Toy model that tracks whether it has been trained.

    No real learning happens: ``train`` only flips a flag and ``predict``
    returns random class indices.
    """

    def __init__(self, name, input_size, output_size):
        """Store the model's name and sizes; the model starts untrained."""
        self.name, self.input_size, self.output_size = name, input_size, output_size
        self.is_trained = False

    def train(self, epochs):
        """Print a training message and mark the model as trained."""
        print(f'Training {self.name} for {epochs} epochs...')
        self.is_trained = True

    def predict(self, data):
        """Return one random class index per item in ``data``.

        Returns:
            An integer array of length ``len(data)`` with values in
            ``[0, output_size)``, or ``None`` (after printing a warning)
            if the model has not been trained yet.
        """
        if self.is_trained:
            return np.random.randint(0, self.output_size, len(data))
        print('⚠️ Model not trained!')
        return None


model = SimpleModel('MyModel', 784, 10)
model.train(10)
preds = model.predict(np.zeros((5, 784)))
print(f'Predictions: {preds}')

---\n## Practice 4: Essential ML Libraries\n\n### Learning Objectives\n- Master NumPy operations\n- Work with Pandas\n- Create visualizations\n- PyTorch basics

In [None]:
# NumPy essentials: element-wise arithmetic and the dot product
arr1, arr2 = np.array([1, 2, 3]), np.array([4, 5, 6])

elementwise_sum = arr1 + arr2
elementwise_product = arr1 * arr2
dot_product = arr1 @ arr2

print(f'Addition: {elementwise_sum}')
print(f'Multiplication: {elementwise_product}')
print(f'Dot product: {dot_product}')

# Matrix operations: summary statistics over every entry of a random 3x3 matrix
matrix = np.random.randn(3, 3)
print(f'\nMatrix:\n{matrix}')
print(f'\nMean: {np.mean(matrix):.4f}')
print(f'Std: {np.std(matrix):.4f}')

In [None]:
# Pandas basics: build a small table and look up the row with the top accuracy
data = {
    'Model': ['ResNet', 'VGG', 'EfficientNet'],
    'Accuracy': [0.92, 0.89, 0.95],
    'Params_M': [25.5, 138.3, 5.3],
}

df = pd.DataFrame(data)
print(df)

# idxmax gives the index label of the highest accuracy; .loc then reads that row's model name
best_row = df['Accuracy'].idxmax()
best_model = df.loc[best_row, 'Model']
print(f'\nBest model: {best_model}')

---\n## Practice 5: File I/O & Data Loading\n\n### Learning Objectives\n- Read/write CSV and JSON\n- Use Pickle\n- Manage paths with pathlib

In [None]:
import json
import pickle
from pathlib import Path

# CSV operations: write a small training log to disk, then read it back
data = {
    'epoch': [1, 2, 3, 4, 5],
    'loss': [0.5, 0.4, 0.3, 0.2, 0.1],
    'accuracy': [0.85, 0.88, 0.91, 0.93, 0.95],
}
df = pd.DataFrame(data)

csv_name = 'training_log.csv'
df.to_csv(csv_name, index=False)  # index=False: do not write the row index as a column
loaded_df = pd.read_csv(csv_name)

print('CSV saved and loaded:')
print(loaded_df.head())

In [None]:
# JSON operations: save a config dict to disk and load it back
config = {
    'model': 'ResNet50',
    'epochs': 100,
    'batch_size': 32,
    'learning_rate': 0.001,
}

# Serialize to text first, then write the text in one call
with open('config.json', 'w') as f:
    f.write(json.dumps(config, indent=2))

# Read the text back and parse it
with open('config.json', 'r') as f:
    loaded_config = json.loads(f.read())

print('Config saved and loaded:')
print(json.dumps(loaded_config, indent=2))

---\n## Practice 6: Memory & Performance\n\n### Learning Objectives\n- Measure memory usage\n- Time code execution\n- Compare performance\n- Understand vectorization

In [None]:
# Memory comparison
import sys

size = 10000
python_list = list(range(size))
numpy_array = np.arange(size)

# sys.getsizeof() is shallow: for a list it counts only the list object and
# its array of references, not the int objects those references point to.
# Add the size of every element so the list figure covers the data as well.
list_bytes = sys.getsizeof(python_list) + sum(sys.getsizeof(item) for item in python_list)

# nbytes is the size of the array's element data (element count * item size).
array_bytes = numpy_array.nbytes

print(f'Python List: {list_bytes:,} bytes')
print(f'NumPy Array: {array_bytes:,} bytes')
print(f'Reduction: {(1 - array_bytes / list_bytes) * 100:.1f}%')

In [None]:
# Performance comparison
import time

size = 1_000_000

# time.perf_counter() is the clock intended for measuring short durations.
# time.time() can be too coarse on some platforms, making the NumPy timing
# come out as 0.0 and the speedup division fail.

# Python loop
start = time.perf_counter()
result = [i * 2 for i in range(size)]
time_list = time.perf_counter() - start

# NumPy vectorized (array creation is kept outside the timed region)
arr = np.arange(size)
start = time.perf_counter()
result = arr * 2
time_numpy = time.perf_counter() - start

# Guard the ratio so a zero-length measurement cannot raise ZeroDivisionError
speedup = time_list / time_numpy if time_numpy > 0 else float('inf')

print(f'Python list: {time_list:.4f}s')
print(f'NumPy array: {time_numpy:.4f}s')
print(f'Speedup: {speedup:.1f}x 🚀')

---\n## Practice 7: GPU & Tensor Operations\n\n### Learning Objectives\n- Check GPU availability\n- Create PyTorch tensors\n- Transfer between CPU/GPU\n- Compare performance

In [None]:
# Check GPU (optional, requires PyTorch)
try:
    import torch

    print(f'PyTorch version: {torch.__version__}')
    has_cuda = torch.cuda.is_available()
    print(f'CUDA available: {has_cuda}')

    if has_cuda:
        print(f'GPU name: {torch.cuda.get_device_name(0)}')

    # A freshly created tensor; its .device attribute is printed below
    cpu_tensor = torch.randn(3, 3)
    print(f'\nCPU tensor device: {cpu_tensor.device}')

    # Copy it to the GPU only when one is actually available
    if has_cuda:
        gpu_tensor = cpu_tensor.to('cuda')
        print(f'GPU tensor device: {gpu_tensor.device}')

except ImportError:
    print('⚠️ PyTorch not installed. Skip this section.')

---\n## Practice 8: SSH & Remote Computing\n\n### Learning Objectives\n- Environment variables\n- SSH configuration\n- Remote workflow concepts

In [None]:
import os

# Environment variables: values are always stored as strings
settings = {
    'MODEL_PATH': '/models/checkpoint.pth',
    'BATCH_SIZE': '32',
    'LEARNING_RATE': '0.001',
}
os.environ.update(settings)

# Read each value back from the environment to confirm it was set
print('Environment variables set:')
for key in settings:
    print(f'  {key}: {os.environ[key]}')

---\n## Practice 9: ML Workflow Project\n\n### Learning Objectives\n- Complete ML workflow\n- Data → Train → Evaluate → Save\n- Integrated pipeline

In [None]:
class MLWorkflow:
    """Miniature end-to-end workflow: load data, "train", save the history.

    The training loop is simulated: loss and accuracy are computed from the
    epoch number alone and the loaded data is not used by ``train``.
    """

    def __init__(self, name):
        """Remember the workflow name and start with an empty history."""
        self.name = name
        self.history = dict(loss=[], acc=[])

    def load_data(self):
        """Generate a random dataset and return it as ``(X, y)``.

        Returns:
            ``X`` of shape (1000, 20) drawn from a standard normal, and
            ``y`` of shape (1000,) holding random 0/1 labels.
        """
        n_samples, n_features = 1000, 20
        X = np.random.randn(n_samples, n_features)
        y = np.random.randint(0, 2, n_samples)
        print(f'✓ Data loaded: {X.shape}')
        return X, y

    def train(self, epochs=5):
        """Append one simulated loss/accuracy pair per epoch to ``history``."""
        print(f'\nTraining {self.name}...')
        losses, accuracies = self.history['loss'], self.history['acc']
        for epoch in range(1, epochs + 1):
            # Synthetic curves: loss shrinks as 1/epoch, accuracy approaches 1
            loss, acc = 1.0 / epoch, 1 - 1.0 / (epoch + 1)
            losses.append(loss)
            accuracies.append(acc)
            print(f'  Epoch {epoch}: loss={loss:.4f}, acc={acc:.4f}')

    def save(self):
        """Pickle ``history`` to ``<name>_results.pkl`` in the working directory."""
        out_path = f'{self.name}_results.pkl'
        with open(out_path, 'wb') as f:
            pickle.dump(self.history, f)
        print('\n✓ Results saved')


# Run workflow
workflow = MLWorkflow('ImageClassifier')
X, y = workflow.load_data()
workflow.train(epochs=5)
workflow.save()

---\n## Practice 10: Debugging & Best Practices\n\n### Learning Objectives\n- Error handling\n- Logging\n- Type hints\n- Documentation

In [None]:
# Error handling
def safe_divide(a, b):
    """Divide ``a`` by ``b``, reporting failures instead of raising.

    Args:
        a: Numerator.
        b: Denominator.

    Returns:
        ``a / b`` on success; ``None`` after printing a warning when the
        division raises ``ZeroDivisionError`` or ``TypeError``.
    """
    try:
        quotient = a / b
    except ZeroDivisionError:
        warning = '⚠️ Error: Division by zero!'
    except TypeError:
        warning = '⚠️ Error: Invalid types!'
    else:
        return quotient
    # Only reached when one of the handlers above ran
    print(warning)
    return None


for numerator, denominator in ((10, 2), (10, 0), ('10', 2)):
    print(safe_divide(numerator, denominator))

In [None]:
# Type hints
from typing import List, Dict


def calculate_metrics(predictions: List[int], labels: List[int]) -> Dict[str, float]:
    """
    Calculate evaluation metrics.

    Args:
        predictions: Predicted labels
        labels: True labels, one per prediction

    Returns:
        Dictionary with 'accuracy' (fraction of matching positions) and
        'error_rate' (1 - accuracy)

    Raises:
        ValueError: If the inputs differ in length or are empty
    """
    # zip() stops at the shorter input, so a length mismatch would silently
    # compare only part of the data while still dividing by len(labels).
    if len(predictions) != len(labels):
        raise ValueError(
            f'predictions and labels must have the same length, '
            f'got {len(predictions)} and {len(labels)}'
        )
    # Accuracy is undefined without samples; fail with a clear message
    # rather than an unexplained ZeroDivisionError.
    if len(labels) == 0:
        raise ValueError('cannot compute metrics for empty inputs')

    correct = sum(pred == label for pred, label in zip(predictions, labels))
    accuracy = correct / len(labels)
    return {'accuracy': accuracy, 'error_rate': 1 - accuracy}


preds = [1, 0, 1, 1, 0]
labels = [1, 0, 1, 0, 0]
metrics = calculate_metrics(preds, labels)
print(f'Metrics: {metrics}')

---\n## 🎯 Practice Complete!\n\n### Summary\n\nYou've learned:\n1. ✅ Python data types & memory\n2. ✅ Functions & parameters\n3. ✅ Classes & OOP\n4. ✅ NumPy, Pandas, Matplotlib\n5. ✅ File I/O operations\n6. ✅ Memory & performance\n7. ✅ GPU tensor operations\n8. ✅ Remote computing basics\n9. ✅ Complete ML workflow\n10. ✅ Best practices\n\n### Key Insights\n- FP16 saves 50% memory vs FP32\n- NumPy is 10-100x faster than loops\n- GPU provides massive speedup\n- Good practices prevent bugs\n\n### Next Steps\n- Practice with real datasets\n- Build ML projects\n- Review Lecture 1 concepts\n\n**Happy Coding! 🎉**