**Değerlendirme ve Görevler**

[PyTorch RNN Tutorial - Name Classification Using A Recurrent Neural Net](https://www.youtube.com/watch?v=WEV61GmmPrk)

*Videodaki projeyi yapınız*

In [None]:
# Name Classification (Char-RNN) — Quick demo
# Küçük bir karakter-seviyeli RNN ile isimleri (erkek/kadın) sınıflandırma örneği.
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Tiny synthetic dataset: (name, label) pairs, five names per label.
class0_names = ["Emma", "Olivia", "Ava", "Isabella", "Sophia"]
class1_names = ["Liam", "Noah", "Oliver", "Elijah", "James"]
names = [(n, 0) for n in class0_names] + [(n, 1) for n in class1_names]

# Character vocabulary: every distinct lower-cased character, in sorted order.
chars = sorted({ch.lower() for name, _ in names for ch in name})
# Ids start at 1 because id 0 is reserved for padding.
stoi = dict(zip(chars, range(1, len(chars) + 1)))
itos = dict(enumerate(chars, start=1))
vocab_size = len(chars) + 1

# Length of the longest name; every encoded name is padded up to this length.
max_len = len(max((name for name, _ in names), key=len))

def encode(name):
    """Encode a name as a fixed-length tensor of character ids.

    Each character is lower-cased and looked up in the module-level ``stoi``
    table. The result always has exactly ``max_len`` entries: shorter names
    are right-padded with 0 (the padding id) and longer names are truncated,
    so that encoded names can always be stacked into a batch.

    Args:
        name: The string to encode.

    Returns:
        A 1-D ``torch.long`` tensor of length ``max_len``.

    Raises:
        KeyError: If a character of ``name`` is not in ``stoi``.
    """
    # Lower-case per character, matching how the vocabulary keys were built.
    ids = [stoi[ch.lower()] for ch in name[:max_len]]
    # Right-pad with the reserved padding id.
    ids += [0] * (max_len - len(ids))
    return torch.tensor(ids, dtype=torch.long)

class NameDataset(Dataset):
    """In-memory dataset of (encoded name, label) tensor pairs.

    Every pair is converted once at construction time: the name goes through
    ``encode`` and the label becomes a scalar ``torch.long`` tensor.
    """

    def __init__(self, pairs):
        samples = []
        for name, label in pairs:
            features = encode(name)
            target = torch.tensor(label, dtype=torch.long)
            samples.append((features, target))
        self.data = samples

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Wrap the labelled names in a dataset and serve them in shuffled batches of 4.
ds = NameDataset(pairs=names)
loader = DataLoader(dataset=ds, batch_size=4, shuffle=True)

# Model: simple embedding + RNN (GRU) + classifier
class SimpleNameRNN(nn.Module):
    """Character-level sequence classifier: embedding -> GRU -> linear head.

    Args:
        vocab_size: Number of embedding rows (character ids plus padding id 0).
        emb: Embedding dimension.
        hidden: GRU hidden size.
        num_classes: Number of output logits.
    """

    def __init__(self, vocab_size, emb=16, hidden=32, num_classes=2):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, emb, padding_idx=0)
        self.gru = nn.GRU(emb, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Integer tensor of character ids, shape (batch, seq_len).
               Padding (id 0) is assumed to come only after the real
               characters (right-padding).
        """
        e = self.embed(x)
        out, _ = self.gru(e)  # (batch, seq_len, hidden) because batch_first=True
        # The GRU keeps updating its state on padding steps, so its final
        # state depends on how much padding follows the name. Read the output
        # at the last real character instead.
        lengths = (x != self.embed.padding_idx).sum(dim=1)
        # An all-padding row falls back to the first time step.
        last_step = (lengths.clamp(min=1) - 1).view(-1, 1, 1)
        last_step = last_step.expand(-1, 1, out.size(-1))
        return self.fc(out.gather(1, last_step).squeeze(1))

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleNameRNN(vocab_size).to(device)
crit = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=0.01)

# Quick training: 5 passes over the loader; each pass reports the mean of the
# per-batch losses and the accuracy over all samples seen in that pass.
model.train()
for epoch in range(5):
    total = 0
    correct = 0
    running_loss = 0.0
    for xb, yb in loader:
        xb = xb.to(device)
        yb = yb.to(device)

        opt.zero_grad()
        logits = model(xb)
        loss = crit(logits, yb)
        loss.backward()
        opt.step()

        running_loss += loss.item()
        pred = torch.argmax(logits, dim=1)
        correct += pred.eq(yb).sum().item()
        total += yb.shape[0]
    print(f"Epoch {epoch+1}/5 — loss: {running_loss/len(loader):.4f} — acc: {correct/total:.2f}")

# Save the weights together with the vocabulary table and padding length.
path = './models/name_rnn_quick.pth'
os.makedirs('./models', exist_ok=True)
checkpoint = {'model_state': model.state_dict(), 'stoi': stoi, 'max_len': max_len}
torch.save(checkpoint, path)
print('Saved model to', path)


[PyTorch Tutorial - RNN & LSTM & GRU - Recurrent Neural Nets](https://www.youtube.com/watch?v=0_PgWWmauHk)

*Videodaki projeyi yapınız*

In [None]:
# RNN vs LSTM vs GRU — tiny sequence modeling demo
# Task: predict a target value from a short numeric sequence (toy task) and run one training epoch per model.
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Synthetic sequences: each input is 5 random digits (0-9) scaled by 1/10;
# the target is (sum of the digits mod 10), scaled by 1/10 as well.
import numpy as np
np.random.seed(0)
digits = np.random.randint(0,10,(100,5))
X = digits.astype(np.float32)/10.0
# Take the modulo on the integer digits, not on the float32 sum: a rounded
# sum that lands just below an integer would wrap to ~0.9999 instead of 0.
Y = ((digits.sum(axis=1) % 10) / 10.0).astype(np.float32)

X = torch.tensor(X).unsqueeze(-1)  # (N,5,1)
Y = torch.tensor(Y).unsqueeze(-1)  # (N,1)
loader = DataLoader(TensorDataset(X,Y), batch_size=16, shuffle=True)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class SeqModel(nn.Module):
    """Single-layer recurrent regressor with a selectable cell type.

    Args:
        rnn_type: 'RNN' selects nn.RNN and 'LSTM' selects nn.LSTM; any other
            value selects nn.GRU.
        input_size: Number of features per time step.
        hidden: Hidden-state size of the recurrent layer.
    """

    def __init__(self, rnn_type='RNN', input_size=1, hidden=16):
        super().__init__()
        if rnn_type == 'LSTM':
            layer_cls = nn.LSTM
        elif rnn_type == 'RNN':
            layer_cls = nn.RNN
        else:
            layer_cls = nn.GRU
        self.rnn = layer_cls(input_size, hidden, batch_first=True)
        self.fc = nn.Linear(hidden, 1)

    def forward(self, x):
        """Map the final hidden state of the last layer to one value per sequence."""
        _, state = self.rnn(x)
        # nn.LSTM returns a (h_n, c_n) tuple; the other cells return h_n directly.
        hidden = state[0] if isinstance(state, tuple) else state
        return self.fc(hidden[-1])

# One model and one Adam optimizer per recurrent cell type.
models = {}
for cell in ('RNN', 'LSTM', 'GRU'):
    models[cell] = SeqModel(cell).to(device)
opt = {}
for t, m in models.items():
    opt[t] = torch.optim.Adam(m.parameters(), lr=0.01)
crit = nn.MSELoss()

# Train each model for a single epoch and report the mean of its batch losses.
for t, m in models.items():
    m.train()
    optimizer = opt[t]
    total_loss = 0.0
    for xb, yb in loader:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()
        out = m(xb)
        loss = crit(out, yb)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    print(f"{t} — avg loss: {total_loss/len(loader):.4f}")


[Pytorch Transformers from Scratch (Attention is all you need)](https://www.youtube.com/watch?v=U0s0f995w14)

*Videodaki projeyi yapınız*

In [None]:
# Transformers from Scratch — Multi-Head Self-Attention minimal implementation
import torch
import torch.nn as nn

def clones(module, N):
    """Return an ``nn.ModuleList`` holding ``N`` independent copies of ``module``.

    Each entry is a deep copy, so every copy has its own parameters. Repeating
    the same object ``N`` times would register a single set of weights shared
    by all positions.

    Args:
        module: The ``nn.Module`` to replicate.
        N: Number of copies to create.

    Returns:
        An ``nn.ModuleList`` of length ``N``.
    """
    import copy  # local import keeps this helper self-contained

    return nn.ModuleList([copy.deepcopy(module) for _ in range(N)])

class MultiHeadSelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    Queries, keys and values come from one fused linear projection of the
    input; the per-head results are concatenated and passed through an output
    projection.

    Args:
        embed_dim: Size of the input and output feature dimension.
        num_heads: Number of attention heads; must divide ``embed_dim``.

    Raises:
        ValueError: If ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim=32, num_heads=4):
        super().__init__()
        # Raise explicitly: an assert is stripped under ``python -O`` and the
        # mismatch would only show up later as a reshape error in forward().
        if embed_dim % num_heads != 0:
            raise ValueError(
                f"embed_dim ({embed_dim}) must be divisible by num_heads ({num_heads})"
            )
        self.d_k = embed_dim // num_heads
        self.num_heads = num_heads
        self.qkv = nn.Linear(embed_dim, embed_dim*3)
        self.out = nn.Linear(embed_dim, embed_dim)

    def forward(self, x, mask=None):
        """Apply self-attention to ``x``.

        Args:
            x: Tensor of shape (B, T, E) with E == embed_dim.
            mask: Optional tensor whose zero entries mark positions that must
                not be attended to. Accepted shapes: (T, T), (B, T, T), or
                anything broadcastable to (B, heads, T, T). A query row whose
                keys are all masked produces NaN, because its softmax is
                taken over -inf only.

        Returns:
            Tensor of shape (B, T, E).
        """
        B, T, E = x.shape
        q, k, v = self.qkv(x).chunk(3, dim=-1)  # each (B,T,E)

        def split_heads(t):
            # (B,T,E) -> (B,heads,T,d_k)
            return t.view(B, T, self.num_heads, self.d_k).transpose(1, 2)

        q, k, v = split_heads(q), split_heads(k), split_heads(v)
        scores = torch.matmul(q, k.transpose(-2, -1)) / (self.d_k ** 0.5)
        if mask is not None:
            if mask.dim() == 3:
                # (B,T,T) -> (B,1,T,T) so it broadcasts across the heads.
                mask = mask.unsqueeze(1)
            scores = scores.masked_fill(mask == 0, float('-inf'))
        attn = torch.softmax(scores, dim=-1)
        out = torch.matmul(attn, v)  # (B,heads,T,d_k)
        # Merge the heads back into one feature dimension.
        out = out.transpose(1, 2).contiguous().view(B, T, E)
        return self.out(out)

# Smoke test: a batch of 2 sequences, 10 steps each, 32 features per step.
x = torch.rand(2, 10, 32)
attn = MultiHeadSelfAttention(embed_dim=32, num_heads=4)
out = attn(x)
print('Input shape:', x.shape, 'Output shape:', out.shape)


[Implementing the Self-Attention Mechanism from Scratch in PyTorch!](https://www.youtube.com/watch?v=ZPLym9rJtM8)

*Videodaki projeyi yapınız*

In [None]:
# Scaled Dot-Product Attention (single-head) — compact implementation and check
import torch
import torch.nn.functional as F

def scaled_dot_product_attention(q,k,v, mask=None):
    """Single-head scaled dot-product attention.

    Args:
        q, k, v: Query, key and value tensors; the demo below uses (B, T, D).
        mask: Optional tensor broadcastable to the score matrix; positions
            where it equals 0 are set to -inf before the softmax.

    Returns:
        A tuple ``(output, weights)``: the weighted sum of ``v`` and the
        softmax-normalised attention weights.
    """
    scale = q.size(-1) ** 0.5
    logits = (q @ k.transpose(-2, -1)) / scale
    if mask is not None:
        # -inf logits receive exactly zero weight after the softmax.
        logits = logits.masked_fill(mask == 0, float('-inf'))
    weights = logits.softmax(dim=-1)
    return weights @ v, weights

# Shape check on random inputs: batch of 1, 5 positions, 8 features.
B, T, D = 1, 5, 8
q, k, v = (torch.rand(B, T, D) for _ in range(3))
out, attn = scaled_dot_product_attention(q, k, v)
print('out.shape', out.shape, 'attn.shape', attn.shape)


[Learn PyTorch in 5 Projects – Tutorial](https://www.youtube.com/watch?v=E0bwEAWmVEM&t=12974s)

*Videodaki son projeyi yapınız (Transformer)*

In [None]:
# Minimal Transformer Encoder Block — tiny demo
import torch
import torch.nn as nn

class TransformerEncoderBlock(nn.Module):
    """Post-norm Transformer encoder block: self-attention, then feed-forward.

    Each sub-layer's output goes through dropout, is added to the sub-layer's
    input (residual connection), and the sum is layer-normalised.

    Args:
        embed_dim: Feature size of the input and output.
        heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        dropout: Dropout probability for the attention weights and for both
            residual branches.
    """

    def __init__(self, embed_dim=32, heads=4, ff_dim=64, dropout=0.1):
        super().__init__()
        self.attn = nn.MultiheadAttention(
            embed_dim, heads, dropout=dropout, batch_first=True
        )
        expand = nn.Linear(embed_dim, ff_dim)
        project = nn.Linear(ff_dim, embed_dim)
        self.ff = nn.Sequential(expand, nn.ReLU(), project)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return a tensor with the same shape as ``x``."""
        # Self-attention: x is query, key and value; the weights are discarded.
        attended = self.attn(x, x, x)[0]
        x = self.norm1(x + self.dropout(attended))
        return self.norm2(x + self.dropout(self.ff(x)))

# Smoke test: one forward pass on a (2, 10, 32) batch; the shape is preserved.
x = torch.rand(2, 10, 32)
block = TransformerEncoderBlock(embed_dim=32, heads=4)
out = block(x)
print('Transformer block input', x.shape, 'output', out.shape)
