# softmax

In [2]:
import numpy as np
def softmax(x, axis=None):
    """Compute a numerically stable softmax.

    Args:
        x: Array-like of scores.
        axis: Axis along which the scores are normalised. ``None`` (the
            default) treats the whole array as a single distribution.

    Returns:
        An ndarray with the same shape as ``x`` whose entries along ``axis``
        sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / np.sum(e_x, axis=axis, keepdims=True)

# Demo: print the softmax of a small score vector.
x = np.array([2.0, 1.0, 0.1])
print(softmax(x))

[0.65900114 0.24243297 0.09856589]


# BN (Batch Normalization)

In [3]:
import numpy as np

def batch_norm(x, alpha, gamma, epsilon=1e-12):
    """Normalise ``x`` with statistics taken along axis 0, then rescale.

    Args:
        x: Input array; mean and variance are computed over axis 0.
        alpha: Additive shift applied after normalisation.
        gamma: Multiplicative scale applied after normalisation.
        epsilon: Small constant added to the variance before the square root.

    Returns:
        ``gamma * x_hat + alpha`` where ``x_hat`` is the normalised input.
    """
    column_mean = np.mean(x, axis=0)
    column_var = np.var(x, axis=0)  # variance per column

    # epsilon keeps the denominator non-zero for constant columns.
    normalised = (x - column_mean) / np.sqrt(column_var + epsilon)
    return gamma * normalised + alpha  # affine transform

# LN (Layer Normalization)

In [4]:
import numpy as np
def layer_norm(x, alpha, gamma, epsilon=1e-12):
    """Normalise ``x`` with statistics taken along axis 1, then rescale.

    Args:
        x: Input array; mean and variance are computed over axis 1 with
            ``keepdims=True`` so they broadcast back against ``x``.
        alpha: Additive shift applied after normalisation.
        gamma: Multiplicative scale applied after normalisation.
        epsilon: Small constant added to the variance before the square root.

    Returns:
        ``gamma * x_hat + alpha`` where ``x_hat`` is the normalised input.
    """
    row_mean = np.mean(x, axis=1, keepdims=True)  # mean per row
    row_var = np.var(x, axis=1, keepdims=True)  # variance per row

    # epsilon keeps the denominator non-zero for constant rows.
    denominator = np.sqrt(row_var + epsilon)
    normalised = (x - row_mean) / denominator
    return gamma * normalised + alpha  # affine transform

# 对比损失：正样本距离尽可能小，负样本距离尽可能大

In [None]:
import torch
import torch.nn as nn

class ContrastiveLoss(nn.Module):
    """Contrastive loss over pairs of embeddings.

    Pairs with ``label == 1`` are penalised by their squared distance; pairs
    with ``label == 0`` are penalised by the squared amount by which their
    distance falls short of ``margin``.
    """

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, output1, output2, label):
        """Return the mean contrastive loss of a batch.

        Args:
            output1: First embeddings; the norm is taken over dim 1.
            output2: Second embeddings, same shape as ``output1``.
            label: Per-pair weight, 1 for pairs to pull together and 0 for
                pairs to push apart.
        """
        distance = torch.norm(output1 - output2, dim=1)

        # Pull term: squared distance, active where label == 1.
        pull = label * distance.pow(2)

        # Push term: squared hinge on (margin - distance), active where label == 0.
        shortfall = torch.clamp(self.margin - distance, min=0.0)
        push = (1 - label) * shortfall.pow(2)

        return torch.mean(pull + push)

# 双塔里pairwise的margin loss:正负样本之间的距离尽可能大

In [None]:
import torch
import torch.nn as nn

class PairwiseMarginLoss(nn.Module):
    """Hinge loss on the gap between anchor-positive and anchor-negative distances."""

    def __init__(self, margin=1.0):
        super().__init__()
        self.margin = margin

    def forward(self, anchor, positive, negative):
        """Return the mean of ``max(d(a, p) - d(a, n) + margin, 0)`` over the batch.

        Distances are norms taken over dim 1 of the embedding differences.
        """
        d_pos = torch.norm(anchor - positive, dim=1)
        d_neg = torch.norm(anchor - negative, dim=1)

        # The loss is zero once the negative is at least `margin` farther away
        # from the anchor than the positive.
        violation = d_pos - d_neg + self.margin
        return torch.clamp(violation, min=0.0).mean()

# 交叉熵

In [5]:
import numpy as np
def cross_entropy(predictions, targets, epsilon=1e-12):
    """Compute the mean cross-entropy between predictions and targets.

    Args:
        predictions: Predicted probabilities; the first axis indexes samples.
        targets: Target weights (e.g. one-hot rows), same shape as
            ``predictions``.
        epsilon: Predictions are clipped to ``[epsilon, 1 - epsilon]`` so the
            logarithm is always finite.

    Returns:
        ``-sum(targets * log(predictions)) / N`` where ``N`` is the size of
        the first axis of ``predictions``.
    """
    # Clipping alone guards against log(0). Adding a further constant inside
    # the log would bias the loss and could make it negative when a
    # prediction is exactly 1.
    predictions = np.clip(predictions, epsilon, 1. - epsilon)
    N = predictions.shape[0]
    return -np.sum(targets * np.log(predictions)) / N

# Demo: three one-hot targets scored against three predicted distributions.
y_true = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
])
y_pred = np.array([
    [0.9, 0.05, 0.05],
    [0.1, 0.8, 0.1],
    [0.2, 0.2, 0.6],
])
print(cross_entropy(y_pred, y_true))

0.2797765622367497


# focal loss

In [6]:
import numpy as np
def focal_loss(predictions, targets, alpha=0.25, gamma=2.0, epsilon=1e-12):
    """Compute the mean element-wise focal loss.

    Args:
        predictions: Predicted probabilities.
        targets: Array of the same shape; entries equal to 1 mark positives.
        alpha: Constant weight applied to every element's loss.
        gamma: Exponent of the ``(1 - pt)`` modulating factor.
        epsilon: Predictions are clipped to ``[epsilon, 1 - epsilon]``.

    Returns:
        The mean over all elements of ``-alpha * (1 - pt)**gamma * log(pt)``.
    """
    probs = np.clip(predictions, epsilon, 1. - epsilon)

    # pt is the probability assigned to the true outcome of each element.
    is_positive = targets == 1
    pt = np.where(is_positive, probs, 1 - probs)

    # Confident, correct elements (pt close to 1) are down-weighted.
    modulating = (1 - pt) ** gamma
    per_element = -alpha * modulating * np.log(pt)
    return np.mean(per_element)

# Demo: focal loss of three predicted distributions against one-hot targets.
y_true = np.array([
    [1, 0, 0],
    [0, 1, 0],
    [0, 0, 1],
])
y_pred = np.array([
    [0.9, 0.05, 0.05],
    [0.1, 0.8, 0.1],
    [0.2, 0.2, 0.6],
])
print(focal_loss(y_pred, y_true))

0.003109072441720451


# IOU交并比

In [None]:
def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Each box is indexed as ``(x1, y1, x2, y2)``. Widths and heights are
    computed as ``x2 - x1 + 1`` / ``y2 - y1 + 1``, i.e. both corner
    coordinates are counted as part of the box.
    """
    # Corners of the overlapping rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # A negative extent means the boxes do not overlap on that axis.
    overlap_w = max(0, right - left + 1)
    overlap_h = max(0, bottom - top + 1)
    intersection = overlap_w * overlap_h

    area_a = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
    area_b = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)
    union = area_a + area_b - intersection

    return intersection / float(union)

# Multi-head self-attention

In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F

class MultiHeadSelfAttention(nn.Module):
    """Multi-head scaled dot-product attention.

    The embedding is split into ``num_heads`` chunks of ``head_dim`` features.
    The query/key/value projections are ``head_dim -> head_dim`` linear layers
    applied to the last dimension, so the same weights are used for every
    head. The heads are concatenated and passed through ``fc_out``.
    """

    def __init__(self, embed_size, num_heads):
        super().__init__()
        self.embed_size = embed_size
        self.num_heads = num_heads
        self.head_dim = embed_size // num_heads

        assert (self.head_dim * num_heads == embed_size), "Embedding size needs to be divisible by num_heads"

        self.values = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.keys = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.queries = nn.Linear(self.head_dim, self.head_dim, bias=False)
        self.fc_out = nn.Linear(num_heads * self.head_dim, embed_size)

    def forward(self, values, keys, queries, mask):
        """Attend from ``queries`` to ``keys`` and aggregate ``values``.

        Args:
            values: Tensor reshaped to ``(N, value_len, num_heads, head_dim)``.
            keys: Tensor reshaped to ``(N, key_len, num_heads, head_dim)``.
            queries: Tensor reshaped to ``(N, query_len, num_heads, head_dim)``.
            mask: ``None`` or a tensor broadcastable to
                ``(N, num_heads, query_len, key_len)``; positions where
                ``mask == 0`` are excluded from attention.

        Returns:
            Tensor of shape ``(N, query_len, embed_size)``.
        """
        N = queries.shape[0]
        value_len, key_len, query_len = values.shape[1], keys.shape[1], queries.shape[1]

        # Split the embedding into heads and project each head.
        values = self.values(values.reshape(N, value_len, self.num_heads, self.head_dim))
        keys = self.keys(keys.reshape(N, key_len, self.num_heads, self.head_dim))
        queries = self.queries(queries.reshape(N, query_len, self.num_heads, self.head_dim))

        # Dot product of every query with every key, per head: (N, heads, q, k).
        energy = torch.einsum("nqhd,nkhd->nhqk", queries, keys)

        if mask is not None:
            # A very large negative score becomes a zero weight after softmax.
            energy = energy.masked_fill(mask == 0, float("-1e20"))

        # Scale by sqrt(d_k), where d_k is the per-head key dimension. The dot
        # products are taken over head_dim features, not embed_size features.
        attention = torch.softmax(energy / (self.head_dim ** 0.5), dim=3)

        # Weighted sum of values, then concatenate the heads again.
        out = torch.einsum("nhql,nlhd->nqhd", attention, values).reshape(
            N, query_len, self.num_heads * self.head_dim
        )

        return self.fc_out(out)

# Adam

In [None]:
def adam(w, x, y, lr=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8, n_iterations=1000):
    """Fit linear weights with stochastic Adam on a squared-error objective.

    Each iteration draws one random sample ``(x[i], y[i])``, computes the
    gradient ``x[i] * (x[i] . w - y[i])`` and applies a bias-corrected Adam
    update.

    Args:
        w: Initial weights. The returned weights have the same shape.
        x: 2-D array of samples, indexed as ``x[i, :]``.
        y: Targets, indexed as ``y[i]``.
        lr: Learning rate.
        beta1: Decay rate of the first-moment (mean) estimate.
        beta2: Decay rate of the second-moment (uncentred variance) estimate.
        epsilon: Constant added to the denominator for numerical stability.
        n_iterations: Number of single-sample updates to perform.

    Returns:
        The updated weights as a float ndarray.
    """
    w = np.asarray(w, dtype=float)
    m = len(y)
    mt = np.zeros_like(w)
    vt = np.zeros_like(w)

    for t in range(1, n_iterations + 1):
        i = np.random.randint(0, m)
        xi = x[i, :]
        yi = y[i]

        # Keep the gradient in w's own shape; otherwise a 1-D w would
        # broadcast against the column vector and grow into a matrix.
        residual = np.dot(xi, w) - yi
        grad = np.reshape(xi.reshape(-1, 1) * residual, w.shape)

        mt = beta1 * mt + (1 - beta1) * grad
        vt = beta2 * vt + (1 - beta2) * (grad ** 2)

        # Bias correction compensates for the zero initialisation of the moments.
        mt_hat = mt / (1 - beta1 ** t)
        vt_hat = vt / (1 - beta2 ** t)

        # Update the parameters with the bias-corrected moments.
        w = w - lr * mt_hat / (np.sqrt(vt_hat) + epsilon)

    return w

# AUC

In [None]:
def auc(y_true, y_scores):
    """Compute AUC by comparing every positive score with every negative score.

    A pair counts 1 when the positive scores higher, 0.5 when the scores tie
    and 0 otherwise; the result is the average over all pairs.

    Args:
        y_true: Array of labels; entries equal to 1 are positives and entries
            equal to 0 are negatives.
        y_scores: Scores indexable by the positions found in ``y_true``.

    Returns:
        The fraction of correctly ordered pairs, or ``0.0`` when either class
        is absent.
    """
    positives = np.where(y_true == 1)[0]
    negatives = np.where(y_true == 0)[0]

    n_pairs = len(positives) * len(negatives)
    if n_pairs == 0:
        return 0.0  # No positive/negative pair exists; avoid dividing by zero.

    wins = sum(
        1 if y_scores[p] > y_scores[q] else 0.5 if y_scores[p] == y_scores[q] else 0
        for p in positives
        for q in negatives
    )
    return wins / n_pairs

# bpr loss

In [None]:
import numpy as np
from torch import sigmoid
def bpr_loss(pos_scores, neg_scores):
    """Compute the Bayesian Personalised Ranking loss.

    The loss is ``-mean(log(sigmoid(pos_scores - neg_scores)))``.

    Args:
        pos_scores: Array-like of scores for the preferred items.
        neg_scores: Array-like of scores for the non-preferred items,
            broadcastable against ``pos_scores``.

    Returns:
        The mean loss as a NumPy float.
    """
    diff = np.asarray(pos_scores, dtype=float) - np.asarray(neg_scores, dtype=float)
    # -log(sigmoid(d)) == log(1 + exp(-d)) == logaddexp(0, -d). This form stays
    # finite for large |d| and works on NumPy inputs, unlike torch.sigmoid.
    return np.mean(np.logaddexp(0.0, -diff))

# 手撕线性回归的梯度下降

In [None]:
# Generate synthetic data: y = 4 + 3x + Gaussian noise.
np.random.seed(0)
X = 2 * np.random.rand(100, 1)
y = 4 + 3 * X + np.random.randn(100, 1)

# Hyper-parameters.
learning_rate = 0.01
n_iterations = 1000
m = len(y)

w = np.random.randn(1, 1)  # initial weight
b = 0.0  # initial bias

# Batch gradient descent on the mean squared error.
for epoch in range(n_iterations):
    y_pred = np.dot(X, w) + b
    error = y_pred - y  # residuals

    # Gradients of mean((X w + b - y)^2) with respect to w and b.
    dw = (2 / m) * np.dot(X.T, error)
    db = (2 / m) * np.sum(error)

    # Step against the gradient.
    w = w - learning_rate * dw
    b = b - learning_rate * db

# 手撕LR的梯度下降

In [None]:
import numpy as np

def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    The result is computed from ``exp(-|x|)``, which never overflows, so
    large negative inputs give 0 without a floating-point overflow.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1]
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the equivalent e^x / (1 + e^x).
    out = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

def compute_loss(x, y, theta):
    """Return the mean binary cross-entropy of a logistic model.

    Args:
        x: Design matrix.
        y: Binary targets; ``len(y)`` is used as the sample count.
        theta: Model parameters; predictions are ``sigmoid(x . theta)``.
    """
    m = len(y)
    probs = sigmoid(np.dot(x, theta))

    # Log-likelihood contributions of the positive and negative targets.
    positive_term = np.dot(y.T, np.log(probs))
    negative_term = np.dot((1 - y).T, np.log(1 - probs))

    return -1 / m * (positive_term + negative_term)

def compute_gradient(x, y, theta):
    """Return the gradient of the mean cross-entropy with respect to ``theta``.

    Computed as ``x.T . (sigmoid(x . theta) - y) / m`` with ``m = len(y)``.
    """
    m = len(y)
    residual = sigmoid(np.dot(x, theta)) - y
    return 1 / m * np.dot(x.T, residual)

# Add the intercept term.
def add_intercept(x):
    """Return ``x`` with a leading column of ones prepended."""
    ones_column = np.ones((x.shape[0], 1))
    return np.hstack((ones_column, x))

def gradient_descent(x, y, learning_rate=0.01, n_iterations=1000):
    """Fit logistic-regression parameters with batch gradient descent.

    An intercept column is prepended to ``x`` via ``add_intercept``, so the
    returned ``theta`` has one entry more than ``x`` has columns. This is the
    layout ``predict`` expects, since it prepends the same column.

    Args:
        x: 2-D array of raw features, without an intercept column.
        y: Binary targets, either 1-D or a column vector.
        learning_rate: Step size of each update.
        n_iterations: Number of full-batch updates.

    Returns:
        A tuple ``(theta, loss_history)`` holding the fitted parameters and
        the loss recorded after every update.
    """
    x = add_intercept(x)
    y = np.asarray(y)
    # Start from zeros, shaped so that x . theta lines up with y whether y is
    # 1-D or a column vector.
    theta = np.zeros((x.shape[1],) + y.shape[1:])

    loss_history = []
    for _ in range(n_iterations):
        theta = theta - learning_rate * compute_gradient(x, y, theta)
        loss_history.append(compute_loss(x, y, theta))
    return theta, loss_history

def predict(x, theta, threshold=0.5):
    """Return 0/1 predictions of a logistic model.

    An intercept column is prepended to ``x`` before scoring; a sample is
    labelled 1 when its predicted probability is at least ``threshold``.
    """
    design = add_intercept(x)
    probabilities = sigmoid(np.dot(design, theta))
    labels = probabilities >= threshold
    return labels.astype(int)

# 自注意力机制

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SingleHeadSelfAttention(nn.Module):
    """Single-head scaled dot-product attention with an output projection.

    Queries and keys are projected from ``d_model`` to ``d_k`` features,
    values to ``d_v`` features, and the attended values are projected back to
    ``d_model``.
    """

    def __init__(self, d_model, d_k, d_v):
        super().__init__()
        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v

        self.W_q = nn.Linear(d_model, d_k)
        self.W_k = nn.Linear(d_model, d_k)
        self.W_v = nn.Linear(d_model, d_v)

        self.fc = nn.Linear(d_v, d_model)

    def forward(self, queries, keys, values, mask=None):
        """Attend from ``queries`` to ``keys`` and aggregate ``values``.

        Args:
            queries: Tensor whose last dimension is ``d_model``.
            keys: Tensor whose last dimension is ``d_model``.
            values: Tensor whose last dimension is ``d_model``.
            mask: Optional tensor broadcastable to the score matrix;
                positions where ``mask == 0`` are excluded from attention.

        Returns:
            Tensor whose last dimension is ``d_model``.
        """
        Q = self.W_q(queries)
        K = self.W_k(keys)
        V = self.W_v(values)

        # Scaled dot-product scores between every query and every key.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / (self.d_k ** 0.5)

        if mask is not None:
            # A very large negative score becomes a zero weight after softmax.
            scores = scores.masked_fill(mask == 0, float('-1e9'))

        attention_weights = F.softmax(scores, dim=-1)
        out = torch.matmul(attention_weights, V)
        return self.fc(out)