In [12]:
from nltk.translate.bleu_score import sentence_bleu
def cumulative_bleu(references, candidate):
    bleu_1_gram=sentence_bleu(references, candidate, weights=(1, 0, 0, 0))
    bleu_2_gram=sentence_bleu(references, candidate, weights=(0.5, 0.5, 0, 0))
    bleu_3_gram=sentence_bleu(references, candidate, weights=(0.3, 0.3, 0.3, 0))
    bleu_4_gram=sentence_bleu(references, candidate, weights=(0.25, 0.25, 0.25, 0.25))
    return bleu_1_gram, bleu_2_gram, bleu_3_gram, bleu_4_gram

candidate_text=["This","is","some","generated","text"]

reference_texts=[["This","is","some","reference","text"]]
#,["This","is","another","reference","text"]
c_bleu=cumulative_bleu(reference_texts, candidate_text)
print(c_bleu)
# 几何加权平均的结果

(0.8, 0.6324555320336759, 0.5463634277011612, 7.380245217279165e-78)
0.6


In [13]:
from rouge import Rouge
generated_text="This is some generated text"


reference_texts=["This is some reference text","This is another reference text"]

rouge=Rouge()
scores=rouge.get_scores(generated_text, reference_texts[0])
print(scores)

[{'rouge-1': {'r': 0.8, 'p': 0.8, 'f': 0.7999999950000002}, 'rouge-2': {'r': 0.5, 'p': 0.5, 'f': 0.4999999950000001}, 'rouge-l': {'r': 0.8, 'p': 0.8, 'f': 0.7999999950000002}}]


In [15]:
import math

sentences=[['this','is','a','tree'],['this','is','a','tree']]
unigram={'this':3/10,'is':1/10,'a':3/10,'book':1/10,'tree':4/10}

PPL=0
# for s in sentences:
#     PPL+=math.exp(-sum([math.log(unigram[w]) for w in s]))

for sentence in sentences:
    logs=0
    for word in sentence:
        logs+=math.log(unigram[word],2)
    temp=-logs/len(sentence)
    perplexity=2**temp
    PPL=PPL+perplexity
print(PPL/len(sentences))

4.0824829046386295


In [7]:
# 多线程和多进程是并发的两种方式，可以利用cpu的多核心，使得程序的运行效率更高
# I/O密集型任务
import threading
import time
import requests

def download(url):
    print(f"开始下载: {url}")
    response = requests.get(url)
    print(f"{url} 下载完成，大小: {len(response.content)}字节")

urls = [
    "https://www.python.org",
    "https://www.baidu.com",
    "https://www.github.com"
]

# 单线程执行（基准）
start = time.time()
for url in urls:
    download(url)
print(f"单线程耗时: {time.time() - start:.2f}秒\n")

# 多线程执行
threads = []
start = time.time()
for url in urls:
    t = threading.Thread(target=download, args=(url,))
    t.start()
    threads.append(t)

for t in threads:
    t.join()  # 等待所有线程结束
print(f"多线程耗时: {time.time() - start:.2f}秒")

开始下载: https://www.python.org
https://www.python.org 下载完成，大小: 49992字节
开始下载: https://www.baidu.com
https://www.baidu.com 下载完成，大小: 2443字节
开始下载: https://www.github.com
https://www.github.com 下载完成，大小: 286878字节
单线程耗时: 3.71秒

开始下载: https://www.python.org
开始下载: https://www.baidu.com
开始下载: https://www.github.com
https://www.baidu.com 下载完成，大小: 2443字节
https://www.python.org 下载完成，大小: 49992字节
https://www.github.com 下载完成，大小: 286877字节
多线程耗时: 2.16秒


In [27]:
import threading

# 共享资源
counter = 0
lock = threading.Lock()  # 创建锁对象

def increment():
    global counter
    for _ in range(100000):  # 大量操作暴露同步问题
        # 使用锁保护关键代码段
        with lock:  # 自动获取和释放锁
            counter += 1



# 创建多个线程
threads = []
for i in range(6):
    t = threading.Thread(target=increment)
    threads.append(t)
    t.start()

# 等待所有线程完成
for t in threads:
    t.join()

print(f"最终计数器值: {counter}")  # 应该是500000

最终计数器值: 600000


In [36]:
from concurrent.futures import ThreadPoolExecutor
import requests
import time

# 下载网页的函数
def download(url):
    start = time.time()
    response = requests.get(url)
    duration = time.time() - start
    size = len(response.content)
    return f"{url}: {size}字节, 耗时: {duration:.2f}秒"

# 网页列表
urls = [
    "https://www.python.org",
    "https://www.baidu.com",
    "https://www.github.com",
    "https://www.google.com",
    "https://www.microsoft.com"
]

# 使用线程池执行
start_time = time.time()
with ThreadPoolExecutor(max_workers=5) as executor:
    # 提交任务到线程池
    futures = [executor.submit(download, url) for url in urls]
    
    # 获取结果
    for future in futures:
        print(future.result())

print(f"总耗时: {time.time() - start_time:.2f}秒")

https://www.python.org: 49992字节, 耗时: 1.05秒
https://www.baidu.com: 2443字节, 耗时: 0.12秒
https://www.github.com: 286904字节, 耗时: 1.29秒
https://www.google.com: 18265字节, 耗时: 0.79秒
https://www.microsoft.com: 201253字节, 耗时: 0.25秒
总耗时: 1.29秒


In [39]:
import multiprocessing
import time
import os

# 进程执行的任务函数
def cpu_intensive_task(n):
    print(f"进程 {os.getpid()} 开始计算 {n}",flush=True)
    result = 0
    for i in range(n):
        result += i * i
    print(f"进程 {os.getpid()} 完成计算 {n}, 结果: {result}",flush=True)
    return result

if __name__ == "__main__":  # 必须添加的防护
    # 任务列表
    tasks = [10000000, 15000000, 20000000]
    
    # 单进程执行
    print("单进程执行:")
    start = time.time()
    for task in tasks:
        cpu_intensive_task(task)
    print(f"单进程耗时: {time.time() - start:.2f}秒\n")
    
    # 多进程执行
    print("多进程执行:")
    processes = []
    start = time.time()
    for task in tasks:
        p = multiprocessing.Process(target=cpu_intensive_task, args=(task,))
        p.start()
        processes.append(p)
    
    # 等待所有进程完成
    for p in processes:
        p.join()
    
    print(f"多进程总耗时: {time.time() - start:.2f}秒")

单进程执行:
进程 18976 开始计算 10000000
进程 18976 完成计算 10000000, 结果: 333333283333335000000
进程 18976 开始计算 15000000
进程 18976 完成计算 15000000, 结果: 1124999887500002500000
进程 18976 开始计算 20000000
进程 18976 完成计算 20000000, 结果: 2666666466666670000000
单进程耗时: 3.15秒

多进程执行:
多进程总耗时: 0.37秒


In [47]:
import multiprocessing

def worker(k):
    print("子进程输出")
    k.append('nihao')

if __name__ == '__main__':
    k=[]
    p = multiprocessing.Process(target=worker, args=(k,))
    p.start()
    p.join()  # 等待子进程结束
    print(k)

[]


In [6]:
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

# 线程池处理I/O任务
with ThreadPoolExecutor(max_workers=5) as executor:
    executor.map(download, urls)  # urls为下载链接列表

# 进程池处理CPU任务
if __name__ == "__main__":
    with ProcessPoolExecutor() as executor:
        results = executor.map(calculate, numbers)  # numbers为计算参数

开始下载: https://www.python.org开始下载: https://www.baidu.com

开始下载: https://www.github.com
https://www.baidu.com 下载完成，大小: 2443字节
https://www.python.org 下载完成，大小: 49992字节
https://www.github.com 下载完成，大小: 286878字节


In [None]:
import threading
counter = 0
lock = threading.Lock()

def safe_increment():
    global counter
    with lock:  # 自动获取和释放锁
        counter += 1

from multiprocessing import Queue
q = Queue()

def worker(q):
    q.put("子进程数据")

if __name__ == "__main__":
    p = multiprocessing.Process(target=worker, args=(q,))
    p.start()
    print(q.get())  # 输出: 子进程数据
    p.join()