In [1]:
from random import randint
from sys import getsizeof
from multiprocessing import Process
import os
from datetime import datetime
import math

In [2]:
def create_data(filename='data.txt', size=17179869184):
    """
    Генерация чисел и запись в файл.
    filename - имя файла;
    size - минимальный размер файла.
    """
    length = 0
    with open(filename, 'wt') as file:
        while length < size:
            n = randint(0, 4294967295)
            s = f'{n}\n'
            file.write(s)
            length += getsizeof(s)

In [3]:
def read_data(filename='data.txt'):
    """
    Построчное чтение чисел из файла.
    Возвращает список из прочитанных чисел.
    """
    data = []
    with open(filename, 'rt') as file:
        for line in file:
            data.append(int(line))
    return data

In [4]:
def write_to_file(data, i):
    """
    Запись в файл минимального и максимального чисел из полученного списка, а также суммы всех чисел.
    """
    with open('res%i' %i, 'wt') as file:
        file.write("%i\n%i\n%i" %
                   (min(data), max(data), sum(data)))

In [5]:
def get_part(data, num, part):
    """
    Разбиение списка data на num частей, возврат части с номером part (нумерация с единицы).
    """
    avg = len(data) / float(num)
    start = avg*(part-1)
    stop = avg*part
    return data[int(start):int(stop)]

In [6]:
def process_data(data, mode='single'):
    """
    Поиск минимального и максимального числа в наборе data, нахождение суммы всех чисел.
    data - набор чисел;
    mode - режим работы.
    Режимы работы: single, fork, multiprocessing.
    """
    if mode == 'single':
        return min(data), max(data), sum(data)
    else:
        min_int, max_int, sum_int = None, None, None
        size = len(data)//6
        children = []
        for i in range(1, 7):
            if mode == 'fork':
                p = os.fork()
                if not p:
                    write_to_file(get_part(data, 6, i), i)
                    os.abort()
                else:
                    children.append(int(p))
            elif mode == 'multiprocessing':
                children.append(Process(target=write_to_file, args=(get_part(data, 6, i), i)))
                children[-1].start()
        for p in children:
            if mode == 'fork':
                os.waitpid(p, 0)
            elif mode == 'multiprocessing':
                p.join()
        min_int, max_int, sum_int = [], [], []
        for i in range(1, 7):
            with open('res%i' %i, 'rt') as file:
                min_int.append(int(file.readline()))
                max_int.append(int(file.readline()))
                sum_int.append(int(file.readline()))
    return min(min_int), max(max_int), sum(sum_int)

In [7]:
def process_part(filename, i):
    sum_int = 0
    min_int = math.inf
    max_int = -1
    with open(filename, 'rt') as file:
        for index, line in enumerate(file):
            if not index % i:
                num = int(line)
                if num < min_int:
                    min_int = num
                if num > max_int:
                    max_int = num
                sum_int += num
    with open('res%i' %i, 'wt') as file:
        file.write("%i\n%i\n%i" %
                   (min_int, max_int, sum_int))

In [13]:
def process_file(filename='data.txt', mode='single'):
    """
    Поиск минимального и максимального числа в наборе data, нахождение суммы всех чисел.
    filename - имя файла, содержащего набор чисел;
    mode - режим работы.
    Режимы работы: single, fork, multiprocessing.
    """
    sum_int = 0
    min_int = math.inf
    max_int = -1
    if mode == 'single':
        with open(filename, 'rt') as file:
            for line in file:
                num = int(line)
                if num < min_int:
                    min_int = num
                if num > max_int:
                    max_int = num
                sum_int += num
        return min_int, max_int, sum_int
    else:
        children = []
        for i in range(1, 7):
            if mode == 'fork':
                p = os.fork()
                if not p:
                    process_part(filename, i)
                    os.abort()
                else:
                    children.append(int(p))
            elif mode == 'multiprocessing':
                children.append(Process(target=process_part, args=(filename, i)))
                children[-1].start()
        for p in children:
            if mode == 'fork':
                os.waitpid(p, 0)
            elif mode == 'multiprocessing':
                p.join()
        for i in range(1, 7):
            with open('res%i' %i, 'rt') as file:
                num = int(file.readline())
                if num < min_int:
                    min_int = num
                num = int(file.readline())
                if num > max_int:
                    max_int = num
                sum_int += int(file.readline())
    return min_int, max_int, sum_int

In [12]:
timestamp = datetime.now()
create_data()
print(f"Creating data: {datetime.now()-timestamp}")
timestamp = datetime.now()
data = read_data()
print(f"Reading data: {datetime.now()-timestamp}\n")
modes = ['single', 'fork', 'multiprocessing']
for m in modes:
    timestamp = datetime.now()
    result = process_file(mode=m)
    print(f"Mode: {m}, time: {datetime.now()-timestamp},\n{result}\n")

Mode: single, time: 0:01:15.798151,
(19, 4294967293, 617548189501011737)

Mode: fork, time: 0:06:23.170225,
(19, 617548189501011737, 1512970349423174434)

Mode: multiprocessing, time: 0:06:42.748824,
(19, 617548189501011737, 1512970349423174434)



In [8]:
process_data(data, mode='single')

(135233364, 3960806896, 45330325847)

In [9]:
process_data(data, mode='multiprocessing')

(135233364, 3960806896, 45330325847)