
Compared with read/write operations, AIO has higher latency overhead #6

Closed
zhangqiang-01 opened this issue Sep 24, 2020 · 1 comment

@zhangqiang-01

I created a 400 MiB file (see fileSize in const.go below) and benchmarked it with 4 KiB random reads and writes.

With 50 worker goroutines, each executing 1000 I/O operations:
r/w latency: 1.469s
aio latency: 6.378s

const.go

package compared

const (
	goroutineNum = 50                    // concurrent worker goroutines
	rwNum        = 1000                  // I/O operations per goroutine
	fileSize     = 4 * 100 * 1024 * 1024 // 400 MiB test file
	sloat        = fileSize / (4 * 1024) // number of 4 KiB slots in the file
)

// Only used on Linux; 0x4000 matches syscall.O_DIRECT on amd64.
// Note that O_DIRECT also requires block-aligned buffers and offsets.
const O_DIRECT = 0x4000

test code: r/w

package compared

import (
	"fmt"
	"math/rand"
	"os"
	"sync"
	"testing"
	"time"
)

func dorw(wg *sync.WaitGroup, file *os.File) {
	defer wg.Done()

	buf := make([]byte, 4096)
	for i := 0; i < rwNum; i++ {
		randNum := rand.Int31()
		op := randNum % 2                             // 0: read, 1: write
		off := int64((randNum % int32(sloat)) * 4096) // random 4 KiB-aligned offset

		start := time.Now()
		if op == 0 {
			n, err := file.ReadAt(buf, off)
			if n != 4096 {
				panic(fmt.Sprintf("read %d bytes, err: %v", n, err))
			}
		} else {
			n, err := file.WriteAt(buf, off)
			if n != 4096 {
				panic(fmt.Sprintf("wrote %d bytes, err: %v", n, err))
			}
		}
		end := time.Now()

		fmt.Printf("op:%d %d\n", op, end.UnixNano()-start.UnixNano())
	}
}

func TestRW(t *testing.T) {
	// Prepare the test environment.
	file, err := os.OpenFile("1.txt", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		t.Fatalf("open 1.txt failed, err: %v", err)
	}
	// Create a sparse file of the target size by seeking past the end and writing.
	file.Seek(fileSize, 0)
	file.Write([]byte("aaa"))
	file.Close()

	file, err = os.OpenFile("1.txt", os.O_RDWR|O_DIRECT, 0644)
	if err != nil {
		t.Fatalf("open 1.txt failed, err: %v", err)
	}
	defer file.Close()

	start := time.Now()
	wg := &sync.WaitGroup{}
	wg.Add(goroutineNum)
	for i := 0; i < goroutineNum; i++ {
		go dorw(wg, file)
	}
	wg.Wait()
	end := time.Now()

	fmt.Printf("rw delay %d ms\n", (end.UnixNano()-start.UnixNano())/(1000*1000))
}

test code: aio

package compared

import (
	"fmt"
	"go-aio"
	"math/rand"
	"os"
	"sync"
	"testing"
	"time"
)

func do(wg *sync.WaitGroup, file *os.File, gaio *aio.AIO) {
	defer wg.Done()

	buf := make([]byte, 4096)
	for i := 0; i < rwNum; i++ {
		randNum := rand.Int31()
		op := randNum % 2                             // 0: read, 1: write
		off := int64((randNum % int32(sloat)) * 4096) // random 4 KiB-aligned offset

		start := time.Now()
		if op == 0 {
			n, err := gaio.DoRequest(aio.ReadOP, file, buf, off)
			if n != 4096 {
				panic(fmt.Sprintf("read %d bytes, err: %v", n, err))
			}
		} else {
			n, err := gaio.DoRequest(aio.WriteOP, file, buf, off)
			if n != 4096 {
				panic(fmt.Sprintf("wrote %d bytes, err: %v", n, err))
			}
		}
		end := time.Now()

		fmt.Printf("op:%d %d\n", op, end.UnixNano()-start.UnixNano())
	}
}

func TestAIO(t *testing.T) {
	// Prepare the test environment.
	file, err := os.OpenFile("1.txt", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		t.Fatalf("open 1.txt failed, err: %v", err)
	}
	// Create a sparse file of the target size by seeking past the end and writing.
	file.Seek(fileSize, 0)
	file.Write([]byte("aaa"))
	file.Close()

	file, err = os.OpenFile("1.txt", os.O_RDWR|O_DIRECT, 0644)
	if err != nil {
		t.Fatalf("open 1.txt failed, err: %v", err)
	}
	defer file.Close()

	gaio := aio.Setup(8, 4096)

	start := time.Now()
	wg := &sync.WaitGroup{}
	wg.Add(goroutineNum)
	for i := 0; i < goroutineNum; i++ {
		go do(wg, file, gaio)
	}
	wg.Wait()
	end := time.Now()

	fmt.Printf("aio rw delay %d ms\n", (end.UnixNano()-start.UnixNano())/(1000*1000))
}
@traetox (Owner) commented Sep 24, 2020

This isn't all that surprising; the Linux AIO interfaces are seriously neglected. The performance of the AIO interface also appears to be deeply dependent on the underlying filesystem, and most filesystems barely implement the interface, let alone optimize it.

If you want good performance with very high concurrency, you are almost always better off with pre-allocated memory-mapped files. The Go scheduler is now so good that plain read/write calls from goroutines will probably always be faster too (as you have seen here), and pure Read/Write calls will certainly be more consistent across different filesystems.
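For reference, here is a minimal sketch of that memory-mapped approach, assuming Linux and the standard syscall package. mmapFile and its parameters are illustrative, not part of go-aio:

package compared

import (
	"os"
	"syscall"
)

// mmapFile maps a file read-write after pre-allocating it to size.
// Illustrative helper; Linux-only.
func mmapFile(path string, size int64) ([]byte, *os.File, error) {
	f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0644)
	if err != nil {
		return nil, nil, err
	}
	// Pre-allocate so the whole mapping is backed by the file.
	if err := f.Truncate(size); err != nil {
		f.Close()
		return nil, nil, err
	}
	data, err := syscall.Mmap(int(f.Fd()), 0, int(size),
		syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
	if err != nil {
		f.Close()
		return nil, nil, err
	}
	return data, f, nil
}

Each goroutine can then read or write a 4 KiB slot with plain copy(data[off:off+4096], buf) and no per-operation syscall; the kernel's page cache handles the actual I/O. Unmap with syscall.Munmap(data) and close the file when done.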

traetox closed this as completed Sep 24, 2020