Skip to content
This repository has been archived by the owner on Oct 11, 2023. It is now read-only.

Overlay volumes are out of control #928

Closed
smazurov opened this issue May 10, 2016 · 10 comments
Closed

Overlay volumes are out of control #928

smazurov opened this issue May 10, 2016 · 10 comments
Assignees

Comments

@smazurov
Copy link

smazurov commented May 10, 2016

When running sudo df logged in as rancher via ssh, I get 2000+ entries such as these:

overlay               29351944   8604624  19389640  31% /var/lib/docker/overlay/b276a4fc6a0ba41534242436f94469cf19395f9ee2cdfaf25c71c64d68eca5af/merged
overlay               29351944   8604624  19389640  31% /var/lib/docker/overlay/b276a4fc6a0ba41534242436f94469cf19395f9ee2cdfaf25c71c64d68eca5af/merged

They're all the same folder, but once in a while the size changes.

Tracking it down, it's a container that I start on a schedule using rancher-cron (something I am working on as a simple cron solution) once a minute. The container itself is set to io.rancher.container.start_once=true and maintained by rancher.

Output of docker inspect:

[
    {
        "Id": "ab407a3abe6fe864c2251a1b9269c7544b9e6c0c9121fc604e3548db86f6626d",
        "Created": "2016-05-07T00:27:33.477429259Z",
        "Path": "docker/startup",
        "Args": [
            "redacted"
        ],
        "State": {
            "Status": "exited",
            "Running": false,
            "Paused": false,
            "Restarting": false,
            "OOMKilled": false,
            "Dead": false,
            "Pid": 0,
            "ExitCode": 0,
            "Error": "",
            "StartedAt": "2016-05-09T19:58:20.768540168Z",
            "FinishedAt": "2016-05-09T19:58:29.30269597Z"
        },
        "Image": "redacted",
        "ResolvConfPath": "/var/lib/docker/containers/ab407a3abe6fe864c2251a1b9269c7544b9e6c0c9121fc604e3548db86f6626d/resolv.conf",
        "HostnamePath": "/var/lib/docker/containers/ab407a3abe6fe864c2251a1b9269c7544b9e6c0c9121fc604e3548db86f6626d/hostname",
        "HostsPath": "/var/lib/docker/containers/ab407a3abe6fe864c2251a1b9269c7544b9e6c0c9121fc604e3548db86f6626d/hosts",
        "LogPath": "/var/lib/docker/containers/ab407a3abe6fe864c2251a1b9269c7544b9e6c0c9121fc604e3548db86f6626d/ab407a3abe6fe864c2251a1b9269c7544b9e6c0c9121fc604e3548db86f6626d-json.log",
        "Name": "redacted",
        "RestartCount": 0,
        "Driver": "overlay",
        "MountLabel": "",
        "ProcessLabel": "",
        "AppArmorProfile": "",
        "ExecIDs": null,
        "HostConfig": {
            "Binds": null,
            "ContainerIDFile": "",
            "LogConfig": {
                "Type": "json-file",
                "Config": {
                    "max-file": "2",
                    "max-size": "25m"
                }
            },
            "NetworkMode": "default",
            "PortBindings": null,
            "RestartPolicy": {
                "Name": "",
                "MaximumRetryCount": 0
            },
            "VolumeDriver": "",
            "VolumesFrom": null,
            "CapAdd": null,
            "CapDrop": null,
            "Dns": [
                "169.254.169.250"
            ],
            "DnsOptions": null,
            "DnsSearch": [
                "redacted",
                "rancher.internal"
            ],
            "ExtraHosts": null,
            "GroupAdd": null,
            "IpcMode": "",
            "Links": null,
            "OomScoreAdj": 0,
            "PidMode": "",
            "Privileged": false,
            "PublishAllPorts": false,
            "ReadonlyRootfs": false,
            "SecurityOpt": null,
            "UTSMode": "",
            "ShmSize": 67108864,
            "ConsoleSize": [
                0,
                0
            ],
            "Isolation": "",
            "CpuShares": 0,
            "CgroupParent": "",
            "BlkioWeight": 0,
            "BlkioWeightDevice": null,
            "BlkioDeviceReadBps": null,
            "BlkioDeviceWriteBps": null,
            "BlkioDeviceReadIOps": null,
            "BlkioDeviceWriteIOps": null,
            "CpuPeriod": 0,
            "CpuQuota": 0,
            "CpusetCpus": "",
            "CpusetMems": "",
            "Devices": null,
            "KernelMemory": 0,
            "Memory": 0,
            "MemoryReservation": 0,
            "MemorySwap": 0,
            "MemorySwappiness": -1,
            "OomKillDisable": false,
            "PidsLimit": 0,
            "Ulimits": null
        },
        "GraphDriver": {
            "Name": "overlay",
            "Data": {
                "LowerDir": "/var/lib/docker/overlay/827fa1ac20bdb1e95033f8765ff100e87b741dc6d6b1961462976daf22770bf5/root",
                "MergedDir": "/var/lib/docker/overlay/b276a4fc6a0ba41534242436f94469cf19395f9ee2cdfaf25c71c64d68eca5af/merged",
                "UpperDir": "/var/lib/docker/overlay/b276a4fc6a0ba41534242436f94469cf19395f9ee2cdfaf25c71c64d68eca5af/upper",
                "WorkDir": "/var/lib/docker/overlay/b276a4fc6a0ba41534242436f94469cf19395f9ee2cdfaf25c71c64d68eca5af/work"
            }
        },
        "Mounts": [
            {
                "Name": "4e508d7e2944e34ed79514880209fd26605fd88e3750ba30ecb2262fbd9d692e",
                "Source": "/var/lib/docker/volumes/4e508d7e2944e34ed79514880209fd26605fd88e3750ba30ecb2262fbd9d692e/_data",
                "Destination": "/app/storage",
                "Driver": "local",
                "Mode": "",
                "RW": true,
                "Propagation": ""
            }
        ],
        "Config": {
            "Hostname": "ab407a3abe6f",
            "Domainname": "",
            "User": "",
            "AttachStdin": false,
            "AttachStdout": false,
            "AttachStderr": false,
            "ExposedPorts": {
                "80/tcp": {}
            },
            "Tty": false,
            "OpenStdin": false,
            "StdinOnce": false,
            "Env": [
                "redacted"
            ],
            "Cmd": [
                "php",
                "artisan",
                "schedule:run"
            ],
            "Image": "[redacted]",
            "Volumes": {
                "/app/storage": {}
            },
            "WorkingDir": "/app",
            "Entrypoint": [
                "docker/startup"
            ],
            "MacAddress": "02:9e:59:c2:f9:7b",
            "OnBuild": null,
            "Labels": {
                "io.rancher.container.ip": "10.42.217.199/16",
                "io.rancher.container.name": "[redacted]",
                "io.rancher.container.pull_image": "always",
                "io.rancher.container.start_once": "true",
                "io.rancher.container.uuid": "16822e5b-d62b-4256-a18d-0de8457c87bb",
                "io.rancher.cron.schedule": "0 * * * * * ",
                "io.rancher.project.name": "[redacted]",
                "io.rancher.project_service.name": "[redacted]",
                "io.rancher.scheduler.affinity:host_label": "type=node",
                "io.rancher.service.deployment.unit": "[redacted]",
                "io.rancher.service.launch.config": "io.rancher.service.primary.launch.config",
                "io.rancher.stack.name": "[redacted]",
                "io.rancher.stack_service.name": "[redacted]"
            }
        },
        "NetworkSettings": {
            "Bridge": "",
            "SandboxID": "",
            "HairpinMode": false,
            "LinkLocalIPv6Address": "",
            "LinkLocalIPv6PrefixLen": 0,
            "Ports": null,
            "SandboxKey": "[redacted, because key could mean private]",
            "SecondaryIPAddresses": null,
            "SecondaryIPv6Addresses": null,
            "EndpointID": "",
            "Gateway": "",
            "GlobalIPv6Address": "",
            "GlobalIPv6PrefixLen": 0,
            "IPAddress": "",
            "IPPrefixLen": 0,
            "IPv6Gateway": "",
            "MacAddress": "",
            "Networks": {
                "bridge": {
                    "IPAMConfig": null,
                    "Links": null,
                    "Aliases": null,
                    "NetworkID": "73031b501fba177f5e59753ceb8dcd0e1c00e8285d7b145b430a672f735fdcd8",
                    "EndpointID": "",
                    "Gateway": "",
                    "IPAddress": "",
                    "IPPrefixLen": 0,
                    "IPv6Gateway": "",
                    "GlobalIPv6Address": "",
                    "GlobalIPv6PrefixLen": 0,
                    "MacAddress": ""
                }
            }
        }
    }
]

output of docker info:

[rancher@ip-10-0-2-101 ~]$ docker info
Containers: 9
 Running: 8
 Paused: 0
 Stopped: 1
Images: 21
Server Version: 1.10.3
Storage Driver: overlay
 Backing Filesystem: extfs
Execution Driver: native-0.2
Logging Driver: json-file
Plugins:
 Volume: local
 Network: bridge null host
Kernel Version: 4.2.8-ckt4-rancher
Operating System: RancherOS (containerized)
OSType: linux
Architecture: x86_64
CPUs: 1
Total Memory: 1.956 GiB
Name: ip-10-0-2-101.us-west-1.compute.internal

This also spun a t2.small instance out of control (not entirely sure, as there were other services on that machine), leading to a load of 30+ with very minimal actual usage; the process list was filled with lines like:

30120 25538 root     R N  10988   1%   0% du -s /var/lib/docker/overlay/b276a4fc6a0ba41534242436f94469cf19395f9ee2cdfaf25c71c64d68eca5af
30911 25538 root     R N  10788   1%   0% du -s /var/lib/docker/overlay/b276a4fc6a0ba41534242436f94469cf19395f9ee2cdfaf25c71c64d68eca5af

As soon as the box had containers removed from it, it stopped having load problems.

I am now re-running everything using an Ubuntu 14.04 instance to see if the issue is with RancherOS or not.

Unfortunately, before I figured that it's a good idea to open this issue, I had terminated the box, so I cannot pull any additional information from the instance.

RancherOS Version: 0.4.4
Where are you running RancherOS? AWS

@joshwget
Copy link
Contributor

joshwget commented Jun 2, 2016

I can't think of a reason that RancherOS would behave differently than any other distro here. If you do see different results on Ubuntu though, we'll definitely look into this!

@smazurov
Copy link
Author

smazurov commented Jun 2, 2016

I have tested it on Ubuntu, and the issue is not present. My other thought would be Storage Driver differences, since RancherOS uses overlay, while Ubuntu uses aufs by default.

@PanJ
Copy link

PanJ commented Jun 16, 2016

Currently having the same issue. Not sure if it is related to overlay's issue with inode exhaustion. We use Rancher together with RancherOS and have to upgrade the container several times a day. Considering moving off RancherOS for now.

@joshwget joshwget added this to the v1.0 Milestone milestone Sep 1, 2016
@deniseschannon deniseschannon modified the milestone: v1.0 Milestone Oct 5, 2016
@Nuxij
Copy link

Nuxij commented Mar 11, 2017

I have also seen huge lists making df unusable; I've not done any testing to find out what causes it, though.

@ryanrca
Copy link

ryanrca commented Mar 26, 2018

Is there a workaround for this? I awoke this morning to many downed services (that have been working fine for months) and discovered all hosts were out of disk space. There are thousands of these entries in /var/lib/docker/overlay.

I'd like to figure out how to fix it quickly vs. re-deploying new hosts and updating DNS.

@niusmallnan niusmallnan added this to the v1.4.0 milestone Mar 27, 2018
@niusmallnan
Copy link
Contributor

niusmallnan commented Mar 30, 2018

@JacieChao
We have upgraded the system-docker to 17.06 in v1.3.0 and used overlay2 as default storage driver.
Please help me to check if this issue can be mitigated.

@JacieChao
Copy link
Contributor

We could set overlay2 as the storage driver for both system-docker and user docker. According to my verification, this issue can be mitigated by using overlay2.

The verification steps are as follows:

  1. I created 2 RancherOS hosts to compare with overlay and overlay2.
  • overlay host
$ sudo docker info | grep Driver

Storage Driver: overlay
Logging Driver: json-file
Cgroup Driver: cgroupfs
  • overlay2 host
$ sudo docker info | grep Driver

Storage Driver: overlay2
Logging Driver: json-file
Cgroup Driver: cgroupfs
  2. I started a rancher server and added the 2 test hosts.
  3. I started a container on a schedule using rancher-cron, and created several services on both hosts.
  4. I upgraded the container several times on each host, then compared the storage information between overlay and overlay2. The results are as follows.
$ sudo df -h | grep overlay2

/dev/xvda1                7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/93c99dbea48a67be8a36f46d680f3e0c0226392b514515168a970d521313b3ec/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/2395544c6ba51b71c8dd8b6d98713fb91a7fd10288be9f2a83c5175391018854/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/8920991104f4320508a921602984ac3337703487d75575a9a8e47731337b925b/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/57fbd3b2af19a33f76ca081303a142454bd815b0b08bf3d4ed991ee3a3d17e1f/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/ea76bb8d8e450a3e866463b9964a87a7808d439d126790a81cdf9ae7fbc8013c/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/46d7d51797589165924502bea0e237f53c5c5801f4dd45b221224c0c1ac25f30/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/559b54a8a78efb7e2c5095429eba110c0389a792e9d31c14de82f88d02c46a99/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/d7ffe4c2aa3882470ae1bad7438c5ba739cbc7a3c8f537457029df5b0c5a8bca/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/25ac933128f55bda95b59f7fc65989cdb64e7e809769f25011a8fcad3974c166/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/abe2815e6d021612f7e70b8e21d10cb4bc6e97f63005df5089b87f6d6302ee60/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/a426fcf04528f527b632bb2e6e06cafcec826952ec3d90bf3b0d4b482ccbdfe3/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/3ca2991ceff54cd46236069136df2dfddbe8e91d8d09729c8d2b2c5262059a9a/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/50a03f35c81e5668a0ce55aff01dbb9390d408e11f9f4435728e09559f8b98f6/merged
overlay                   7.4G      4.0G      3.0G  57% /var/lib/docker/overlay2/7c5017d8271d161c16984866c46b9228f246bb1d0196847832c27a659714233d/merged
$ sudo df -h | grep overlay

overlay                   7.4G      4.6G      2.4G  66% /
/dev/xvda1                7.4G      4.6G      2.4G  66% /var/lib/docker/overlay
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/cb42017e5a4c0d9ccfef196be2369b0d5b2dc14619182ce6d0766b8cdb0d1bc0/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/3afa1fa49a90c8bfa55a0b6212aac3fd71f303b99366656ddc01164912c1d6ca/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/fede913818660ed80a151b8c3e6aef125bac726726536a50f9f6ed6488c45d31/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/bf4d685a8f1ec9c05585c3496c65b9749b56d80341319bd81f8326ab8c46237f/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/ee00fb71a591148bd6cef5e7b003fed4170fa84922fa18f22fcbefb58c0a0817/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/d3340b750da06ab6d6d71bef8d89147895070a68ce84569c7401ee04a01dd1a6/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/e22bc554ff5b2f57eb92662e823acaa4e06c03a539023925102a7362bceae4a4/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/ec9b66e4fc940cc1741d0f95f69f521c52faaca4f8f8083de7e2d5c47790c6e6/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/e64e869ea6da2e6450e3ee0806bdf27bc481ff0c92bb8bccf6dc6187e57ae68a/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/f150b5b3de6b8bd7ace765a93c42db4da5db0594bf9f6dac1de460ba81558a83/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/37af5a231512b2a9b046bf28719de31c5bc199fdbc8664ce2479c9bb911b3e19/merged
overlay                   7.4G      4.6G      2.4G  66% /var/lib/docker/overlay/af47ec54ba63b64ac0f0a538a2e081cb79606efb2fc53aea743c6a82b0622a46/merged

As a result, using overlay2 as the storage driver can save almost 10% storage, and the storage growth was not noticeable when upgrading the container. In my test, when running sudo df, I didn't get repeated entries for overlay or overlay2. I think this issue can be mitigated by using overlay2 as the storage driver.

@niusmallnan
Copy link
Contributor

Close it.
If anyone has any questions, please let me know.

@cmer
Copy link

cmer commented Feb 9, 2019

What's the proper way to fix this?

@niusmallnan niusmallnan removed this from the v1.4.0 milestone Mar 1, 2019
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Projects
None yet
Development

No branches or pull requests

9 participants