In [1]:
from hdfs import InsecureClient
from datetime import datetime
import posixpath as psp

In [2]:
client = InsecureClient(url='http://localhost:9870', root='/')

In [3]:
# Helper
def perms(p, prefix='-'):
    s = ''
    for idx in range(0, 3):
        if int(p[idx]) & 4 is 4: s = s + 'r'
        else: s = s + '-'
        if int(p[idx]) & 2 is 2: s = s + 'w'
        else: s = s + '-'
        if int(p[idx]) & 1 is 1: s = s + 'x'
        else: s = s + '-'

    return prefix + s

# helper to print stats about a file/directory
# the prefix is the first printed permission,
# a 'd' for directory otherwise '-'
def printfile(name, stats, prefix='-'):
    print(' '.join((\
        perms(stats['permission'], prefix),\
        '  -' if stats['replication'] is 0 else '%3d' % stats['replication'],\
        stats['owner'],\
        stats['group'],\
        '%10d' % stats['length'],\
        datetime.fromtimestamp(stats['modificationTime'] / 1000).strftime('%Y-%m-%d %H:%M'),\
        name)))

In [4]:
# 1. Make a directory named: /activity1/
client.makedirs(hdfs_path='/activity1/', permission=None)
client.makedirs(hdfs_path='/activity1/data/', permission=None)

In [5]:
# 2. Put the file RandomText.txt into HDFS as the path: /activity1/data/RandomText.txt
client.upload(hdfs_path='/activity1/data/', local_path='./RandomText.txt')

'/activity1/data/RandomText.txt'

In [6]:
# 3. List the contents of the directory /activity1/data/
client.list('/activity1/data')

['RandomText.txt']

In [7]:
# Move the HDFS file /activity1/data/RandomText.txt to /activity1/data/NotSoRandomText.txt
client.rename('/activity1/data/RandomText.txt', '/activity1/data/NotSoRandomText.txt')
client.list('/activity1/data')

['NotSoRandomText.txt']

In [8]:
with open('./RandomText.txt', 'r') as f:
    for line in f:
        temp = line

In [9]:
# 5. Append the local file RandomText.txt to the end of the HDFS file: /activity1/data/NotSoRandomText.txt
client.write(hdfs_path='/activity1/data/NotSoRandomText.txt', data=temp, append=True)

In [10]:
# 6. List the disk space used by the directory /activity1/data/
diskSpaceUsed = client.content('/activity1/data/', strict=True)
diskSpaceUsed['spaceConsumed']

65538

In [11]:
# 7. Put the local file MoreRandomText.txt into HDFS as the path: /activity1/data/MoreRandomText.txt
client.upload(hdfs_path='/activity1/data/', local_path='./MoreRandomText.txt')

'/activity1/data/MoreRandomText.txt'

In [12]:
# 8. Recursively list the contents of the directory /activity1/
fnames = client.list('/activity1')
print(fnames)

fpaths = [
  psp.join(dpath, fname)
  for dpath, _, fnames in client.walk('/activity1')
  for fname in fnames
]

print(fpaths)

['data']
['/activity1/data/MoreRandomText.txt', '/activity1/data/NotSoRandomText.txt']


In [13]:
# 9. Remove the directory /activity1/ and all files/directories underneath it
client.delete(hdfs_path='/activity1', recursive=True)

True

In [14]:
client.list('/')

['rmstate']