Aim: To analyze and demonstrate the working of the HDFS architecture and its commands, and to interpret MapReduce concepts and job outputs without program execution.

In [45]:
import os

# Root of the simulated HDFS namespace on the local filesystem.
hdfs_root = "/content/hdfs"

# Directory tree to create beneath the root (paths are relative to hdfs_root).
directories = [
    "user",
    "user/data",
    "user/data/input",
    "user/data/output",
    "tmp"
]

for dir_path in directories:
    # exist_ok=True keeps this cell re-runnable when the tree already exists.
    os.makedirs(os.path.join(hdfs_root, dir_path), exist_ok=True)

# os.listdir returns entries in arbitrary order, so sort the listing to make
# the displayed output reproducible across runs and filesystems.
sorted(os.listdir(hdfs_root))

# HDFS stores data in a hierarchical directory structure.
# Here we simulate /user/data/input and /user/data/output on local disk.
# This mirrors real HDFS paths created with: hdfs dfs -mkdir /user/data/input

['tmp', 'user']

In [46]:
# 2. Apply various HDFS commands (interpretation + simulation)
# Prepare a small local file that the next cell uploads into the simulated HDFS.
input_file = "/content/sample.txt"
sample_text = "hadoop mapreduce hdfs"

# Mode "w" truncates any previous copy, so re-running the cell is safe.
with open(input_file, "w") as local_file:
    local_file.write(sample_text)

input_file

'/content/sample.txt'

In [47]:
import shutil
# Simulated upload of the local sample file into the HDFS input directory.
# Equivalent HDFS command: hdfs dfs -put sample.txt /user/data/input/
hdfs_target = "/content/hdfs/user/data/input/sample.txt"
shutil.copy(input_file, hdfs_target)

'/content/hdfs/user/data/input/sample.txt'

In [48]:
# List the simulated HDFS input directory.
# Equivalent HDFS command: hdfs dfs -ls /user/data/input
input_dir = "/content/hdfs/user/data/input"
os.listdir(input_dir)

['sample.txt']

In [49]:
# Print the contents of the uploaded file.
# Equivalent HDFS command: hdfs dfs -cat /user/data/input/sample.txt
uploaded_path = "/content/hdfs/user/data/input/sample.txt"
with open(uploaded_path) as hdfs_file:
    contents = hdfs_file.read()
print(contents)


hadoop mapreduce hdfs


In [50]:
# Delete the uploaded file from the simulated HDFS input directory.
# Equivalent HDFS command: hdfs dfs -rm /user/data/input/sample.txt
stale_path = "/content/hdfs/user/data/input/sample.txt"
os.remove(stale_path)

In [51]:
# 3. Interpret a MapReduce program (character count)
# Input record for the walkthrough; the mapper cell below iterates over this
# string one character at a time.
text = "hadoop"

In [52]:
# Mapper phase: emit one (character, 1) pair per character of the input,
# in input order. Duplicates are kept; counting happens later in the reducer.
mapped = [(symbol, 1) for symbol in text]

mapped

[('h', 1), ('a', 1), ('d', 1), ('o', 1), ('o', 1), ('p', 1)]

In [53]:
# Shuffle & Sort phase: group every emitted value under its key.
# Keys appear in first-seen order here; no explicit sort is applied.
from collections import defaultdict

shuffled = defaultdict(list)

for letter, count in mapped:
    bucket = shuffled[letter]  # defaultdict creates an empty list on first access
    bucket.append(count)

shuffled

defaultdict(list, {'h': [1], 'a': [1], 'd': [1], 'o': [1, 1], 'p': [1]})

In [54]:
# Reducer phase: collapse each key's list of values into a single total.
reduced = {letter: sum(counts) for letter, counts in shuffled.items()}

reduced

{'h': 1, 'a': 1, 'd': 1, 'o': 2, 'p': 1}

In [55]:
# Final job output: one "key : count" line per reduced key.
for letter, total in reduced.items():
    print(f"{letter} : {total}")


h : 1
a : 1
d : 1
o : 2
p : 1


In [56]:
# 4. View & analyze MapReduce job logs (interpretation)
# Simulated job log. The record counts match the walkthrough above:
# 6 map output pairs and 5 reduced keys.
log_lines = (
    "INFO JobClient: Running job: job_2026_0001",
    "INFO Mapper: Map tasks started",
    "INFO Mapper: Map output records = 6",
    "INFO Reducer: Reduce tasks started",
    "INFO Reducer: Reduce output records = 5",
    "INFO JobClient: Job completed successfully",
)

# The log text starts and ends with a newline, hence the blank lines in the output.
job_log = "\n" + "\n".join(log_lines) + "\n"

print(job_log)


INFO JobClient: Running job: job_2026_0001
INFO Mapper: Map tasks started
INFO Mapper: Map output records = 6
INFO Reducer: Reduce tasks started
INFO Reducer: Reduce output records = 5
INFO JobClient: Job completed successfully

