# **Big Data Practical – Simulation Notebook (Full Exam Version)**
### **HDFS • MapReduce • Hive • Pig • Matrix Multiplication**

## **1. HDFS Simulation Setup**

In [None]:

import os, shutil

# Local directory that every hdfs_* helper prefixes onto the paths it is given.
HDFS_ROOT = "/content/hdfs"

# Create the root up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(name=HDFS_ROOT, exist_ok=True)

def hdfs_mkdir(path):
    """Create the directory *path* inside the simulated HDFS tree.

    *path* is appended verbatim to ``HDFS_ROOT``. Missing parent directories
    are created as well, and an already existing directory is not an error.
    The resulting local path is printed.
    """
    target = HDFS_ROOT + path
    os.makedirs(target, exist_ok=True)
    print("[HDFS] Directory Created:", target)

def hdfs_put(local, hdfs):
    """Copy the local file *local* to *hdfs* inside the simulated HDFS tree.

    Args:
        local: Path of the existing source file on the local filesystem.
        hdfs: Destination path; it is appended verbatim to ``HDFS_ROOT``.

    Any error raised by ``shutil.copy`` propagates to the caller; nothing is
    caught here.
    """
    destination = HDFS_ROOT + hdfs
    shutil.copy(local, destination)
    # "\u2192" is the right-arrow character. Writing it as an escape keeps the
    # message intact even if this file is re-saved with a different encoding.
    print("[HDFS] Uploaded:", local, "\u2192", destination)

def hdfs_cat(hdfs):
    """Print the full text of the simulated-HDFS file *hdfs*.

    *hdfs* is appended verbatim to ``HDFS_ROOT`` and the file is opened in
    text mode with the platform's default encoding.
    """
    source = HDFS_ROOT + hdfs
    with open(source) as handle:
        content = handle.read()
    print("[HDFS] File Content:\n", content)

def hdfs_ls(path):
    """Print the entry names found in the simulated-HDFS directory *path*.

    The names are printed as the list returned by ``os.listdir``, in whatever
    order it yields them.
    """
    entries = os.listdir(HDFS_ROOT + path)
    print("[HDFS] Listing:", entries)


## **2. HDFS File Operations (mkdir, put, cat, ls)**

In [None]:

# Build the directory tree, parent first.
for directory in ("/exam", "/exam/employee"):
    hdfs_mkdir(directory)

local_file = "/content/employee.txt"
hdfs_file = "/exam/employee/employee.txt"

# Stage a small sample file locally so there is something to upload.
with open(local_file, "w") as f:
    f.write("Employee data simulation for Hadoop practical exam.")

# Upload it, read it back, then list the directory that now contains it.
hdfs_put(local_file, hdfs_file)
hdfs_cat(hdfs_file)
hdfs_ls("/exam/employee")


## **3. MapReduce Simulation – WordCount**

In [None]:

from collections import defaultdict

def map_wordcount(text):
    """Map phase of WordCount.

    Splits *text* on whitespace and returns one ``(word, 1)`` pair per token,
    in input order, with each word lowercased.
    """
    tokens = text.split()
    return [(token.lower(), 1) for token in tokens]

def reduce_wordcount(mapped):
    """Reduce phase of WordCount.

    Sums the values of all ``(word, value)`` pairs in *mapped* that share the
    same word and returns the totals as a ``defaultdict(int)``.
    """
    totals = defaultdict(int)
    for pair in mapped:
        token, count = pair
        totals[token] = totals[token] + count
    return totals

# Sample input; "Hadoop" appears twice so the reduce step has something to sum.
text = "Hadoop Map Reduce Word Count Hadoop Practical Simulation"

mapped = map_wordcount(text)        # map phase: (word, 1) pairs
reduced = reduce_wordcount(mapped)  # reduce phase: per-word totals

# Show both stages; the totals are converted to a plain dict for printing.
for label, value in (("Mapped Output:", mapped), ("Reduced Output:", dict(reduced))):
    print(label, value)


## **4. MapReduce Simulation – Matrix Multiplication**

In [None]:

def matrix_map(A, B):
    """Map phase of the matrix multiplication ``A x B``.

    Emits one ``((i, j), A[i][k] * B[k][j])`` pair for every combination of
    row ``i`` of ``A``, inner index ``k`` and column ``j`` of ``B``. Summing
    the values that share a key gives cell ``(i, j)`` of the product.

    Args:
        A: Left matrix as a sequence of equal-length rows.
        B: Right matrix as a sequence of equal-length rows; it must have as
            many rows as ``A`` has columns.

    Returns:
        List of ``((i, j), partial_product)`` pairs ordered by ``i``, then
        ``k``, then ``j``. Empty when ``A`` has no rows or no columns.

    Raises:
        ValueError: If either matrix is ragged or the inner dimensions differ.
    """
    if len(A) == 0:
        return []

    inner = len(A[0])
    if any(len(row) != inner for row in A):
        raise ValueError("matrix A has rows of unequal length")
    # Without this check a B with extra rows would be silently truncated and
    # a B with too few rows would fail with an unhelpful IndexError.
    if len(B) != inner:
        raise ValueError(
            f"cannot multiply: A has {inner} columns but B has {len(B)} rows"
        )
    if inner == 0:
        return []

    width = len(B[0])
    if any(len(row) != width for row in B):
        raise ValueError("matrix B has rows of unequal length")

    out = []
    for i, row_a in enumerate(A):
        for k, a_ik in enumerate(row_a):
            for j, b_kj in enumerate(B[k]):
                out.append(((i, j), a_ik * b_kj))
    return out

def matrix_reduce(mapped):
    """Reduce phase of the matrix multiplication.

    Adds up the values of all ``(key, value)`` pairs in *mapped* that share a
    key and returns the sums as a ``defaultdict(int)``.
    """
    totals = defaultdict(int)
    for cell, partial in mapped:
        totals[cell] = totals[cell] + partial
    return totals

# Two 2x2 operands for the demonstration.
A, B = [[1, 2], [3, 4]], [[5, 6], [7, 8]]

mapped = matrix_map(A, B)        # map phase: one partial product per (i, k, j)
reduced = matrix_reduce(mapped)  # reduce phase: sum the partials per (i, j)

# Printed as a plain dict keyed by (row, column).
print("Matrix Multiplication Result:", dict(reduced))


## **5. Hive Simulation – Table + Query**

In [None]:

# In-memory stand-in for a table: one dict per row, keyed by column name.
employee_table = [
    dict(zip(("id", "name", "salary"), record))
    for record in (
        (1, "Aman", 50000),
        (2, "Riya", 60000),
        (3, "John", 55000),
    )
]

def hive_select(condition, table=None):
    """Return the rows for which *condition* is truthy.

    Args:
        condition: Callable that takes one row and returns a truthy value for
            rows that should be kept.
        table: Optional iterable of rows to query. When omitted (``None``),
            the module-level ``employee_table`` is used, which keeps existing
            one-argument calls working unchanged.

    Returns:
        A new list containing the matching rows in their original order.
    """
    # Compare against None so an explicitly passed empty table stays empty
    # instead of falling back to the default.
    rows = employee_table if table is None else table
    return [row for row in rows if condition(row)]

# Keep only the employees whose salary is strictly above 55000.
high_salary = hive_select(lambda employee: employee["salary"] > 55000)

# Bare expression so the notebook displays the result of the cell.
high_salary


## **6. Pig Simulation – Group & Sum**

In [None]:

# Input records as (region, product, amount) tuples.
sales = [
    ("North", "ProductA", 100),
    ("South", "ProductA", 150),
    ("North", "ProductB", 200),
]

# Group step: collect every amount under its region, in input order.
grouped = {}
for region, product, amt in sales:
    if region not in grouped:
        grouped[region] = []
    grouped[region].append(amt)

# Sum step: one total per region, in the order regions were first seen.
total_sales = dict(zip(grouped.keys(), map(sum, grouped.values())))

# Bare expression so the notebook displays the result of the cell.
total_sales


## **Notebook Ready for Exam Submission 🚀**