Creating the dataframes for later use

In [4]:
from pathlib import Path

import pandas as pd
import numpy as np

# Fixed seed so the generated sample data (and therefore the saved CSVs) is
# identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Output folder for the generated CSVs. Built with pathlib so the path
# separator is correct on every OS (a literal 'Dataframes\...' string only
# resolves to a sub-folder on Windows), and created up front so the save
# below does not fail on a fresh checkout.
OUTPUT_DIR = Path('Dataframes')
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# -------------------
# Create DataFrame A
# -------------------
# 20 employees (E001–E020) with a random department, age and salary.
data_A = {
    'Emp_ID': [f'E{i:03d}' for i in range(1, 21)],
    'Dept_ID': rng.choice(['D001', 'D002', 'D003', 'D004'], 20),
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Evelyn', 'Frank', 'Grace', 'Helen', 'Ian', 'Jane',
             'Karl', 'Lina', 'Mike', 'Nina', 'Owen', 'Paul', 'Queen', 'Rita', 'Steve', 'Tina'],
    'Age': rng.integers(25, 50, 20),          # upper bound is exclusive: ages 25–49
    'Salary': rng.integers(40000, 90000, 20)  # upper bound is exclusive
}
df_A = pd.DataFrame(data_A)

# -------------------
# Create DataFrame B
# -------------------
# 20 detail records (E010–E029). Only E010–E020 also exist in DataFrame A, and
# 'D005' never appears in A, so the two frames overlap only partially.
data_B = {
    'Emp_ID': [f'E{i:03d}' for i in range(10, 30)],  # overlapping IDs from E010–E020
    'Dept_ID': rng.choice(['D001', 'D002', 'D005'], 20),
    'Location': rng.choice(['New York', 'London', 'Tokyo', 'Sydney'], 20),
    'Experience': rng.integers(1, 15, 20),
    'Bonus': rng.integers(2000, 10000, 20)
}
df_B = pd.DataFrame(data_B)

# Save both CSVs (without the positional index column)
df_A.to_csv(OUTPUT_DIR / 'df_employees.csv', index=False)
df_B.to_csv(OUTPUT_DIR / 'df_emp_details.csv', index=False)

print("DataFrames saved as df_employees.csv and df_emp_details.csv")


DataFrames saved as df_employees.csv and df_emp_details.csv


Creating the student dataframes

In [8]:
import pandas as pd
import numpy as np

# ---------------------------
# 1️⃣ Create the datasets
# ---------------------------

# Student basic details: 10 students with IDs 1–10
students = pd.DataFrame({
    'Student_ID': range(1, 11),
    'Name': ['Amal', 'Binu', 'Chathura', 'Dewmini', 'Eshan', 
             'Farah', 'Gayan', 'Hiruni', 'Ishara', 'Janith'],
    'Grade': ['A', 'B', 'A', 'C', 'B', 'A', 'C', 'B', 'A', 'B']
})

# Exam scores dataset: only 7 of the 10 student IDs are present
exam_scores = pd.DataFrame({
    'Student_ID': [1, 2, 3, 5, 6, 8, 9],
    'Math_Score': [85, 76, 92, 67, 88, 79, 95],
    'Science_Score': [90, 82, 89, 70, 85, 75, 91]
})

# Attendance dataset: indexed by student name (first 8 students only)
attendance = pd.DataFrame({
    'Days_Present': [40, 38, 45, 30, 36, 43, 42, 33],
    'Days_Total': [45, 45, 45, 45, 45, 45, 45, 45]
}, index=['Amal', 'Binu', 'Chathura', 'Dewmini', 'Eshan', 
          'Farah', 'Gayan', 'Hiruni'])

# Class scores with a two-level row index (Class, Student_ID)
class_scores = pd.DataFrame({
    'Class': ['A', 'A', 'B', 'B', 'C', 'C', 'A', 'B'],
    'Student_ID': [1, 3, 2, 5, 4, 7, 9, 8],
    'English_Score': [78, 85, 80, 70, 65, 75, 88, 74]
}).set_index(['Class', 'Student_ID'])


# ---------------------------
# 2️⃣ Save as CSVs (simulate your lecture setup)
# ---------------------------

students.to_csv('students.csv', index=False)
exam_scores.to_csv('exam_scores.csv', index=False)
attendance.to_csv('attendance.csv')  # Keep index for join
class_scores.to_csv('class_scores.csv')  # Both index levels are written as leading columns


# ---------------------------
# 3️⃣ Load the CSV files back (as students would do in class)
# ---------------------------

students_df = pd.read_csv('students.csv')
exam_df = pd.read_csv('exam_scores.csv')
attendance_df = pd.read_csv('attendance.csv', index_col=0)  # index column retained
# The file has a single header row ("Class,Student_ID,English_Score"), so only
# index_col is needed to rebuild the row MultiIndex. Passing header=[0, 1]
# would treat the first data record as a second header level and drop it.
class_scores_df = pd.read_csv('class_scores.csv', index_col=[0, 1])  # Multi-index

In [9]:
import pandas as pd

# Build the columns of a 10-employee dataset one at a time.
# IDs and phone numbers are sequential, so they are generated; names and
# birth dates are listed explicitly. DOB and PhoneNumber are kept as strings.
employee_ids = [100000 + n for n in range(1, 11)]
full_names = [
    'Alice Johnson', 'Bob Smith', 'Charlie Brown', 'David Lee', 'Evelyn Davis',
    'Frank Moore', 'Grace Wilson', 'Helen Taylor', 'Ian Thomas', 'Jane White',
]
birth_dates = [
    '1985-04-12', '1990-05-20', '1988-08-15', '1992-11-01', '1987-03-22',
    '1991-12-05', '1989-07-30', '1993-09-17', '1986-06-25', '1994-01-10',
]
phone_numbers = [str(5551234000 + n) for n in range(1, 11)]

# Column name -> values, in the order the columns should appear
employee_data = {
    'EmployeeID': employee_ids,
    'FullName': full_names,
    'DOB': birth_dates,
    'PhoneNumber': phone_numbers,
}

# Assemble the frame and show it as the cell's output
df_employees = pd.DataFrame(employee_data)
df_employees


Unnamed: 0,EmployeeID,FullName,DOB,PhoneNumber
0,100001,Alice Johnson,1985-04-12,5551234001
1,100002,Bob Smith,1990-05-20,5551234002
2,100003,Charlie Brown,1988-08-15,5551234003
3,100004,David Lee,1992-11-01,5551234004
4,100005,Evelyn Davis,1987-03-22,5551234005
5,100006,Frank Moore,1991-12-05,5551234006
6,100007,Grace Wilson,1989-07-30,5551234007
7,100008,Helen Taylor,1993-09-17,5551234008
8,100009,Ian Thomas,1986-06-25,5551234009
9,100010,Jane White,1994-01-10,5551234010
