# 06 — Pandas Data Wrangling

## Learning goals
- Create DataFrames
- Clean/filter/sort data
- Group and aggregate

In [1]:
import pandas as pd

# Column-oriented sample data: each keyword is a column, and the lists are
# aligned by position so that index i describes one student.
data = dict(
    name=["Amina", "Brian", "Chao", "Dina", "Eli"],
    score=[88, 72, 95, 67, 81],
    hours=[10, 7, 12, 5, 9],
)

# Build the frame from the mapping; a default 0..4 integer index is assigned.
df = pd.DataFrame(data=data)
df

Unnamed: 0,name,score,hours
0,Amina,88,10
1,Brian,72,7
2,Chao,95,12
3,Dina,67,5
4,Eli,81,9


In [2]:
# Keep the rows scoring 80 or more and derive "efficiency" (score per hour).
# `.assign` returns a new frame, so `df` itself is left untouched.
filtered = df.loc[df["score"].ge(80)].assign(
    efficiency=lambda rows: rows["score"] / rows["hours"]
)
# Show the most efficient students first (display only; `filtered` keeps
# its original row order).
filtered.sort_values(by="efficiency", ascending=False)

Unnamed: 0,name,score,hours,efficiency
4,Eli,81,9,9.0
0,Amina,88,10,8.8
2,Chao,95,12,7.916667


In [3]:
# Grouping example: bucket study hours into three labelled bands, then
# average the scores within each band.
# pd.cut uses right-closed intervals by default: (0, 6], (6, 9], (9, 12],
# so a student with exactly 9 hours lands in "mid".
bins = pd.cut(df["hours"], bins=[0, 6, 9, 12], labels=["low", "mid", "high"])
# `bins` is categorical. Pass `observed` explicitly so the result does not
# depend on the installed pandas version's default (which is deprecated and
# changing); observed=False keeps every band in the output, even empty ones.
df.groupby(bins, observed=False)["score"].mean()

hours
low     67.0
mid     76.5
high    91.5
Name: score, dtype: float64

In [None]:
# Practice task:
# Add a "result" column to df that is "Pass" when score >= 75 and "Fail" otherwise.

In [7]:
import pandas as pd
import numpy as np

# Practice-task solution: build a synthetic class of students, label each one
# Pass/Fail, then summarise the two groups.

SEED = 42         # fixed seed so Restart & Run All reproduces the same scores
N_STUDENTS = 10   # number of synthetic students
PASS_MARK = 75    # minimum score that counts as a pass
MIN_SCORE = 50    # lowest score kept in the cleaned frame

rng = np.random.default_rng(SEED)

# Student names paired with random integer scores in [50, 100]
# (the upper bound given to `integers` is exclusive, hence 101).
df = pd.DataFrame({
    "Student": [f"Student{i}" for i in range(1, N_STUDENTS + 1)],
    "Score": rng.integers(50, 101, N_STUDENTS),
})

# Add the Pass/Fail column (Pass if score >= PASS_MARK). np.where is
# vectorised, so no Python-level lambda runs per row.
df["Result"] = np.where(df["Score"] >= PASS_MARK, "Pass", "Fail")

# Keep only students who scored >= MIN_SCORE and sort by Score descending.
df_clean = df[df["Score"] >= MIN_SCORE].sort_values(by="Score", ascending=False)

# Count of students per Pass/Fail group.
result_counts = df_clean.groupby("Result")["Student"].count()

# Average score per Pass/Fail group.
average_scores = df_clean.groupby("Result")["Score"].mean()

# Blank lines separate the three printed tables.
print("")
print(df_clean)
print("")
print(result_counts)
print("")
print(average_scores)



     Student  Score Result
2   Student3     86   Pass
1   Student2     85   Pass
3   Student4     80   Pass
6   Student7     78   Pass
8   Student9     70   Fail
7   Student8     64   Fail
5   Student6     62   Fail
4   Student5     57   Fail
9  Student10     55   Fail
0   Student1     51   Fail

Result
Fail    6
Pass    4
Name: Student, dtype: int64

Result
Fail    59.833333
Pass    82.250000
Name: Score, dtype: float64
