# ML Lab Activity 1: The Space Mission Roster
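Data analysis with pandas: build a small roster of astronaut candidates, then summarize, filter, evaluate, rank, and aggregate it.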

In [1]:
import pandas as pd

# Sample astronaut candidate data, written as compact rows and expanded
# into one dict per candidate so `data` stays a list of records.
CANDIDATE_FIELDS = ("Name", "Country", "Experience", "Fitness_Score")
candidate_rows = [
    ("Alice", "USA", 6, 92),
    ("Bob", "Canada", 4, 88),
    ("Charlie", "UK", 2, 85),
    ("Diana", "India", 7, 91),
    ("Ethan", "Australia", 5, 89),
]
data = [dict(zip(CANDIDATE_FIELDS, candidate)) for candidate in candidate_rows]

# Build the roster DataFrame (one row per candidate) and display it
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Country,Experience,Fitness_Score
0,Alice,USA,6,92
1,Bob,Canada,4,88
2,Charlie,UK,2,85
3,Diana,India,7,91
4,Ethan,Australia,5,89


In [2]:
# Quick overview of the roster: dimensions, column names, numeric summary
print(f"Shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(f"\nSummary Statistics:\n {df.describe()}")

Shape: (5, 4)
Columns: ['Name', 'Country', 'Experience', 'Fitness_Score']

Summary Statistics:
        Experience  Fitness_Score
count    5.000000       5.000000
mean     4.800000      89.000000
std      1.923538       2.738613
min      2.000000      85.000000
25%      4.000000      88.000000
50%      5.000000      89.000000
75%      6.000000      91.000000
max      7.000000      92.000000


In [3]:
# Keep only candidates with Experience above 5 AND Fitness_Score above 87
filtered_df = df.loc[df['Experience'].gt(5) & df['Fitness_Score'].gt(87)]
filtered_df

Unnamed: 0,Name,Country,Experience,Fitness_Score
0,Alice,USA,6,92
3,Diana,India,7,91


In [4]:
# Evaluation function
def evaluate_candidate(row):
    """Classify one candidate as "Excellent", "Good", or "Average".

    Parameters
    ----------
    row : mapping-like
        Any object indexable by the keys 'Fitness_Score' and 'Experience',
        e.g. a row passed in by ``DataFrame.apply(..., axis=1)`` or a dict.

    Returns
    -------
    str
        "Excellent" if Fitness_Score >= 90 and Experience >= 5;
        "Good" if 80 <= Fitness_Score < 90 or Experience >= 3;
        "Average" otherwise.
    """
    fitness = row['Fitness_Score']

    if fitness >= 90 and row['Experience'] >= 5:
        return "Excellent"

    # The fitness band is half-open (< 90) rather than "<= 89" so fractional
    # scores such as 89.5 are not dropped between the "Good" and "Excellent"
    # bands. Integer scores are classified exactly as before.
    if 80 <= fitness < 90 or row['Experience'] >= 3:
        return "Good"

    # NOTE(review): a candidate with Fitness_Score >= 90 but Experience < 3
    # ends up here as "Average" - confirm this matches the lab's rubric.
    return "Average"

# Label every candidate row-by-row, then store the labels as a new
# 'Evaluation' column on the roster (later cells rely on this column).
evaluation_labels = df.apply(evaluate_candidate, axis=1)
df['Evaluation'] = evaluation_labels
df

Unnamed: 0,Name,Country,Experience,Fitness_Score,Evaluation
0,Alice,USA,6,92,Excellent
1,Bob,Canada,4,88,Good
2,Charlie,UK,2,85,Good
3,Diana,India,7,91,Excellent
4,Ethan,Australia,5,89,Good


In [5]:
# Rank candidates from highest to lowest Fitness_Score
df_sorted = df.sort_values('Fitness_Score', ascending=False)
df_sorted

Unnamed: 0,Name,Country,Experience,Fitness_Score,Evaluation
0,Alice,USA,6,92,Excellent
3,Diana,India,7,91,Excellent
4,Ethan,Australia,5,89,Good
1,Bob,Canada,4,88,Good
2,Charlie,UK,2,85,Good


In [6]:
# First three rows of the fitness-ranked roster
top_3 = df_sorted.iloc[:3]
top_3

Unnamed: 0,Name,Country,Experience,Fitness_Score,Evaluation
0,Alice,USA,6,92,Excellent
3,Diana,India,7,91,Excellent
4,Ethan,Australia,5,89,Good


In [7]:
# Bonus: mean Fitness_Score for each country, as a {country: mean} dict
avg_fitness_by_country = (
    df.groupby('Country')['Fitness_Score']
    .agg('mean')
    .to_dict()
)
avg_fitness_by_country

{'Australia': 89.0, 'Canada': 88.0, 'India': 91.0, 'UK': 85.0, 'USA': 92.0}