In [None]:
# This cell is intentionally left blank for initialization or future use.

In [None]:
# Python Fundamentals: Variables, Control Flow, Functions, Reusable Code

# --- 1. Variables ---
# Let's start with variables. They are like containers that store information.

name = "Alice"    # text (str)
age = 25          # whole number (int)
height = 1.68     # decimal number (float), in meters

# f-strings place a variable's value directly inside the text.
print(f"Hello, my name is {name}")
print(f"I am {age} years old.")
print(f"My height is {height} meters.")

# --- 2. Control Flow (if/else) ---
# Control flow helps us make decisions.

# Anyone younger than 18 is a minor; everyone else is an adult.
if age < 18:
    print("You are a minor ❌")
else:
    print("You are an adult ✅")

# --- 3. Loops ---
# Loops allow repetition without copy-pasting.

# A for loop walks through a known sequence: here the numbers 0 to 4.
for i in range(0, 5):
    print(f"Loop number: {i}")

# A while loop keeps running for as long as its condition stays true.
counter = 0
while counter < 3:
    print(f"Counter is at: {counter}")
    counter = counter + 1

# --- 4. Functions ---
# Functions let us reuse code and keep it organized.

def greet_user(user_name, user_age):
    """Print a one-line greeting that mentions the user's name and age."""
    greeting = "Hello {}, you are {} years old!".format(user_name, user_age)
    print(greeting)

# The same function works for any user: only the arguments change.
greet_user(user_name="Alice", user_age=25)
greet_user(user_name="Bob", user_age=30)

# --- 5. Reusable Code with Functions ---
# Example: calculate Body Mass Index (BMI)

def calculate_bmi(weight, height):
    """Compute the Body Mass Index: weight divided by height squared.

    Args:
        weight: Body weight in kilograms.
        height: Body height in meters.

    Returns:
        The BMI value.
    """
    return weight / height ** 2

# Call the function once and keep the result so it can be reused.
alice_bmi = calculate_bmi(weight=60, height=1.68)
print(f"Alice's BMI is: {round(alice_bmi, 2)}")

# --- Mini Data Exploration ---
# Let's play with a small dataset using Python basics.

# Each student is a dictionary; the dataset is a list of those dictionaries.
students = [
    {"name": "Alice", "age": 25, "score": 85},
    {"name": "Bob", "age": 22, "score": 90},
    {"name": "Charlie", "age": 23, "score": 72},
]

# Add up every score (sum() loops over the students for us),
# then divide by the number of students to get the average.
total_score = sum(entry["score"] for entry in students)
average_score = total_score / len(students)

print("Average score of students is:", average_score)

# --- 🎯 Mission ---
# Task: Create a function `grade_students` that:
# - Takes the students list as input
# - Assigns "Pass" if score >= 75, otherwise "Fail"
# - Returns a new list with student names and their grade
#
# Example Output:
# [{"name": "Alice", "grade": "Pass"}, {"name": "Bob", "grade": "Pass"}, ...]

# 🚀 Your turn! Try coding below:



# ### **Step 1. Setup: Import Libraries**
# Every data science script starts by importing the tools we need. We'll import the libraries discussed above.

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.ticker import FuncFormatter

# Apply seaborn's "whitegrid" theme to the plots created below.
sns.set_style(style="whitegrid")


# ### **Step 2. Load the Data**
# We will load the COVID-19 dataset directly from the "Our World in Data" website using pandas.

url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

# pandas can read a CSV straight from a web address; this needs an
# internet connection.
print("Loading dataset from the web...")
df_covid = pd.read_csv(filepath_or_buffer=url)
print("Dataset loaded successfully!")


# ### **Step 3. Prepare the Data**
# Real-world data is often messy. Our preparation will involve three simple steps to make it easier to work with.

# #### Step 3a: Convert the 'date' column
# We need to tell pandas that the 'date' column contains actual dates, not just text.

# Replace the 'date' column with its datetime-converted version.
df_covid['date'] = pd.to_datetime(df_covid['date'])

# #### Step 3b: Select only the columns we need
# The original table has over 60 columns! Let's choose just a few to keep it simple.

columns_to_keep = ['location', 'date', 'total_cases', 'total_deaths']
# .loc[rows, columns]: keep every row (:) but only the listed columns.
df_selected_cols = df_covid.loc[:, columns_to_keep]

# #### Step 3c: Select just two countries for a clear comparison
# We'll focus on the United States and India.

countries_to_compare = ['United States', 'India']
# Keep only the rows whose 'location' is one of the chosen countries.
df_subset = df_selected_cols.loc[
    df_selected_cols['location'].isin(countries_to_compare)
]

print("\nHere's a sample of our final, clean dataset:")
display(df_subset.head(5))


# ### **Step 4. Visualize the Data**
# Now we can create some plots to see the trends and tell a story with our data.

# #### Plot 1: Total Cases Over Time
# A line chart is the best way to show how a number changes over time.

plt.figure(figsize=(12, 7))
sns.lineplot(data=df_subset, x='date', y='total_cases', hue='location')
# Matplotlib chooses its own scale for large numbers, so a unit written only
# in the label can disagree with the ticks. Format the ticks explicitly in
# millions so the axis and its label always match.
plt.gca().yaxis.set_major_formatter(
    FuncFormatter(lambda value, _pos: f'{value / 1e6:,.0f}')
)
plt.title('Cumulative COVID-19 Cases: United States vs. India', fontsize=16)
plt.xlabel('Date')
plt.ylabel('Total Confirmed Cases (millions)')
plt.show()


# #### Plot 2: Total Deaths Over Time
# We can do the same for the total number of deaths.

plt.figure(figsize=(12, 7))
# lineplot returns the axes it drew on, so the axis labels can be set on it directly.
sns.lineplot(
    data=df_subset, x='date', y='total_deaths', hue='location'
).set(xlabel='Date', ylabel='Total Confirmed Deaths')
plt.title('Cumulative COVID-19 Deaths: United States vs. India', fontsize=16)
plt.show()


# #### Plot 3: Compare the Most Recent Totals
# A bar chart is perfect for comparing a final number between different groups.

# To get the latest data, we sort by date and keep the last entry for each country.
# Rows with no death count are dropped first: the newest rows may not have a
# value yet, and a missing value would show up as an empty bar.
latest_data = (
    df_subset.dropna(subset=['total_deaths'])
    .sort_values('date')
    .drop_duplicates('location', keep='last')
)

print("\nLatest available data for each country:")
display(latest_data)

plt.figure(figsize=(10, 6))
# Colour the bars through `hue`: recent seaborn releases deprecate passing
# `palette` without `hue`. dodge=False keeps each bar centred on its label.
sns.barplot(data=latest_data, x='location', y='total_deaths',
            hue='location', palette='mako', dodge=False)
# Using `hue` may add a legend that only repeats the x-axis labels; remove it if present.
bar_legend = plt.gca().get_legend()
if bar_legend is not None:
    bar_legend.remove()
plt.title('Total COVID-19 Deaths (Latest Data)', fontsize=16)
plt.xlabel('Country')
plt.ylabel('Total Deaths')
plt.show()


# ### **Step 5. Conclusion**
# In this exercise, we practiced the core skills of data analysis: loading, cleaning, and visualizing data. We went from a large, complex dataset to clear visualizations that tell a story.

# **Questions to think about:**
# 1. What other countries would be interesting to compare? Try changing the `countries_to_compare` list.
# 2. What other columns from the original dataset could you visualize?