<a href="https://colab.research.google.com/github/liljar2004-sudo/Kenjar_DTSC3020/blob/main/final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Data Collection
# Interactively gather courses until the user types "done".
# Each stored entry is a dict of the form {"name": <str>, "credits": <int>}.
courses = []

while True:
    course_name = input("Enter course name (or 'done' to finish): ").strip()
    if course_name.lower() == "done":
        break
    if not course_name:
        # A blank name would otherwise be stored and printed as an empty row.
        print("Invalid input. Course name cannot be empty.")
        continue
    try:
        credits = int(input(f"Enter credits for {course_name}: "))
    except ValueError:
        print("Invalid input. Please enter an integer for credits.")
        continue
    if credits < 0:
        # Negative credits would silently lower the total and skew the status.
        print("Invalid input. Credits cannot be negative.")
        continue

    courses.append({"name": course_name, "credits": credits})

# 5. Edge Case: No courses entered
if not courses:
    print("No courses entered.")
else:
    # 2. Total Credit Calculation
    total_credits = sum(course["credits"] for course in courses)

    # 3. Enrollment Status
    # Below 12 is part-time, 12 through 18 is full-time, above 18 is overload.
    if total_credits < 12:
        status = "Part-time student"
    elif total_credits <= 18:
        status = "Full-time student"
    else:
        status = "Overload"

    # 4. Summary Output
    print(f"\nYou entered {len(courses)} course(s) totaling {total_credits} credits.")
    print(f"Enrollment status: {status}\n")

    for course in courses:
        print(f"{course['name']} — {course['credits']} credits")


In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

# Load dataset and drop missing values for relevant columns
df = sns.load_dataset("penguins")
df = df.dropna(subset=["species", "bill_length_mm", "flipper_length_mm"])

# 1) Boxplot of Bill Length Across Species
# Compute the species order once so the plotted groups and their tick labels
# are guaranteed to line up.
species_order = df["species"].unique()
bill_lengths = [
    df.loc[df["species"] == sp, "bill_length_mm"] for sp in species_order
]

plt.figure(figsize=(8, 6))
plt.boxplot(bill_lengths)
# Label the boxes through xticks rather than boxplot's `labels=` keyword,
# which is deprecated in Matplotlib 3.9+. Boxes sit at positions 1..N by default.
plt.xticks(range(1, len(species_order) + 1), species_order)
plt.title("Bill Length Distribution by Species")
plt.xlabel("Species")
plt.ylabel("Bill Length (mm)")
plt.grid(axis="y", linestyle="--", alpha=0.7)

# 2) Bar Chart Showing Penguins per Species
species_counts = df["species"].value_counts()
plt.figure(figsize=(8, 6))
plt.bar(species_counts.index, species_counts.values, color='skyblue')
plt.title("Penguin Count by Species")
plt.xlabel("Species")
plt.ylabel("Penguin Count")
plt.grid(axis="y", linestyle="--", alpha=0.7)

# 3) Bar Chart Showing Penguins per Island
island_counts = df["island"].value_counts()
plt.figure(figsize=(8, 6))
plt.bar(island_counts.index, island_counts.values, color='lightgreen')
plt.title("Penguin Count by Island")
plt.xlabel("Island")
plt.ylabel("Penguin Count")
plt.grid(axis="y", linestyle="--", alpha=0.7)

# Display all figures
plt.show()


In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# 1) Fetch & parse the webpage
url = "https://books.toscrape.com/catalogue/page-1.html"
# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page into an empty table.
response.raise_for_status()
# Parse the raw bytes so BeautifulSoup can detect the encoding from the document.
# response.text may be decoded with requests' ISO-8859-1 fallback when the server
# sends no charset, which garbles "£" into "Â£".
soup = BeautifulSoup(response.content, "html.parser")

# Extract all book containers (elements with class "product_pod")
book_items = soup.find_all("article", class_="product_pod")

records = []

# 2) Extract information from each book
for item in book_items:
    # Title: the full title is stored in the link's "title" attribute.
    title = item.h3.a["title"].strip()

    # Price (e.g., "£53.74")
    price = item.find("p", class_="price_color").get_text(strip=True)

    # Availability (e.g., "In stock")
    availability = item.find("p", class_="instock availability").get_text(strip=True)

    # Star rating from class list: ["star-rating", "Three"]
    rating_classes = item.find("p", class_="star-rating")["class"]
    rating = [c for c in rating_classes if c != "star-rating"][0]

    records.append({
        "Title": title,
        "Price": price,
        "Availability": availability,
        "Rating": rating,
    })

# Convert to DataFrame
df = pd.DataFrame(records)

# Display the completed DataFrame
print(df)


In [None]:
import re

# Sample records in "name | email | phone | age" form.
# Several entries are deliberately malformed to exercise the validation.
users = [
    # Well-formed record.
    "Alice | alice@gmail.com | 123-456-7890 | 29",
    # Email has no "@" and the age is not numeric.
    "Bob | bob_at_gmail.com | 111-222-3333 | twenty",
    # Parenthesised area-code phone format.
    "Cathy | cathy@yahoo.com | (123) 555-9999 | 101",
    # Email domain has no dot and the age is negative.
    "David | david@company | 999 888 7777 | -5",
    # Well-formed record.
    "Emma | emma@outlook.com | 123-456-7890 | 45",
]

def clean_users(user_list):
    """Split raw user records into validated and rejected groups.

    Each record is a string of the form ``"name | email | phone | age"``.
    A record is valid when it has exactly four ``|``-separated fields, the
    email looks like ``local@domain.tld`` with no whitespace, the phone is in
    one of the formats ``123-456-7890``, ``(123) 456-7890`` or
    ``123 456 7890``, and the age is an integer from 0 to 120 inclusive.

    Args:
        user_list: Iterable of raw record strings.

    Returns:
        A dict with two keys: ``"valid"`` holds a list of dicts with keys
        ``name``, ``email``, ``phone`` (strings) and ``age`` (int);
        ``"invalid"`` holds the original record strings that failed any
        check. Input order is preserved in both lists.
    """
    result = {"valid": [], "invalid": []}

    # Compile once, outside the loop. Whitespace is excluded from every part
    # of the email so values such as "a b@c.com" are rejected.
    email_re = re.compile(r"[^@\s]+@[^@\s]+\.[^@\s]+")

    # Phone pattern: 123-456-7890 or (123) 456-7890 or 123 456 7890
    phone_re = re.compile(
        r"(\d{3}-\d{3}-\d{4}|\(\d{3}\) \d{3}-\d{4}|\d{3} \d{3} \d{4})"
    )

    for record in user_list:
        parts = [p.strip() for p in record.split("|")]

        if len(parts) != 4:
            result["invalid"].append(record)
            continue

        name, email, phone, age_str = parts

        # Validate email
        if not email_re.fullmatch(email):
            result["invalid"].append(record)
            continue

        # Validate phone
        if not phone_re.fullmatch(phone):
            result["invalid"].append(record)
            continue

        # Validate age: catch only the conversion failure so unrelated errors
        # are not silently swallowed.
        try:
            age = int(age_str)
        except ValueError:
            result["invalid"].append(record)
            continue

        if not 0 <= age <= 120:
            result["invalid"].append(record)
            continue

        # If all validations pass
        result["valid"].append({
            "name": name,
            "email": email,
            "phone": phone,
            "age": age,
        })

    return result


# Part B — Count valid vs invalid
result = clean_users(users)
print("Valid users:", len(result["valid"]))
print("Invalid users:", len(result["invalid"]))

# Part C — Find Gmail users
for user in result["valid"]:
    # Match the full domain including "@" so look-alike domains such as
    # "someone@notgmail.com" are not reported, and compare case-insensitively
    # so "User@Gmail.com" is still recognised.
    if user["email"].lower().endswith("@gmail.com"):
        print(f"Gmail user: {user['name']} ({user['email']})")


In [None]:
import streamlit as st
import pandas as pd

# Step 1 – Build the table of students and their grades per subject
df = pd.DataFrame(
    {
        "Name": ["Alice", "Bob", "Carol", "David"],
        "Math": [90, 75, 82, 95],
        "Science": [85, 88, 91, 89],
    }
)

# Step 2 – Sidebar controls: which subject to inspect and the grade cut-off
subject = st.sidebar.selectbox("Select Subject", ["Math", "Science"])
min_grade = st.sidebar.slider(
    "Minimum Grade",
    min_value=0,
    max_value=100,
    value=80,
)

# Step 3 – Keep only the rows whose grade in the chosen subject meets the cut-off
filtered_df = df.loc[df[subject] >= min_grade]

# Step 4 – Show the matching students, or a warning when nobody qualifies
st.subheader("Students Meeting the Criteria")

if filtered_df.empty:
    st.warning("No students meet this criteria.")
else:
    st.dataframe(filtered_df)
