<a href="https://colab.research.google.com/github/suryatejaganji/DATA-ANALYSIS-USING-PYTHON/blob/main/week_1_30_7_25_python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd


# Completion time recorded for each student, in hours (the unit used when the
# descriptive statistics are printed later in this notebook).
completion_times = [22, 27, 35, 31, 28, 24, 40, 18, 29, 33]


def classify(time):
    """Map a completion time to a learner category.

    Args:
        time: Completion time to classify (a number).

    Returns:
        "Fast Learner" when ``time`` is below 25, "On Time" when it lies in
        the inclusive range 25..30, and "Needs Support" otherwise.
    """
    # Guard clauses: each check only runs when the previous one did not match,
    # so reaching the second test already implies time >= 25.
    if time < 25:
        return "Fast Learner"
    if time <= 30:
        return "On Time"
    return "Needs Support"


# Label every completion time with its learner category.
categories = list(map(classify, completion_times))

# Build the frame with "Category" typed as an ordered categorical from the
# start (Fast Learner < On Time < Needs Support) instead of converting the
# column after construction.
df = pd.DataFrame(
    {
        "Completion_Time": completion_times,
        "Category": pd.Categorical(
            categories,
            categories=["Fast Learner", "On Time", "Needs Support"],
            ordered=True,
        ),
    }
)

print(df)

   Completion_Time       Category
0               22   Fast Learner
1               27        On Time
2               35  Needs Support
3               31  Needs Support
4               28        On Time
5               24   Fast Learner
6               40  Needs Support
7               18   Fast Learner
8               29        On Time
9               33  Needs Support


In [3]:

# Basic facts about the raw input list.
print("Length of Completion Time Data:", len(completion_times))
print("Data Type of Completion Time Data:", type(completion_times))

# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appends a stray "None" line.
print("\nDataFrame Structure:")
df.info()

# Absolute number of students per category.
print("\nFrequency of Each Category:")
print(df["Category"].value_counts())

# Same counts expressed as a percentage of all students, rounded to 2 decimals.
print("\nProportion of Each Category (%):")
print((df["Category"].value_counts(normalize=True) * 100).round(2))


Length of Completion Time Data: 10
Data Type of Completion Time Data: <class 'list'>

DataFrame Structure:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype   
---  ------           --------------  -----   
 0   Completion_Time  10 non-null     int64   
 1   Category         10 non-null     category
dtypes: category(1), int64(1)
memory usage: 354.0 bytes
None

Frequency of Each Category:
Category
Needs Support    4
Fast Learner     3
On Time          3
Name: count, dtype: int64

Proportion of Each Category (%):
Category
Needs Support    40.0
Fast Learner     30.0
On Time          30.0
Name: proportion, dtype: float64


In [5]:
import numpy as np


times = df["Completion_Time"]

# Central tendency and spread of the completion times.
# np.std uses its default ddof=0 here, i.e. the population standard deviation.
mean_time, median_time, std_dev = np.mean(times), np.median(times), np.std(times)

print("=== DESCRIPTIVE STATISTICS ===")
print(f"Mean Completion Time: {mean_time:.2f} hours")
print(f"Median Completion Time: {median_time:.2f} hours")
print(f"Standard Deviation: {std_dev:.2f} hours")

# A student counts as "passed" when classified as Fast Learner or On Time.
passed_mask = df["Category"].isin(["Fast Learner", "On Time"])
passed_count = int(passed_mask.sum())
pass_rate = (passed_count / len(df)) * 100

print(f"\nNumber of Students Passed: {passed_count}")
print(f"Pass Rate: {pass_rate:.2f}%")


=== DESCRIPTIVE STATISTICS ===
Mean Completion Time: 28.70 hours
Median Completion Time: 28.50 hours
Standard Deviation: 6.13 hours

Number of Students Passed: 6
Pass Rate: 60.00%


In [None]:
import matplotlib.pyplot as plt

print("\n=== LEVEL 5: DYNAMIC CLASSIFICATION ===")

# Fallback threshold (hours) used when the user enters nothing or something unusable.
DEFAULT_THRESHOLD = 30

# Ask user to enter new expected completion threshold.
# The raw text is kept in its own name so user_threshold is only ever an int.
raw_threshold = input(
    f"Enter new expected completion time (default = {DEFAULT_THRESHOLD}): "
).strip()

if raw_threshold == "":
    # If user presses Enter, keep the default.
    user_threshold = DEFAULT_THRESHOLD
else:
    try:
        user_threshold = int(raw_threshold)
    except ValueError:
        # int() rejects text such as "abc" or "30.5"; fall back to the default
        # instead of aborting the cell with a traceback.
        print(f"Invalid threshold {raw_threshold!r}; using default = {DEFAULT_THRESHOLD}.")
        user_threshold = DEFAULT_THRESHOLD
    else:
        if user_threshold <= 0:
            # A non-positive expected completion time cannot classify anything sensibly.
            print(f"Threshold must be positive; using default = {DEFAULT_THRESHOLD}.")
            user_threshold = DEFAULT_THRESHOLD

# Dynamic classification rules:
# Fast Learner: < (threshold - 5)
# On Time: between (threshold - 5) and threshold
# Needs Support: > threshold

def dynamic_classify(time, threshold=None, fast_margin=5):
    """Classify a completion time relative to an adjustable threshold.

    Args:
        time: Completion time to classify (a number).
        threshold: Expected completion time. When omitted (``None``), the
            notebook-level ``user_threshold`` is used, so the function can
            still be passed directly to ``Series.apply``.
        fast_margin: How far below ``threshold`` a time must fall to count as
            "Fast Learner". Defaults to 5.

    Returns:
        "Fast Learner" when ``time < threshold - fast_margin``, "On Time" when
        ``threshold - fast_margin <= time <= threshold``, and "Needs Support"
        otherwise.
    """
    if threshold is None:
        # Read the global only when no explicit threshold is supplied.
        threshold = user_threshold

    fast_limit = threshold - fast_margin
    if time < fast_limit:
        return "Fast Learner"
    # Reaching this point already implies time >= fast_limit.
    if time <= threshold:
        return "On Time"
    return "Needs Support"

# Apply dynamic classification and store it as an ordered categorical so the
# labels keep their natural order (Fast Learner < On Time < Needs Support).
df["Dynamic_Category"] = pd.Categorical(
    df["Completion_Time"].apply(dynamic_classify),
    categories=["Fast Learner", "On Time", "Needs Support"],
    ordered=True
)

# Display updated categories
print("\nReclassified Student Categories:")
print(df[["Completion_Time", "Dynamic_Category"]])

# -----------------------------
# Visualization: Bar Chart
# -----------------------------
# value_counts() orders its result by frequency; sort_index() restores the
# category order so the bars always read Fast Learner -> On Time -> Needs Support,
# whatever the counts are.
category_counts = df["Dynamic_Category"].value_counts().sort_index()

# Explicit figure/axes instead of the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(7, 5))
category_counts.plot(kind="bar", ax=ax, rot=0)  # rot=0 keeps the tick labels horizontal

ax.set_title(f"Student Distribution by Category (Threshold = {user_threshold} hours)")
ax.set_xlabel("Learner Type")
ax.set_ylabel("Number of Students")
ax.grid(axis="y", linestyle="--")
fig.tight_layout()
plt.show()


=== LEVEL 5: DYNAMIC CLASSIFICATION ===
