<a href="https://colab.research.google.com/github/saudaziz555/AI-Powered-Student-Exam-Performance-Predictor/blob/main/Predicator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 2. Student Exam Prediction Dashboard (Streamlit app)

In [1]:
pip install streamlit pyngrok pandas scikit-learn xgboost joblib matplotlib seaborn openpyxl -q

In [2]:
%%writefile app.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import io
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


# Set the browser-tab title and use the wide page layout.
st.set_page_config(
    page_title="Student Exam Predictor",
    layout="wide",
)

# --- Sidebar: branding, data upload, and model settings ---
st.sidebar.title("Student Dashboard")
st.sidebar.markdown("---")

st.sidebar.header("Upload Data")
uploaded_file = st.sidebar.file_uploader(
    "Upload Excel File",
    type=["xlsx", "xls"],
)

st.sidebar.header("Settings")
# Slider spans 0-100 and starts at 20; the value is handed to train_model as the pass mark.
threshold = st.sidebar.slider(
    "Pass Mark Threshold",
    min_value=0,
    max_value=100,
    value=20,
)
# True only on the rerun triggered by clicking the button.
retrain = st.sidebar.button("Retrain Model")

# --- Main page heading ---
st.title("Student Exam Prediction Dashboard")
st.markdown("This dashboard analyzes student outcomes and predicts pass/fail status using AI.")

# train Model Function
def train_model(file, threshold):
    """Train a pass/fail classifier from an uploaded Excel sheet.

    The sheet is searched (case-insensitively, by substring) for:
      * a mark column      -- first column whose name contains 'mark';
      * outcome columns    -- every column whose name contains 'outcome'
                              (these are the model features);
      * a student-id column -- first column whose name contains 'id'.

    A student passes when their mark is >= ``threshold``.

    Args:
        file: Path or file-like object accepted by ``pd.read_excel``.
        threshold: Minimum mark that counts as a pass.

    Returns:
        ``(results, model)`` on success, where ``results`` is a DataFrame with
        the columns ``[<id>, 'pass', 'pass_probability', <mark>]`` (in that
        order) and ``model`` is the fitted ``XGBClassifier``.
        ``(None, None)`` if the file cannot be read, a required column is
        missing, or the data cannot support a stratified train/test split.
        In every failure case an error is shown via ``st.error``.

    Side effects:
        Writes the fitted model to ``model.pkl`` and shows a toast with the
        hold-out accuracy.
    """
    try:
        data = pd.read_excel(file)
    except Exception as e:
        st.error(f"Failed to read file: {e}")
        return None, None

    # Excel headers are not guaranteed to be strings (e.g. numeric headers),
    # so normalise through str() before the case-insensitive substring checks.
    lowered = {col: str(col).lower() for col in data.columns}

    mark_column = [col for col in data.columns if 'mark' in lowered[col]]
    if not mark_column:
        st.error("No column containing 'mark' found.")
        return None, None
    mark_col = mark_column[0]

    # Never use the mark itself as a feature: the label is derived from it.
    outcome_cols = [
        col for col in data.columns
        if 'outcome' in lowered[col] and col != mark_col
    ]
    if not outcome_cols:
        st.error("No 'outcome' columns found.")
        return None, None

    # 'id' is a loose substring ("Midterm Mark" contains it), so exclude the
    # mark column and prefer candidates that are not also feature columns.
    id_cols = [
        col for col in data.columns
        if 'id' in lowered[col] and col != mark_col
    ]
    if not id_cols:
        st.error("No 'ID' column found.")
        return None, None
    non_feature_ids = [col for col in id_cols if col not in outcome_cols]
    student_id_col = (non_feature_ids or id_cols)[0]

    # A missing or non-numeric mark is unknown, not a fail: drop those rows
    # instead of letting `NaN >= threshold` silently label them as 0.
    marks = pd.to_numeric(data[mark_col], errors='coerce')
    missing_marks = int(marks.isna().sum())
    if missing_marks:
        st.warning(
            f"Ignoring {missing_marks} row(s) with a missing or non-numeric "
            f"'{mark_col}' value."
        )
        data = data.loc[marks.notna()].copy()
        marks = marks.loc[data.index]
    data[mark_col] = marks
    data['pass'] = (marks >= threshold).astype(int)

    # Coerce features to numeric; unparseable cells become NaN.
    X = data[outcome_cols].apply(pd.to_numeric, errors='coerce')
    y = data['pass']

    # A stratified split needs both classes present with at least two members each.
    class_counts = y.value_counts()
    if len(class_counts) < 2 or class_counts.min() < 2:
        st.error(
            "Training needs at least two passing and two failing students at "
            "this threshold. Adjust the pass mark or upload more data."
        )
        return None, None

    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, stratify=y, test_size=0.2, random_state=42
        )
    except ValueError as e:
        # Raised when the dataset is too small for the requested split.
        st.error(f"Not enough data to create a train/test split: {e}")
        return None, None

    model = XGBClassifier(eval_metric='logloss', random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    st.toast(f"Model trained with accuracy: {acc:.2%}", icon="✅")

    joblib.dump(model, "model.pkl")
    # Column 1 of predict_proba is the probability of the positive (pass) class.
    data['pass_probability'] = model.predict_proba(X)[:, 1]

    # Column order is relied upon by the dashboard (the mark is the 4th column).
    results = data[[student_id_col, 'pass', 'pass_probability', mark_col]]
    return results, model

# main
# (Re)train when no usable result is cached -- this includes the case where a
# previous attempt failed and stored None -- or when the user asks for a retrain.
if st.session_state.get('data') is None or retrain:
    if uploaded_file:
        st.session_state.data, st.session_state.model = train_model(uploaded_file, threshold)
    else:
        st.info("Please upload an Excel file.")

if st.session_state.get('data') is not None:
    df = st.session_state.data
    # train_model returns [id, pass, pass_probability, mark]; the mark is the 4th column.
    marks = df.iloc[:, 3]

    #metric cards
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Students", f"{len(df)}")
    col2.metric("Pass Rate", f"{df['pass'].mean():.2%}")
    col3.metric("Average Mark", f"{marks.mean():.2f}")
    # mode() is empty when there are no non-null marks; avoid indexing into it blindly.
    mark_modes = marks.mode()
    most_frequent_mark = mark_modes.iloc[0] if not mark_modes.empty else "N/A"
    col4.metric("Most Frequent Mark", f"{most_frequent_mark}")

    #distribution
    st.subheader("Pass Probability Distribution")
    fig1, ax1 = plt.subplots(figsize=(8, 4))
    sns.histplot(df['pass_probability'], bins=10, kde=True, ax=ax1, color='skyblue')
    ax1.set_xlabel("Pass Probability")
    ax1.set_ylabel("Number of Students")
    st.pyplot(fig1)
    # The script reruns on every interaction; close the figure so pyplot does
    # not keep accumulating open figures in the long-lived server process.
    plt.close(fig1)

    # feature importance
    fig2 = None
    try:
        model = st.session_state.model
        importance = model.feature_importances_
        features = model.get_booster().feature_names

        st.subheader("Feature Importance")
        fig2, ax2 = plt.subplots(figsize=(8, 4))
        sns.barplot(x=importance, y=features, palette="viridis", ax=ax2)
        ax2.set_xlabel("Importance")
        ax2.set_ylabel("Feature")
        st.pyplot(fig2)
    except Exception as e:
        # Best-effort section: the rest of the dashboard is still useful without it.
        st.warning(f"Could not plot feature importance: {e}")
    finally:
        if fig2 is not None:
            plt.close(fig2)

    # show table and download
    st.subheader("Prediction Results")
    st.dataframe(df.style.format({'pass_probability': '{:.2%}'}))

    # Build the workbook in memory so nothing is written to disk for the download.
    buffer = io.BytesIO()
    df.to_excel(buffer, index=False, engine='openpyxl')
    buffer.seek(0)
    st.download_button("Download Results as Excel", data=buffer, file_name="student_predictions.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
else:
    st.info("Upload data and train the model to view results.")


Overwriting app.py


In [3]:
import socket
import subprocess
import time

from google.colab import userdata
from pyngrok import ngrok

STREAMLIT_PORT = 8501        # pinned so the tunnel and the server always agree
STARTUP_TIMEOUT_S = 60       # upper bound on how long to wait for Streamlit to come up

ngrok_token = userdata.get('NGROK_TOKEN')
# Fail early with a clear message if the secret comes back empty.
if not ngrok_token:
    raise RuntimeError("Colab secret 'NGROK_TOKEN' is empty; add your ngrok auth token.")
ngrok.set_auth_token(ngrok_token)

# Make the cell safe to re-run: drop tunnels left over from an earlier run and
# stop the Streamlit server this notebook session started previously, if any.
ngrok.kill()
previous_process = globals().get('process')
if previous_process is not None and previous_process.poll() is None:
    previous_process.terminate()
    try:
        previous_process.wait(timeout=10)
    except subprocess.TimeoutExpired:
        previous_process.kill()

# Start Streamlit in the background on the pinned port
process = subprocess.Popen(
    ['streamlit', 'run', 'app.py', '--server.port', str(STREAMLIT_PORT)]
)

# Wait until the server actually accepts connections instead of sleeping a fixed time.
deadline = time.monotonic() + STARTUP_TIMEOUT_S
while True:
    if process.poll() is not None:
        raise RuntimeError(f"Streamlit exited during start-up (exit code {process.returncode}).")
    try:
        with socket.create_connection(('localhost', STREAMLIT_PORT), timeout=1):
            break
    except OSError:
        if time.monotonic() > deadline:
            process.terminate()
            raise TimeoutError(
                f"Streamlit did not start listening on port {STREAMLIT_PORT} "
                f"within {STARTUP_TIMEOUT_S} seconds."
            )
        time.sleep(0.5)

# Open tunnel to the Streamlit port
public_url = ngrok.connect(STREAMLIT_PORT)
print(f"Streamlit app is running at: {public_url}")



Streamlit app is running at: NgrokTunnel: "https://2f9c-34-81-166-117.ngrok-free.app" -> "http://localhost:8501"
