<a href="https://colab.research.google.com/github/muhammadshehryar-codes/git-lab/blob/main/FinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Prompt for a file upload in Colab. The result is kept in `uploaded`, which the
# next cell iterates with .keys() and indexes by file name to get the raw bytes.
from google.colab import files
uploaded = files.upload()

In [None]:
import io
import zipfile

# Unpack every uploaded zip archive into the current working directory.
# Uploads that are not zip archives (for example a CSV uploaded directly) are
# skipped with a message instead of aborting the loop with BadZipFile.
for fn, payload in uploaded.items():
    buffer = io.BytesIO(payload)
    if not zipfile.is_zipfile(buffer):
        print(f"Skipping '{fn}': not a zip archive.")
        continue
    buffer.seek(0)  # is_zipfile may have moved the read position
    with zipfile.ZipFile(buffer, 'r') as zip_ref:
        zip_ref.extractall()  # Extracts all files into current working directory

In [None]:
import pandas as pd

# Load the main training dataset from the Colab working directory.
# NOTE(review): the path is absolute; the archive is extracted into the current
# working directory, so this presumably relies on cwd being /content — confirm.
df = pd.read_csv("/content/Admission_Predict.csv")

# Print (rows, columns), then display the first five rows as the cell output.
print("Shape of dataset:", df.shape)
df.head()

In [None]:
# Column names, dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Summary statistics (pandas defaults) shown as the cell output.
df.describe()

# EDA

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Applicants whose admission chance is at or above this value are labelled 1.
ADMIT_THRESHOLD = 0.75

# Basic info
print("\n🔹 Dataset Info:")
df.info()

print("\n🔹 Missing Values:")
print(df.isnull().sum())

# Remove serial number column if present (no-op when it is already gone).
df = df.drop(columns=['Serial No.'], errors='ignore')

# Build the binary target exactly once so the cell can be re-run safely.
# The probability column is matched on its stripped name, so both
# "Chance of Admit " (trailing space) and "Chance of Admit" are accepted.
chance_col = next(
    (col for col in df.columns if str(col).strip() in ("Chance of Admit", "Chance")),
    None,
)
if chance_col is not None:
    # Comparisons with NaN are False, so missing chances map to 0.
    df['Admit'] = (df[chance_col] >= ADMIT_THRESHOLD).astype(int)
    # Drop the original probability column so it cannot leak into the features.
    df = df.drop(columns=[chance_col])
elif 'Admit' not in df.columns:
    raise KeyError("Neither a 'Chance of Admit' column nor an 'Admit' column was found in df.")

# Show class distribution
print("\nClass Distribution:")
print(df['Admit'].value_counts())

# Plot class distribution
sns.countplot(x='Admit', data=df)
plt.title("Admit vs Not Admit")
plt.xticks([0, 1], ['Not Admit', 'Admit'])
plt.show()

# Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Target is the binary 'Admit' column; every other column is a feature.
y = df['Admit']
X = df.drop(columns=['Admit'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("Preprocessing done: Data split and scaled.")

# Model Training

## KNN

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt
import seaborn as sns

# Fit a 5-nearest-neighbours classifier on the scaled training features and
# predict the held-out rows.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

knn_accuracy = accuracy_score(y_test, y_pred_knn)
knn_report = classification_report(y_test, y_pred_knn)
print("KNN Accuracy:", knn_accuracy)
print("\nClassification Report:\n", knn_report)

# Confusion matrix heatmap: rows are actual classes, columns are predictions.
cm_knn = confusion_matrix(y_test, y_pred_knn)
class_labels = ['Not Admit', 'Admit']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm_knn,
    annot=True,
    fmt='d',
    cmap='Greens',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - KNN')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Headline scores for the KNN predictions, printed one per line.
print("KNN Metrics:")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(label, metric_fn(y_test, y_pred_knn))

# Raw confusion-matrix counts.
cm = confusion_matrix(y_test, y_pred_knn)
print("Confusion Matrix:\n", cm)

## Logistic Regression

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
import seaborn as sns

# Fit logistic regression (seeded) on the scaled training features and
# predict the held-out rows.
lr = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)

lr_accuracy = accuracy_score(y_test, y_pred_lr)
lr_report = classification_report(y_test, y_pred_lr)
print("Logistic Regression Accuracy:", lr_accuracy)
print("\nClassification Report:\n", lr_report)

# Confusion matrix heatmap: rows are actual classes, columns are predictions.
cm_lr = confusion_matrix(y_test, y_pred_lr)
class_labels = ['Not Admit', 'Admit']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm_lr,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - Logistic Regression')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()


In [None]:
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)

# Requires y_test and y_pred_lr from the Logistic Regression cell.
print("Logistic Regression Metrics:")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(label, metric_fn(y_test, y_pred_lr))

# Raw confusion-matrix counts.
cm = confusion_matrix(y_test, y_pred_lr)
print("Confusion Matrix:\n", cm)

## Decision Tree

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import seaborn as sns

# Fit a seeded decision tree on the scaled training features and predict the
# held-out rows.
dt = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_dt = dt.predict(X_test_scaled)

dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_report = classification_report(y_test, y_pred_dt)
print("Decision Tree Accuracy:", dt_accuracy)
print("\nClassification Report:\n", dt_report)

# Confusion matrix heatmap: rows are actual classes, columns are predictions.
cm_dt = confusion_matrix(y_test, y_pred_dt)
class_labels = ['Not Admit', 'Admit']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm_dt,
    annot=True,
    fmt='d',
    cmap='Oranges',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - Decision Tree')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

In [None]:
# Headline scores for the decision-tree predictions; the metric functions are
# the ones imported by the earlier metrics cells.
print("Decision Tree Metrics:")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(label, metric_fn(y_test, y_pred_dt))

# Raw confusion-matrix counts.
cm = confusion_matrix(y_test, y_pred_dt)
print("Confusion Matrix:\n", cm)

## Random Forest

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import seaborn as sns

# Fit a seeded random forest on the scaled training features and predict the
# held-out rows.
rf = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
y_pred_rf = rf.predict(X_test_scaled)

rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(y_test, y_pred_rf)
print("Random Forest Accuracy:", rf_accuracy)
print("\nClassification Report:\n", rf_report)

# Confusion matrix heatmap: rows are actual classes, columns are predictions.
cm_rf = confusion_matrix(y_test, y_pred_rf)
class_labels = ['Not Admit', 'Admit']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm_rf,
    annot=True,
    fmt='d',
    cmap='Purples',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - Random Forest')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

In [None]:
# Headline scores for the random-forest predictions; the metric functions are
# the ones imported by the earlier metrics cells.
print("Random Forest Metrics:")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(label, metric_fn(y_test, y_pred_rf))

# Raw confusion-matrix counts.
cm = confusion_matrix(y_test, y_pred_rf)
print("Confusion Matrix:\n", cm)

In [None]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Small feed-forward network: 16 -> 8 ReLU units, then a single sigmoid output
# that yields the probability of the positive ('Admit') class.
n_features = X_train_scaled.shape[1]
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(16, activation='relu', input_shape=(n_features,)))
model.add(tf.keras.layers.Dense(8, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Binary classification

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train silently for 50 epochs; 20% of the training rows are held back by
# Keras for validation.
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.2,
    verbose=0,
)

# Score on the held-out test rows.
loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"ANN Test Accuracy: {accuracy:.4f}")

# Turn predicted probabilities into hard 0/1 labels with a 0.5 cut-off.
y_pred_prob = model.predict(X_test_scaled)
y_pred_ann = (y_pred_prob > 0.5).astype(int).flatten()

ann_report = classification_report(y_test, y_pred_ann)
print("\nClassification Report:\n", ann_report)

# Confusion matrix heatmap: rows are actual classes, columns are predictions.
cm_ann = confusion_matrix(y_test, y_pred_ann)
class_labels = ['Not Admit', 'Admit']
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm_ann,
    annot=True,
    fmt='d',
    cmap='Reds',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix - ANN')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# 4-fold cross-validation of the ANN architecture used above.
#
# Everything here lives under cv_/fold_ names on purpose: the hold-out cells
# above own `df`, `X`, `y`, `scaler`, `model` and `y_test`, and the cells below
# (ANN metrics, model/scaler export) still read them. Rebinding them here would
# silently change what gets evaluated and pickled.

# Load and preprocess data
cv_df = pd.read_csv("Admission_Predict.csv")  # Use your dataset path
cv_df = cv_df.drop(columns=['Serial No.'], errors='ignore')  # Drop ID column if exists

# Binary classification: Convert Chance of Admit into 0 (not admit) and 1 (admit)
cv_df['Admit'] = (cv_df['Chance of Admit '] >= 0.75).astype(int)
cv_df = cv_df.drop(columns=['Chance of Admit '])

X_cv = cv_df.drop(columns=['Admit']).values
y_cv = cv_df['Admit'].values

# K-Fold Cross Validation
kf = KFold(n_splits=4, shuffle=True, random_state=42)

all_reports = []
cv_confusion = np.zeros((2, 2), dtype=int)  # summed over folds

for fold, (train_index, test_index) in enumerate(kf.split(X_cv), start=1):
    # Fit the scaler on this fold's training rows only, so the validation rows
    # never influence the scaling statistics.
    fold_scaler = StandardScaler()
    X_fold_train = fold_scaler.fit_transform(X_cv[train_index])
    X_fold_test = fold_scaler.transform(X_cv[test_index])
    y_fold_train, y_fold_test = y_cv[train_index], y_cv[test_index]

    # A fresh model per fold; weights must not carry over between folds.
    fold_model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu', input_shape=(X_fold_train.shape[1],)),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    fold_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    fold_model.fit(X_fold_train, y_fold_train, epochs=50, batch_size=16, verbose=0)

    fold_prob = fold_model.predict(X_fold_test, verbose=0)
    fold_pred = (fold_prob > 0.5).astype(int).flatten()

    fold_report = classification_report(y_fold_test, fold_pred, output_dict=True, zero_division=0)
    all_reports.append(fold_report)
    cv_confusion += confusion_matrix(y_fold_test, fold_pred, labels=[0, 1])
    print(f"Fold {fold} accuracy: {fold_report['accuracy']:.4f}")

cv_accuracies = [report['accuracy'] for report in all_reports]
print(f"\nMean CV accuracy: {np.mean(cv_accuracies):.4f} (std {np.std(cv_accuracies):.4f})")

# Confusion matrix accumulated across all folds.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cv_confusion, annot=True, fmt='d', cmap='Reds',
            xticklabels=['Not Admit', 'Admit'], yticklabels=['Not Admit', 'Admit'], ax=ax)
ax.set_title('Confusion Matrix - ANN (4-fold CV, summed)')
ax.set_ylabel('Actual')
ax.set_xlabel('Predicted')
plt.show()


In [None]:
# Headline scores for the ANN's thresholded predictions (y_pred_ann) against
# y_test; the metric functions are the ones imported by the earlier cells.

print("ANN Metrics:")
for label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("F1 Score:", f1_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
):
    print(label, metric_fn(y_test, y_pred_ann))

# Raw confusion-matrix counts.
cm = confusion_matrix(y_test, y_pred_ann)
print("Confusion Matrix:\n", cm)

# Save Our Best Model and Scaler for Deployment

In [None]:
import pickle

# Write the trained Keras network to an HDF5 file in the working directory.
model.save('ann_model.h5')

# Serialise the fitted scaler next to it so inference can reuse the same
# scaling statistics.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

Load the ANN model and scaler (for Streamlit or any other use)

In [None]:
import pickle
from tensorflow.keras.models import load_model

# Read back the saved network.
loaded_ann_model = load_model('ann_model.h5')

# Read back the pickled scaler.
with open('scaler.pkl', 'rb') as scaler_file:
    loaded_scaler = pickle.load(scaler_file)

# Streamlit App Code to Load ANN Model + Scaler & Predict Admission

In [None]:
!pip install streamlit

In [None]:
!pip install streamlit pyngrok --quiet

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# The ngrok authtoken is a credential and must never be stored in the notebook.
# It is read from the NGROK_AUTHTOKEN environment variable, falling back to a
# hidden interactive prompt.
# NOTE: any token that was ever committed with this notebook must be treated
# as leaked and rotated in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_token)

In [None]:
%%writefile app.py
import streamlit as st
import pickle
import numpy as np
from tensorflow.keras.models import load_model

# Load the ANN model
loaded_ann_model = load_model('/content/ann_model.h5')

# Load the scaler
with open('/content/scaler.pkl', 'rb') as f:
    loaded_scaler = pickle.load(f)

st.title("University Admission Predictor")

# Input form
gre = st.number_input("GRE Score (0-340)", 0, 340, 300)
toefl = st.number_input("TOEFL Score (0-120)", 0, 120, 100)
uni_rating = st.selectbox("University Rating (1-5)", [1, 2, 3, 4, 5])
sop = st.slider("Statement of Purpose Strength (1.0 - 5.0)", 1.0, 5.0, 3.0, step=0.5)
lor = st.slider("LOR Strength (1.0 - 5.0)", 1.0, 5.0, 3.0, step=0.5)
cgpa = st.number_input("CGPA (2.0 - 4.0)", 0.0, 4.0, 3.0,step=0.1)
research = st.selectbox("Research Experience", [1, 0])

# Create feature array
features = np.array([[gre, toefl, uni_rating, sop, lor, cgpa, research]])

# Predict button
if st.button("Predict Admission"):
    result = loaded_ann_model.predict(features)
    st.success(f"Admission Status: {'Admission Will Be Granted (1)' if result[0] == 1 else 'Admission Will Not Be Granted (0)'}")

In [None]:
# Kill any previous ngrok sessions
!pkill -f ngrok

# Set up new ngrok tunnel
from pyngrok import ngrok
public_url = ngrok.connect(8501)
print(f"Streamlit app is live at: {public_url}")

# Run streamlit app
!streamlit run app.py &