In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [19]:
import streamlit as st
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler

# Load the trained model
model = load_model("hackathon_attendance_model.h5")


# Streamlit UI
st.title("Hackathon Attendance Prediction")

# User Input Form
with st.form("attendance_form"):
    downloaded_tickets = st.selectbox("Downloaded Tickets", [0, 1])
    engagement_level = st.slider("Engagement Level", 0, 1,2,3)
    interested_in_job = st.selectbox("Interested in Job Opportunities?", [0, 1])
    long_distance_travel = st.selectbox("Long Distance Travel?", [0, 1])
    questionnaire = st.selectbox("Filled the Questionnaire?", ["Yes", "No"])

    submit = st.form_submit_button("Predict Attendance")

# Preprocess and Predict
if submit:
    # Convert categorical 'Yes/No' to numerical (Yes = 1, No = 0)
    questionnaire_num = 1 if questionnaire == "Yes" else 0

    # Prepare input array
    user_input = np.array([[downloaded_tickets, engagement_level, interested_in_job, long_distance_travel, questionnaire_num]])

    # Scale the input data
    user_input_scaled = scaler.transform(user_input)

    # Make prediction
    prediction = model.predict(user_input_scaled)
    predicted_class = int(prediction > 0.5)

    # Show Prediction
    if predicted_class == 1:
        st.success("✅ The person is **likely to ATTEND** the hackathon!")
    else:
        st.error("❌ The person is **NOT likely to attend** the hackathon.")


In [31]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization  # Import BatchNormalization here
from tensorflow.keras.callbacks import EarlyStopping

# Set random seed for reproducibility
np.random.seed(42)

# Generate 500 rows of synthetic data
n_samples = 500

# Generate features with realistic distributions
person_ids = np.arange(1, n_samples + 1)
registered = np.random.binomial(1, 0.65, n_samples)  # 65% registered

# Create engagement level based on registration
# If not registered, engagement level is 0
engagement_level = np.zeros(n_samples, dtype=int)
for i in range(n_samples):
    if registered[i] == 1:  # If registered
        engagement_level[i] = np.random.choice([1, 2, 3], p=[0.3, 0.5, 0.2])  # Distribution for registered users

# Set other features (engagement, job interest, long distance, questionnaire, hackathon attendance) for unregistered users to 0
interested_in_job = np.where(registered == 1, np.random.binomial(1, 0.60, n_samples), 0)
long_distance = np.where(registered == 1, np.random.binomial(1, 0.70, n_samples), 0)

# Generate questionnaire completion with bias toward registered users
questionnaire = np.zeros(n_samples, dtype=int)
for i in range(n_samples):
    if registered[i] == 1:  # If registered
        questionnaire_prob = 0.7  # 70% of registered users complete the questionnaire
        questionnaire[i] = np.random.binomial(1, questionnaire_prob)

# Previous hackathon attendance with bias toward engaged users
prev_hack = np.zeros(n_samples, dtype=int)
for i in range(n_samples):
    if registered[i] == 1:  # If registered
        prev_hack_prob = 0.2 + 0.1 * engagement_level[i]  # Base 20% plus 10% per engagement level
        prev_hack_prob = min(prev_hack_prob, 0.8)  # Cap at 80% probability
        prev_hack[i] = np.random.binomial(1, prev_hack_prob)

# Create dataframe
data = pd.DataFrame({
    'PersonId': person_ids,
    'Registered': registered,
    'Engagement_Level': engagement_level,
    'Interested_In_Job_Opp': interested_in_job,
    'Long_Distance': long_distance,
    'Questionnaire': questionnaire,
    'Prev_Hack': prev_hack
})

# Create a more realistic target variable based on features
attendance_prob = np.zeros(n_samples)
for i in range(n_samples):
    if registered[i] == 0:
        # Unregistered users have very low chance of attendance
        attendance_prob[i] = 0.02  # 2% base probability for unregistered
    else:
        # For registered users, calculate based on other factors
        attendance_prob[i] = (
            0.15 +  # base probability for registered users
            0.15 * (engagement_level[i] / 3) +  # higher engagement increases probability
            0.05 * interested_in_job[i] +  # slight boost for job interest
            -0.10 * long_distance[i] +  # long distance decreases probability
            0.20 * questionnaire[i] +  # completing questionnaire increases probability
            0.25 * prev_hack[i]  # previous attendance is a strong predictor
        )

# Add some random noise to make the relationship less deterministic
noise = np.random.normal(0, 0.05, n_samples)
attendance_prob += noise

# Ensure probabilities are between 0 and 1
attendance_prob = np.clip(attendance_prob, 0.01, 0.99)

# Generate Final_Attendance based on calculated probabilities
data['Final_Attendance'] = np.random.binomial(1, attendance_prob, n_samples)

# Convert boolean columns to Yes/No for consistency with your example
boolean_columns = ['Registered', 'Interested_In_Job_Opp', 'Long_Distance', 
                   'Questionnaire', 'Prev_Hack', 'Final_Attendance']

for col in boolean_columns:
    data[col] = np.where(data[col] == 1, 'Yes', 'No')

# Convert categorical 'Yes'/'No' to 1/0 for scaling
data[boolean_columns] = data[boolean_columns].applymap(lambda x: 1 if x == 'Yes' else 0)

# Save the dataset to CSV
data.to_csv('hackathon_attendance_dataset.csv', index=False)

# Now read the dataset back (this is the critical step to use the saved data)
data = pd.read_csv('hackathon_attendance_dataset.csv')

# Prepare data for training
X = data.drop(columns=['PersonId', 'Final_Attendance'])
y = data['Final_Attendance']

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling using StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define the neural network architecture
model = Sequential()

# Input Layer and first Hidden Layer with Batch Normalization
model.add(Dense(128, input_dim=X_train_scaled.shape[1], activation='relu'))
model.add(BatchNormalization())  # Add Batch Normalization
model.add(Dropout(0.3))  # Dropout layer
# Second Hidden Layer
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.4))  # Dropout layer
# Second Hidden Layer
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))  # Dropout layer

# Third Hidden Layer
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.3))  # Dropout layer

# Output Layer (1 neuron, sigmoid activation)
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, 
                    validation_data=(X_test_scaled, y_test), callbacks=[early_stopping])

# Evaluate the model
y_pred_nn = (model.predict(X_test_scaled) > 0.5).astype(int)
accuracy_nn = accuracy_score(y_test, y_pred_nn)
print(f"Improved Neural Network Accuracy: {accuracy_nn * 100:.2f}%")
print("Classification Report for Neural Network:")
print(classification_report(y_test, y_pred_nn))






Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Improved Neural Network Accuracy: 74.00%
Classification Report for Neural Network:
              precision    recall  f1-score   support

           0       0.77      0.82      0.80        62
           1       0.68      0.61      0.64        38

    accuracy                           0.74       100
   macro avg       0.72      0.71      0.72       100
weighted avg       0.74      0.74      0.74       100

