In [None]:
import pandas as pd
import numpy as np

# Synthetic student dataset
# -------------------------
# Builds `df` with one row per student: two uniformly distributed features
# ('hours_studied', 'attendance') and a randomly drawn 'Pass'/'Fail' label
# whose probability grows with both features.

# Seed NumPy's global generator so every run produces the same data
np.random.seed(42)

# Number of students
num_students = 100

# 'hours_studied': uniform draws between 0 and 100
hours_studied = np.random.uniform(0, 100, num_students)

# 'attendance': percentage, uniform draws between 0 and 100
attendance = np.random.uniform(0, 100, num_students)

# Probability of passing: weighted blend of the two features
# (60% hours studied, 40% attendance), rescaled from 0-100 to 0-1.
pass_probabilities = (hours_studied * 0.6 + attendance * 0.4) / 100

# One uniform draw per student decides the outcome: the student passes when
# the draw falls below that student's probability. A single vectorized draw
# replaces the per-student loop; the draws are taken in student order.
random_draws = np.random.rand(num_students)
pass_fail = np.where(random_draws < pass_probabilities, 'Pass', 'Fail').tolist()

# Assemble the columns into a DataFrame
data = {
    'hours_studied': hours_studied,
    'attendance': attendance,
    'pass_fail': pass_fail
}
df = pd.DataFrame(data)

# Display the first few rows of the dataset
print(df.head())

In [None]:
import matplotlib.pyplot as plt

# Training-history curves
# -----------------------
# `history_data` maps a metric name to a list with one value per epoch. With
# Keras this would be the `history.history` dict returned by `model.fit`;
# other frameworks may use different key names, so adapt the keys below.
#
# The values here are dummy numbers for demonstration only -- replace them
# with the history from an actual training run.
history_data = {
    'accuracy': [0.6, 0.7, 0.75, 0.8, 0.82],
    'val_accuracy': [0.55, 0.68, 0.72, 0.79, 0.81],
    'loss': [0.5, 0.4, 0.35, 0.3, 0.28],
    'val_loss': [0.6, 0.45, 0.4, 0.32, 0.3]
}


def plot_history_metric(history, metric, title, ylabel):
    """Plot one training metric per epoch, plus its validation curve if present.

    Parameters
    ----------
    history : dict
        Maps metric names to sequences of per-epoch values.
    metric : str
        Key of the training series in `history`. The validation series is
        looked up under ``'val_' + metric`` and is skipped when that key is
        absent.
    title : str
        Figure title.
    ylabel : str
        Y-axis label; also used to build the legend entries
        ('Training <ylabel>' / 'Validation <ylabel>').

    Raises
    ------
    KeyError
        If `metric` is not a key of `history`.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(history[metric], label=f'Training {ylabel}')

    validation_key = f'val_{metric}'
    if validation_key in history:
        ax.plot(history[validation_key], label=f'Validation {ylabel}')

    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)
    plt.show()


# Plot training and validation accuracy
plot_history_metric(history_data, 'accuracy', 'Model Accuracy', 'Accuracy')

# Plot training and validation loss
plot_history_metric(history_data, 'loss', 'Model Loss', 'Loss')

In [None]:
# Correlation between the two features and the outcome
# ----------------------------------------------------
# .corr() needs numeric input, so the outcome is first encoded as an integer
# flag: 1 where 'pass_fail' equals 'Pass', 0 for every other value.
is_pass = df['pass_fail'] == 'Pass'
df['pass_fail_numeric'] = is_pass.astype('int64')

# Restrict the frame to the numeric columns (the text 'pass_fail' column is
# left out) and compute their pairwise correlations.
numeric_columns = ['hours_studied', 'attendance', 'pass_fail_numeric']
correlation_matrix = df.loc[:, numeric_columns].corr()

# Show the result
print("Correlation Matrix:")
print(correlation_matrix)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Logistic regression on the student data
# ---------------------------------------
# Features are the two numeric columns; the target is the 0/1 encoding of
# 'pass_fail' stored in 'pass_fail_numeric'.
feature_columns = ['hours_studied', 'attendance']
X = df[feature_columns]
y = df['pass_fail_numeric']

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the classifier on the training portion
model = LogisticRegression()
model.fit(X_train, y_train)

# Predicted 0/1 labels for the held-out rows
y_pred = model.predict(X_test)

# Report how well the predictions match the held-out labels
print("Model Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Two hypothetical students to score, one row each:
#   row 0 -> studied 70 hours, attendance 85%
#   row 1 -> studied 30 hours, attendance 40%
new_students_data = pd.DataFrame(
    [[70, 85], [30, 40]],
    columns=feature_columns,
)

# Per-class probabilities for the new students.
# NOTE(review): the "(Fail, Pass)" column order in the message presumes the
# columns follow model.classes_ == [0, 1] -- confirm if the encoding changes.
predictions_proba = model.predict_proba(new_students_data)
print("\nPrediction Probabilities for New Students (Fail, Pass):\n", predictions_proba)

# Hard 0/1 predictions for the new students
predictions_class_numeric = model.predict(new_students_data)

# Translate the numeric predictions back to text: index 1 ('Pass') when the
# prediction equals 1, index 0 ('Fail') otherwise.
outcome_names = ('Fail', 'Pass')
predictions_class = [outcome_names[int(code == 1)] for code in predictions_class_numeric]
print("\nPredicted Class for New Students:\n", predictions_class)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.lines import Line2D
import pandas as pd # Ensure pandas is imported if not already in this cell

# Combined plot: original data, test-set predictions, new-student predictions
# ----------------------------------------------------------------------------
# Relies on df, X_test, y_pred, new_students_data and predictions_class
# defined by earlier cells.

# Test-set features plus the model's predicted outcome as text
test_results_df = X_test.copy()
test_results_df['predicted_pass_fail'] = ['Pass' if pred == 1 else 'Fail' for pred in y_pred]

# New students plus their predicted outcome (reuses predictions_class)
new_students_data_plot = new_students_data.copy()
new_students_data_plot['predicted_pass_fail'] = predictions_class

# One palette per layer. The same dicts drive both the scatter colours and
# the legend entries, so the two cannot drift apart.
original_palette = {'Pass': 'blue', 'Fail': 'red'}
test_palette = {'Pass': 'lightblue', 'Fail': 'salmon'}
new_student_palette = {'Pass': 'green', 'Fail': 'orange'}

fig, ax = plt.subplots(figsize=(12, 8))

# seaborn's automatic legends are disabled on every layer (legend=False);
# a single hand-built legend covering all three layers is attached below.

# Layer 1: every student in df, coloured by the actual outcome ('o' markers)
sns.scatterplot(x='hours_studied', y='attendance', hue='pass_fail', data=df,
                palette=original_palette, marker='o', s=80, alpha=0.5,
                legend=False, ax=ax)

# Layer 2: test-set rows, coloured by the predicted outcome ('x' markers)
sns.scatterplot(x='hours_studied', y='attendance', hue='predicted_pass_fail', data=test_results_df,
                palette=test_palette, marker='x', s=100,
                legend=False, ax=ax)

# Layer 3: new students, coloured by the predicted outcome ('*' markers)
sns.scatterplot(x='hours_studied', y='attendance', hue='predicted_pass_fail', data=new_students_data_plot,
                palette=new_student_palette, marker='*', s=200,
                legend=False, ax=ax)

ax.set_title('Student Outcomes: Original Data, Test Predictions, and New Student Predictions')
ax.set_xlabel('Hours Studied')
ax.set_ylabel('Attendance (%)')
ax.grid(True)

# Legend built from proxy artists: for each layer, one marker-only Line2D per
# outcome, in the order Pass then Fail.
# Each entry: (layer name, marker, legend marker size, palette)
legend_layers = [
    ('Original Data', 'o', 8, original_palette),
    ('Test Prediction', 'x', 10, test_palette),
    ('New Student Prediction', '*', 12, new_student_palette),
]
custom_handles = [
    Line2D([0], [0], linestyle='none', marker=marker, markersize=marker_size,
           color=palette[outcome], label=f'{layer_name} ({outcome})')
    for layer_name, marker, marker_size, palette in legend_layers
    for outcome in ('Pass', 'Fail')
]

# Display the combined legend; entry text comes from each handle's label
ax.legend(handles=custom_handles, title='Outcome Type', loc='best')

plt.show()