### Load Dataset in Jupyter Notebook

In [11]:
import pandas as pd
import numpy as np
import pandas as pd

# Load the dataset from a CSV file located in the notebook's working directory
data = pd.read_csv('student_pass.csv')

# Preview the first rows. Leaving the expression bare (instead of print())
# lets the notebook render the DataFrame with its rich table display.
data.head()

### Visualize the Data

In [33]:
import matplotlib.pyplot as plt

# One point per row: 'Hours Studied' on the x-axis, 'Passed' on the y-axis.
# The explicit Figure/Axes pair is used rather than the implicit pyplot state.
fig, ax = plt.subplots()
ax.scatter(data['Hours Studied'], data['Passed'], color='blue')
ax.set_xlabel('Hours Studied')
ax.set_ylabel('Passed (1 = Yes, 0 = No)')
ax.set_title('Pass Status vs. Study Hours')
plt.show()

### Implement Logistic Regression

In [25]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Feature matrix (a one-column DataFrame) and the target labels
X, y = data[['Hours Studied']], data['Passed']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the classifier and fit it on the training portion (fit() returns the estimator)
model = LogisticRegression().fit(X_train, y_train)

# Classify the held-out rows
y_pred = model.predict(X_test)

# Fraction of held-out rows classified correctly, reported as a percentage
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

### Visualize the Logistic Curve

In [44]:
import numpy as np

# Generate values for hours to predict: 100 evenly spaced points from 0 to 12,
# shaped as a single-column 2-D array
hours = np.linspace(0, 12, 100).reshape(-1, 1)

# The model was fitted on a DataFrame whose only column is 'Hours Studied'.
# Handing it a bare NumPy array makes scikit-learn warn that
# "X does not have valid feature names", so wrap the grid in a DataFrame
# that carries the same column name before predicting.
hours_frame = pd.DataFrame(hours, columns=['Hours Studied'])

# Predict probabilities; column 1 of predict_proba corresponds to model.classes_[1]
# (the "passed" class if the labels are 0/1, as the first plot's axis label suggests)
predicted_probabilities = model.predict_proba(hours_frame)[:, 1]

# Plot the logistic regression curve over the observed data points
plt.scatter(data['Hours Studied'], data['Passed'], color='blue', label='Data')
plt.plot(hours, predicted_probabilities, color='red', label='Logistic Curve')
plt.xlabel('Hours Studied')
plt.ylabel('Probability of Passing')
plt.title('Logistic Regression Curve')
plt.legend()
plt.show()


### Make Predictions

In [49]:
# Predict for a specific number of hours
new_hours = [[3], [7], [10]]  # You can change these values

# The model was fitted on a DataFrame with the column 'Hours Studied'; passing a
# plain nested list triggers scikit-learn's "X does not have valid feature names"
# warning, so give the inputs the same column name before predicting.
new_hours_frame = pd.DataFrame(new_hours, columns=['Hours Studied'])
predictions = model.predict(new_hours_frame)

# Report one line per requested value, pairing each input with its predicted label
for hour, pred in zip(new_hours, predictions):
    print(f"Hours Studied: {hour[0]} -> Predicted Pass: {pred}")
