In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
from scipy.stats import norm

In [2]:
# Load the class dataset from the CSV file in the working directory
data = pd.read_csv('data_in_class_three.csv')

# Response is ETS_scaled_score; the single regressor is ECON_GPA
dependent_var, independent_var = data['ETS_scaled_score'], data['ECON_GPA']

# Augment the regressor with a constant term so the model has an intercept
independent_var_with_constant = sm.add_constant(independent_var)

In [3]:
# Specify the OLS model: ETS_scaled_score regressed on a constant and ECON_GPA
model = sm.OLS(endog=dependent_var, exog=independent_var_with_constant)

# Estimate the model; `results` holds the fit used by the cells below
results = model.fit()



In [4]:
# ECON_GPA value at which the fitted model is evaluated
econ_gpa = 3.4

# Row of regressors: the constant term first, then the ECON_GPA value
econ_gpa_with_constant = np.array([1, econ_gpa])

# Point estimate of ETS_scaled_score conditional on ECON_GPA = econ_gpa
expected_value = results.predict(econ_gpa_with_constant)

# Build the prediction object once and reuse it for both intervals and the
# standard error below; its input is the same for all three.
prediction_at_gpa = results.get_prediction(econ_gpa_with_constant)

# 95% confidence interval for the conditional mean of ETS_scaled_score
confidence_interval = prediction_at_gpa.conf_int(alpha=0.05)

# 95% prediction interval for an individual ETS_scaled_score (obs=True)
prediction_interval = prediction_at_gpa.conf_int(alpha=0.05, obs=True)

# Centre and spread used by the probability calculation further down.
# se_obs is the standard error for a new observation (the one behind the
# obs=True interval), not the standard error of the estimated mean.
mean = expected_value[0]
std_error = prediction_at_gpa.se_obs[0]


In [5]:
def probability_ets_target(ets_target, mu=None, sigma=None):
    """Return the probability of a score at or above ``ets_target``.

    The score is treated as normally distributed with centre ``mu`` and
    standard deviation ``sigma``; the result is the upper-tail probability.

    Parameters
    ----------
    ets_target : float or numpy array
        Score threshold(s) to evaluate.
    mu : float, optional
        Centre of the distribution. When omitted, the notebook-level
        ``mean`` is used, so one-argument calls behave as before.
    sigma : float, optional
        Standard deviation of the distribution. When omitted, the
        notebook-level ``std_error`` is used.

    Returns
    -------
    float or numpy array
        Probability that the score is ``ets_target`` or higher.

    Raises
    ------
    ValueError
        If the standard deviation is not strictly positive.

    Notes
    -----
    NOTE(review): the intervals computed earlier come from ``conf_int``,
    which presumably uses a t distribution; the normal approximation here
    is only close when the residual degrees of freedom are large - confirm.
    """
    # Fall back to the notebook-level values when no override is supplied
    if mu is None:
        mu = mean
    if sigma is None:
        sigma = std_error

    # A zero or negative spread would silently yield inf/nan z-scores
    if np.any(np.asarray(sigma) <= 0):
        raise ValueError("standard deviation must be strictly positive")

    # Standardise the threshold
    z_score = (ets_target - mu) / sigma

    # Survival function = 1 - CDF, evaluated directly so that very small
    # upper-tail probabilities are not rounded to exactly 0 by the subtraction
    probability = norm.sf(z_score)

    return probability

# Score thresholds whose exceedance probabilities are reported
ets_target_157, ets_target_169 = 157, 169

# Evaluate the upper-tail probability for each threshold, in order
probability_157, probability_169 = (
    probability_ets_target(threshold)
    for threshold in (ets_target_157, ets_target_169)
)

# Report one line per threshold
print(f"Probability that a student with an ECON_GPA of {econ_gpa} will have an ETS_scaled_score of {ets_target_157} or higher: {probability_157}")
print(f"Probability that a student with an ECON_GPA of {econ_gpa} will have an ETS_scaled_score of {ets_target_169} or higher: {probability_169}")

Probability that a student with an ECON_GPA of 3.4 will have an ETS_scaled_score of 157 or higher: 0.6816987741852465
Probability that a student with an ECON_GPA of 3.4 will have an ETS_scaled_score of 169 or higher: 0.3039880016194577


In [6]:
prediction_interval

array([[138.77239279, 186.73429099]])

In [7]:
confidence_interval

array([[160.82901046, 164.67767332]])

In [8]:
expected_value

array([162.75334189])