# In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso

# Bootstrap per-point 5th-95th percentile intervals for Lasso predictions of
# 'tuition' on the training set, and write them to 'prediction_intervals.csv'
# (one row per training row, in the same order as 'colleges_train.csv').

# Read the training data
df_train = pd.read_csv('colleges_train.csv')

# Define the features and target variable
features = ['adm_rate', 'satv25', 'satv50', 'satv75', 'satm25', 'satm50', 'satm75',
            'pell_grant_rate', 'fed_loan_rate', 'ug', 'ug_men', 'ug_women', 'ug_white',
            'ug_black', 'ug_hispanic', 'ug_asian', 'ug_25plus', 'first_gen',
            'faculty_salary', 'ft_faculty_rate', 'math_deg', 'engi_deg', 'bio_deg',
            'sci_deg', 'endowment', 'booksupply', 'roomboard']

X_train = df_train[features]
y_train = df_train['tuition']

# Number of bootstrap samples
n_bootstrap = 1000
n_samples = len(X_train)

# Row b holds the predictions for every training point made by the model
# fitted on bootstrap resample b. Keeping all of them is what lets the
# percentiles be taken per point (down the columns) after the loop.
bootstrap_predictions = np.empty((n_bootstrap, n_samples))

# Perform bootstrapping
for b in range(n_bootstrap):
    # Generate bootstrap sample: draw row positions with replacement
    bootstrap_indices = np.random.choice(n_samples, size=n_samples, replace=True)
    X_boot = X_train.iloc[bootstrap_indices]
    y_boot = y_train.iloc[bootstrap_indices]

    # Train Lasso model on the resampled rows
    lasso_model = Lasso(alpha=0.1, random_state=42)
    lasso_model.fit(X_boot, y_boot)

    # Predict on the full (un-resampled) training set so that every bootstrap
    # model contributes exactly one prediction for each training point
    bootstrap_predictions[b] = lasso_model.predict(X_train)

# Calculate the interval for each point: the 5th and 95th percentiles are taken
# across the bootstrap models (axis=0), not across the points of a single model.
# np.percentile returns shape (2, n_samples); transpose to one row per point.
# NOTE: these bounds reflect only the variability of the fitted model across
# resamples; no residual noise term is added to the predictions.
prediction_intervals = np.percentile(bootstrap_predictions, [5, 95], axis=0).T

# Create a DataFrame to store prediction intervals (one row per training point)
prediction_intervals_df = pd.DataFrame(prediction_intervals, columns=['Lower Interval', 'Upper Interval'])

# Save DataFrame to CSV
prediction_intervals_df.to_csv('prediction_intervals.csv', index=False)
