In [4]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.utils import resample

# Observed bridge projects. Units are taken from the report strings printed
# at the end of this cell: lengths in metres, durations in months.
data = {
    'Length': [100, 50, 75, 65, 90, 110, 115, 150],
    'Duration': [15.3, 10.6, 11.1, 11, 14.2, 15.4, 16.4, 24.5]
}

df = pd.DataFrame(data)

# Length of the bridge whose construction duration is being estimated.
new_bridge_length = 120

# Duration is modelled as a quadratic in length, so the squared term is added
# as an explicit second feature for the linear regression.
df['Length_squared'] = df['Length'] ** 2
X = df[['Length', 'Length_squared']]
y = df['Duration']
n_bootstraps = 10000
bootstrap_predictions = []

# The query point does not depend on the resample, so build it once instead of
# constructing an identical DataFrame on every iteration. Column names match
# the training frame so the model sees consistent feature names.
new_bridge_features = pd.DataFrame(
    [[new_bridge_length, new_bridge_length ** 2]],
    columns=['Length', 'Length_squared'],
)

for i in range(n_bootstraps):
    # Draw a bootstrap sample of the rows; seeding with the loop index gives
    # a different but reproducible draw on each iteration.
    # NOTE(review): with only 8 observations a draw can occasionally contain
    # fewer than three distinct lengths, which leaves the quadratic fit
    # underdetermined -- consider whether such draws should be skipped.
    X_resampled, y_resampled = resample(X, y, random_state=i)

    model = LinearRegression()
    model.fit(X_resampled, y_resampled)

    predicted_duration = model.predict(new_bridge_features)[0]

    bootstrap_predictions.append(predicted_duration)

# Summarise the bootstrap distribution of the prediction. np.std is called
# with its default settings here.
mean_duration = np.mean(bootstrap_predictions)
std_duration = np.std(bootstrap_predictions)

# Report the length actually used, rather than a hard-coded "120", so the
# message stays correct when new_bridge_length is changed.
print(f"Estimated Duration for {new_bridge_length}m bridge: {mean_duration:.2f} months")
print(f"Standard Deviation of the estimate: {std_duration:.2f} months")


Estimated Duration for 120m bridge: 17.63 months
Standard Deviation of the estimate: 0.85 months
