In [3]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Toy construction-cost dataset.
# Columns: area in sqft, material quality (1-5), labor cost, total cost.
data = {
    'Area': [500, 1000, 1500, 2000, 2500],
    'Material_Quality': [3, 4, 2, 5, 3],
    'Labor_Cost': [20000, 35000, 40000, 50000, 60000],
    'Total_Cost': [120000, 200000, 180000, 300000, 250000]
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Features (three predictor columns) and target (total cost)
X = df[['Area', 'Material_Quality', 'Labor_Cost']]
y = df['Total_Cost']

# Split data: 20% of the rows are held out. With only five rows this leaves
# a single test sample; random_state pins which row that is.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model.
# NOTE(review): four training rows against three features plus the default
# intercept means the fit can pass through every training point exactly, so
# the numbers below illustrate the workflow rather than real predictive skill.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict and evaluate on the held-out rows
y_pred = model.predict(X_test)
print(f"Predicted Costs: {y_pred}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")

# R^2 compares residuals with the variance of y_test, which is undefined for
# fewer than two samples: r2_score then returns nan and emits a warning.
# Report that case explicitly instead of printing a meaningless "nan".
if len(y_test) >= 2:
    print(f"R2 Score: {r2_score(y_test, y_pred)}")
else:
    print(f"R2 Score: undefined (needs at least 2 test samples, got {len(y_test)})")

# Predicting the cost for a new, unseen project; column names must match the
# feature columns the model was fitted on.
new_data = pd.DataFrame({'Area': [1800], 'Material_Quality': [4], 'Labor_Cost': [45000]})
predicted_cost = model.predict(new_data)
print(f"Predicted Cost for New Data: {predicted_cost[0]}")

Predicted Costs: [210899.86073799]
Mean Squared Error: 118806964.10758555
R2 Score: nan
Predicted Cost for New Data: 244391.45642382916


