# In [1]: (notebook cell prompt left over from the export)
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# -----------------------------
# Part I - Mathematical Functions
# -----------------------------

# RMSE Function
def calculate_rmse(actual, predicted):
    """Return the root-mean-square error between observed and predicted values.

    Both inputs are converted to floating-point arrays before any arithmetic.
    Without that, the subtraction and the squaring run in the inputs' own
    integer dtype: unsigned values wrap around on a negative difference and
    narrow integers overflow when the difference is squared.

    Args:
        actual: Array-like of observed numeric values.
        predicted: Array-like of predicted numeric values.

    Returns:
        The square root of the mean of the squared differences.
    """
    residuals = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
    return np.sqrt(np.mean(residuals**2))

# MAE Function
def calculate_mae(actual, predicted):
    """Return the mean absolute error between observed and predicted values.

    Both inputs are converted to floating-point arrays before subtracting, so
    unsigned integer inputs cannot wrap around when a prediction exceeds the
    observed value.

    Args:
        actual: Array-like of observed numeric values.
        predicted: Array-like of predicted numeric values.

    Returns:
        The mean of the absolute differences.
    """
    residuals = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
    return np.mean(np.abs(residuals))

# Accuracy Function
def calculate_accuracy(actual, predicted):
    """Return the fraction of positions where the prediction equals the actual label.

    Args:
        actual: Array-like of true labels.
        predicted: Array-like of predicted labels.

    Returns:
        The mean of the element-wise equality comparison.
    """
    truth, guess = (np.array(labels) for labels in (actual, predicted))
    matches = truth == guess
    return np.mean(matches)

# Housing data: score the predicted sale prices against the recorded ones.
housing_df = pd.read_csv("housing data.csv")
actual_prices = housing_df["sale price"]
predicted_prices = housing_df["sale price pred"]
rmse_result = calculate_rmse(actual_prices, predicted_prices)
mae_result = calculate_mae(actual_prices, predicted_prices)
print("Housing Data:")
print("RMSE:", rmse_result)
print("MAE:", mae_result)
print("")

# Mushroom data: share of rows whose predicted label matches the actual one.
mushroom_df = pd.read_csv("mushroom data.csv")
accuracy_result = calculate_accuracy(
    actual=mushroom_df["actual"],
    predicted=mushroom_df["predicted"],
)
print("Mushroom Data Accuracy:", accuracy_result)
print("")

# Plotting the Error Function
def f(p):
    """Evaluate the degree-six polynomial error function at ``p``.

    ``p`` may be a scalar or a NumPy array; arrays are evaluated element-wise.
    The terms are combined left to right in descending order of power.
    """
    sixth = 0.005 * p**6
    fifth = 0.27 * p**5
    fourth = 5.998 * p**4
    third = 69.919 * p**3
    second = 449.17 * p**2
    first = 1499.7 * p
    return sixth - fifth + fourth - third + second - first + 2028

# Sample the error function on 1000 evenly spaced points over [0, 10].
p_vals = np.linspace(0, 10, 1000)
f_vals = f(p_vals)

# Draw the sampled curve on its own 10x5 figure.
error_fig, error_ax = plt.subplots(figsize=(10, 5))
error_ax.plot(p_vals, f_vals)
error_ax.set_title("Error Function")
error_ax.set_xlabel("p")
error_ax.set_ylabel("f(p)")
error_ax.grid(True)
plt.show()

# The grid point with the smallest sampled value approximates the minimum.
min_index = f_vals.argmin()
min_p, min_error = p_vals[min_index], f_vals[min_index]
print("Minimum error occurs at p ≈", round(min_p, 3))
print("Minimum error value ≈", round(min_error, 3))
print("")

# -----------------------------
# Part II - Scatterplot and Linear Regression
# -----------------------------

# Load car data
car_df = pd.read_csv("car data.csv")

# Scatterplot of the raw observations.
plt.scatter(car_df["weight"], car_df["mpg"])
plt.title("Highway MPG vs Weight")
plt.xlabel("Weight (lbs)")
plt.ylabel("Highway MPG")
plt.grid(True)
plt.show()

# Linear regression using Scikit-learn.
# X is kept as a one-column DataFrame, so the model is fitted with the
# feature name "weight".
X = car_df[["weight"]]
y = car_df["mpg"]
model = LinearRegression()
model.fit(X, y)

slope = model.coef_[0]
intercept = model.intercept_
print("Best-Fit Line from Model:")
print(f"Slope: {slope}")
print(f"Intercept: {intercept}")

# Fitted values are computed once and reused for both the plot and the RMSE.
predicted_mpg = model.predict(X)

# Plot best-fit line over the observations.
plt.scatter(car_df["weight"], car_df["mpg"])
plt.plot(car_df["weight"], predicted_mpg, color='red', label="Best-Fit Line")
plt.title("Linear Regression Line")
plt.xlabel("Weight")
plt.ylabel("Highway MPG")
plt.grid(True)
plt.legend()
plt.show()

# RMSE of best-fit line
regression_rmse = calculate_rmse(y, predicted_mpg)
print("Regression Line RMSE:", regression_rmse)

# Predict mpg for car weight = 3200 lbs.
# The query is a DataFrame with the same column name used for fitting; a bare
# nested list makes scikit-learn warn that X has no valid feature names.
query_3200 = pd.DataFrame({"weight": [3200]})
mpg_3200 = model.predict(query_3200)[0]
print("Predicted MPG for 3200 lbs car:", round(mpg_3200, 2))


# Output captured from the recorded notebook run:
# FileNotFoundError: [Errno 2] No such file or directory: 'housing data.csv'