In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error
from math import sqrt
import numpy as np
import pandas as pd


def perform_linear_regression(X, y, results_path="regression_results.txt"):
    """Fit a linear regression model and write a plain-text fit report.

    The model is fitted on ``X``/``y`` and then scored on that same data,
    so the reported metrics are in-sample (training) metrics, not an
    estimate of performance on unseen data.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``LinearRegression.fit``.
    y : array-like
        Target values passed to ``LinearRegression.fit``.
    results_path : str or path-like, optional
        Destination of the text report; an existing file is overwritten.
        Defaults to ``"regression_results.txt"``.

    Returns
    -------
    LinearRegression
        The fitted model.
    """
    model = LinearRegression()
    model.fit(X, y)
    y_pred = model.predict(X)

    # Compute every metric before touching the file: if scoring fails, an
    # existing report is not truncated and no partial report is left behind.
    mse = mean_squared_error(y, y_pred)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    # Line order and formatting of the report are part of the output
    # contract; keep them stable.
    report = (
        f"Model Coefficients: {model.coef_}\n"
        f"Model Intercept: {model.intercept_}\n"
        f"Mean Squared Error: {mse:.2f}\n"
        f"R-squared Score: {r2:.4f}\n"
        f"Mean Absolute Error: {mae:.2f}\n"
        f"Root Mean Squared Error: {rmse:.4f}\n"
    )

    # Explicit encoding so the file does not depend on the platform locale.
    with open(results_path, "w", encoding="utf-8") as file:
        file.write(report)

    return model

In [9]:
from joblib import dump
import os

# Load the dataset from ../data relative to the current working directory.
data_csv_path = os.path.join(os.getcwd(), "..", "data", "climate_change_AQI.csv")
df = pd.read_csv(data_csv_path)

# Predictor columns and the target column used for the regression.
features = [
    "Longitude",
    "Latitude",
    "Sine",
    "Cosine",
    "Land_Surface_Temp(C)",
    "Sea_Ice_Extent(10^6 sq km)",
    "Sea_Surface_Temp(C)",
]
target_var = ["Ozone"]

# target_var is a list, so y is a one-column DataFrame rather than a Series.
X = df[features]
y = df[target_var]

# Fit the model; the helper also writes its metrics report to disk.
lr_model = perform_linear_regression(X, y)

# Persist the fitted model in the current working directory.
model_path = os.path.join(os.getcwd(), "linear_regression_model.joblib")
dump(lr_model, model_path)
print("results saved to linear_regression_model.joblib")

results saved to linear_regression_model.joblib
