In [21]:
import pandas as pd

In [120]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [58]:
import numpy as np

In [48]:
pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7
Note: you may need to restart the kernel to use updated packages.


In [7]:
from ucimlrepo import fetch_ucirepo 

In [110]:
forest_fires = fetch_ucirepo(id=162) 

In [116]:
X = forest_fires.data.features 
y = forest_fires.data.targets 

In [140]:
df = pd.concat([X, y], axis=1)

In [172]:
df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,7,0,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0
1,7,4,10,5,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0
2,7,4,10,2,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0
3,8,6,7,0,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0
4,8,6,7,3,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0


In [152]:
import matplotlib.pyplot as plt
%matplotlib inline

In [158]:
from sklearn.model_selection import train_test_split

In [164]:
from sklearn.linear_model import LinearRegression

In [166]:
from sklearn.preprocessing import StandardScaler

In [189]:
import seaborn as sns

In [205]:
model = LinearRegression()
model.fit(X_train, y_train)

In [227]:
def gradient_descent(X, y, learning_rate=0.01, num_iterations=1000):
    m, n = X.shape
    theta = np.zeros(n)
    errors = []  # Use a list to store MSE values
    
    for _ in range(num_iterations):
        predictions = X.dot(theta)
        errors_current = predictions - y
        gradient = X.T.dot(errors_current) / m
        theta -= learning_rate * gradient
        mse = (errors_current ** 2).mean()
        errors.append(mse)
    
    return theta, errors

In [229]:
df_encoded = pd.get_dummies(df, columns=['month', 'day'], drop_first=True)
X = df_encoded.drop('area', axis=1)
y = df_encoded['area']

In [219]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [231]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [223]:
learning_rates = [0.001, 0.01, 0.1]
num_iterations_list = [100, 500, 1000]
results = []

In [233]:
for lr in learning_rates:
    for num_iter in num_iterations_list:
        theta, errors = gradient_descent(X_train, y_train, learning_rate=lr, num_iterations=num_iter)
        final_mse = errors[-1]
        results.append({'learning_rate': lr, 'num_iterations': num_iter, 'mse': final_mse})

In [235]:
import json

In [251]:
log_file = 'gradient_descent_log.json'

with open(log_file, 'w') as f:
    json.dump(results, f, indent=4)

In [241]:
best_result = min(results, key=lambda x: x['mse'])
print(f"Best hyperparameters: {best_result}")

Best hyperparameters: {'learning_rate': 0.1, 'num_iterations': 1000, 'mse': 2096.449509804474}


In [245]:
best_lr = best_result['learning_rate']
best_num_iter = best_result['num_iterations']
theta_best, _ = gradient_descent(X_train, y_train, learning_rate=best_lr, num_iterations=best_num_iter)

In [247]:
predictions = X_test.dot(theta_best)
mse_test = ((predictions - y_test) ** 2).mean()
print(f"Test MSE: {mse_test}")

Test MSE: 11953.662845602592
