In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/temperature-data/temperature-sentinel-dataset.csv


In [2]:
import numpy as np
import pandas as pd

dataset=pd.read_csv('/kaggle/input/temperature-data/temperature-sentinel-dataset.csv')
dataset.head()

Unnamed: 0,Filename,Band1_Mean,Band2_Mean,Band3_Mean,Band4_Mean,Band5_Mean,Band6_Mean,Band7_Mean,Band8_Mean,Band9_Mean,Band10_Mean,Band11_Mean,Band12_Mean,Band13_Mean,Temperature
0,Erie_Lake_Temperature_619_2021-06-06.tif_water...,1417.045174,1147.480578,860.596438,565.669875,502.098189,485.323683,490.046855,413.121209,463.311085,101.470774,6.698107,325.263124,254.138988,11.108861
1,Erie_Lake_Temperature_619_2021-06-07.tif_water...,1417.045174,1147.480578,860.596438,565.669875,502.098189,485.323683,490.046855,413.121209,463.311085,101.470774,6.698107,325.263124,254.138988,10.785201
2,Erie_Lake_Temperature_619_2021-06-08.tif_water...,1417.045174,1147.480578,860.596438,565.669875,502.098189,485.323683,490.046855,413.121209,463.311085,101.470774,6.698107,325.263124,254.138988,10.675139
3,Erie_Lake_Temperature_619_2021-06-09.tif_water...,1417.045174,1147.480578,860.596438,565.669875,502.098189,485.323683,490.046855,413.121209,463.311085,101.470774,6.698107,325.263124,254.138988,10.458424
4,Erie_Lake_Temperature_619_2021-06-10.tif_water...,1417.045174,1147.480578,860.596438,565.669875,502.098189,485.323683,490.046855,413.121209,463.311085,101.470774,6.698107,325.263124,254.138988,10.251806


Data Preprocessing

In [3]:
dataset.dropna(inplace=True)
dataset.count()

Filename       483
Band1_Mean     483
Band2_Mean     483
Band3_Mean     483
Band4_Mean     483
Band5_Mean     483
Band6_Mean     483
Band7_Mean     483
Band8_Mean     483
Band9_Mean     483
Band10_Mean    483
Band11_Mean    483
Band12_Mean    483
Band13_Mean    483
Temperature    483
dtype: int64

> **Support Vector Regression**

In [4]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import KFold,GridSearchCV,RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")
from joblib import dump

In [5]:
features = ['Band2_Mean','Band3_Mean','Band4_Mean','Band5_Mean','Band6_Mean','Band7_Mean','Band8_Mean','Band9_Mean']
label = ['Temperature']

X = dataset.loc[:, features].values
y = dataset.loc[:, label].values

#Test-train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Model Development
svr_model = SVR(kernel='rbf')

param_dist = {
    'C': [x for x in range(100, 1001, 10)],
    'epsilon': [x for x in np.arange(0.001, 0.1, 0.001)]
}

random_search = RandomizedSearchCV(SVR(), param_distributions=param_dist, n_iter=200, cv=5, scoring='neg_mean_squared_error', random_state=42)
random_search.fit(X_train, y_train)

randomDf = pd.DataFrame(random_search.cv_results_) 
best_svr_model = random_search.best_estimator_

y_pred = best_svr_model.predict(X_test)

test_r2 = r2_score(y_test, y_pred)
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))

#Model Evaluation
print(f"Best Parameters: {random_search.best_params_}")
print(f"R2 Score: {test_r2}")
print(f"RMSE: {test_rmse}")

svr_model_filename = 'temperature-svr.joblib'
dump(best_svr_model, svr_model_filename)

print(f"Best SVR model saved to {svr_model_filename}")

Best Parameters: {'epsilon': 0.085, 'C': 960}
R2 Score: 0.6359596140462129
RMSE: 2.071815004505052
Best SVR model saved to temperature-svr.joblib


> **Random Forest**

In [7]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas as pd

features = ['Band2_Mean','Band3_Mean','Band4_Mean','Band5_Mean','Band6_Mean','Band7_Mean','Band8_Mean','Band9_Mean']
label = ['Temperature']

X = dataset.loc[:, features].values
y = dataset.loc[:, label].values

#Test-train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Model Development
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=5, scoring='neg_mean_squared_error')

grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_

best_rf_model = grid_search.best_estimator_
y_pred_tuned = best_rf_model.predict(X_test)

r2_tuned = r2_score(y_test, y_pred_tuned)
rmse_rf=np.sqrt(mean_squared_error(y_test, y_pred_tuned))

#Model Evaluation
print(f'R2 Score : {r2_tuned}')
print(f'RMSE: {rmse_rf}')
print(f'Best Hyperparameters: {best_params}')

rf_model_filename = 'temperature-rf.joblib'
dump(best_rf_model, rf_model_filename)

print(f"Best RandomForestRegressor model saved to {rf_model_filename}")

R2 Score : 0.8132335762450562
RMSE: 1.48397160959567
Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
Best RandomForestRegressor model saved to temperature-rf.joblib


> **Artifical Neural Network**

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

2024-03-17 09:15:14.005186: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-17 09:15:14.005320: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-17 09:15:14.168229: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [9]:
features = ['Band2_Mean','Band3_Mean','Band4_Mean','Band5_Mean','Band6_Mean','Band7_Mean','Band8_Mean','Band9_Mean']
label = ['Temperature']

X = dataset.loc[:, features].values
y = dataset.loc[:, label].values

#Test-train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
#Model development
ann = Sequential() 
ann.add(Dense(units=16, activation="relu"))# Initializing the ANN
ann.add(Dense(units=32, activation="relu"))
ann.add(Dense(units=64, activation="relu"))
ann.add(Dense(units=64, activation="relu"))
ann.add(Dense(units=32, activation="relu"))
ann.add(Dense(units=16, activation="relu"))
ann.add(Dense(units=1))   # Output Layer

early_stopping = EarlyStopping(monitor='val_loss', patience=10)
ann.compile(optimizer="adam",loss="mean_squared_error")
ann.fit(x=X_train, y=y_train, epochs=100, batch_size=2,validation_data=(X_test,y_test))

Epoch 1/100


I0000 00:00:1710666929.335052     114 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.src.callbacks.History at 0x7e5ff4a1eef0>

In [11]:
prediction =ann.predict(X_test)
print()

#Model Evaluation
from sklearn import metrics
print("R2:",metrics.explained_variance_score(y_test,prediction))
print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test,prediction)))


R2: 0.20018164028515206
RMSE: 3.20680070950624


In [12]:
from keras.models import save_model
import pickle

with open('temperature_dataset.pkl', 'wb') as file:
    pickle.dump((X_train, X_test, y_train, y_test), file)
ann.save('temperature-ann.h5')

> **Multiple Linear Regression**

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

features = ['Band2_Mean','Band3_Mean','Band4_Mean','Band5_Mean','Band6_Mean','Band7_Mean','Band8_Mean','Band9_Mean']
label = ['Temperature']

X = dataset.loc[:, features].values
y = dataset.loc[:, label].values

#Test-train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

linear_model = LinearRegression()
linear_model.fit(X_train, y_train)
y_pred = linear_model.predict(X_test)

# Evaluate the model
r2 = r2_score(y_test, y_pred)
print(f"R^2 Score: {r2}")
rmse =np.sqrt(mean_squared_error(y_test, y_pred))
print(f'RMSE: {rmse}')

linear_model_filename = 'temperature-linear_regression.joblib'
dump(linear_model, linear_model_filename)

print(f"Linear Regression model saved to {linear_model_filename}")

R^2 Score: 0.3893481575601152
RMSE: 2.683323454799037
Linear Regression model saved to temperature-linear_regression.joblib
