In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

In [7]:
data = pd.read_csv('Temp Plot(2).csv')

In [8]:
data.head()

Unnamed: 0,Date,Rainfall(inch),Temperature(0.9m),Temperature(1.8m),Air Temp F,Air Temp C
0,10/3/2016,0.0,25.567,26.171,75.9,24.388889
1,10/4/2016,0.0,25.713,26.1,78.6,25.888889
2,10/5/2016,0.06,25.946,26.1,82.6,28.111111
3,10/6/2016,0.39,26.446,26.158,82.8,28.222222
4,10/7/2016,0.0,26.663,26.3,72.3,22.388889


In [9]:
data.isna().sum()

Date                   0
Rainfall(inch)         1
Temperature(0.9m)     12
Temperature(1.8m)    353
Air Temp F            11
Air Temp C            11
dtype: int64

In [10]:
#Handling Missing values
# Mean imputation for missing values
data['Rainfall(inch)'].fillna(data['Rainfall(inch)'].mean(), inplace=True)
data['Temperature(0.9m)'].fillna(data['Temperature(0.9m)'].mean(), inplace=True)
data['Temperature(1.8m)'].fillna(data['Temperature(1.8m)'].mean(), inplace=True)
data['Air Temp F'].fillna(data['Air Temp F'].mean(), inplace=True)
data['Air Temp C'].fillna(data['Air Temp C'].mean(), inplace=True)

In [11]:
data.isna().sum()

Date                 0
Rainfall(inch)       0
Temperature(0.9m)    0
Temperature(1.8m)    0
Air Temp F           0
Air Temp C           0
dtype: int64

In [12]:
#creating day of year column

data['Date'] = pd.to_datetime(data['Date'])
data['day_of_year'] = data['Date'].dt.dayofyear

In [13]:
#Droping irrelevant features
data.drop(columns=['Date','Air Temp F'],axis=1,inplace=True)

In [14]:
data.describe()

Unnamed: 0,Rainfall(inch),Temperature(0.9m),Temperature(1.8m),Air Temp C,day_of_year
count,1165.0,1165.0,1165.0,1165.0,1165.0
mean,0.071375,20.59214,21.291655,20.302811,190.738197
std,0.252654,5.86034,4.164601,8.746264,106.805718
min,0.0,8.504,11.2,-4.888889,1.0
25%,0.0,15.533,19.2,13.777778,98.0
50%,0.0,20.337,21.291655,21.222222,195.0
75%,0.0,25.975,24.283,27.888889,288.0
max,4.14,30.808,29.7,36.111111,366.0


In [15]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1165 entries, 0 to 1164
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Rainfall(inch)     1165 non-null   float64
 1   Temperature(0.9m)  1165 non-null   float64
 2   Temperature(1.8m)  1165 non-null   float64
 3   Air Temp C         1165 non-null   float64
 4   day_of_year        1165 non-null   int32  
dtypes: float64(4), int32(1)
memory usage: 41.1 KB


In [16]:
def remove_outliers(df, columns):
    Q1 = df[columns].quantile(0.25)
    Q3 = df[columns].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df[~((df[columns] < lower_bound) | (df[columns] > upper_bound)).any(axis=1)]

In [17]:
cols=['Rainfall(inch)','Temperature(0.9m)','Temperature(1.8m)','Air Temp C','day_of_year']
data = remove_outliers(data, cols)

In [18]:
data.shape

(906, 5)

In [53]:
X = data[['Air Temp C', 'Rainfall(inch)', 'day_of_year']]
y = data[['Temperature(0.9m)', 'Temperature(1.8m)']]

In [54]:
X.head()

Unnamed: 0,Air Temp C,Rainfall(inch),day_of_year
0,24.388889,0.0,277
1,25.888889,0.0,278
4,22.388889,0.0,281
5,18.666667,0.0,282
6,18.611111,0.0,283


In [55]:
y.head()

Unnamed: 0,Temperature(0.9m),Temperature(1.8m)
0,25.567,26.171
1,25.713,26.1
4,26.663,26.3
5,25.85,26.258
6,25.046,25.929


In [56]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [57]:
X.shape,X_train.shape,X_test.shape

((906, 3), (634, 3), (272, 3))

In [77]:
#Starndardization (Scaling the data)

# from sklearn.preprocessing import StandardScaler
scaler = MinMaxScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [78]:
X_train

array([[0.7100271 , 0.        , 0.41758242],
       [1.        , 0.        , 0.55494505],
       [0.59078591, 0.        , 0.17032967],
       ...,
       [0.84823848, 0.        , 0.75274725],
       [0.68292683, 0.        , 0.26098901],
       [0.33739837, 0.        , 0.09340659]])

In [79]:
# Machine Learning Libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score
import numpy as np

In [80]:
def evaluate_model(y_true, y_pred):
    """Calculate evaluation metrics."""
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    return mae, rmse, r2

In [81]:
# Define models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    'XGBRegressor':XGBRegressor(),
    # "Support Vector Regressor": SVR(kernel='linear')
}

In [82]:
def train_and_evaluate(models, X_train, y_train, X_test, y_test):
    """Train and evaluate models, and identify the best model based on R² score."""
    results = {}
    best_model_name = None
    best_r2_score = -float('inf')  # Initialize to a very low value

    for model_name, model in models.items():
        print(f"Training {model_name}...")

        try:
            model.fit(X_train, y_train)  # Train the model

            # Make predictions
            y_train_pred = model.predict(X_train)
            y_test_pred = model.predict(X_test)

            # Evaluate Train and Test dataset
            model_train_metrics = evaluate_model(y_train, y_train_pred)
            model_test_metrics = evaluate_model(y_test, y_test_pred)

            # Store results
            results[model_name] = {
                "train_metrics": model_train_metrics,
                "test_metrics": model_test_metrics
            }

            # Print results
            print(f'{model_name} Performance:')
            print('- Training set:')
            print(f"  - RMSE: {model_train_metrics[1]:.4f}, MAE: {model_train_metrics[0]:.4f}, R2: {model_train_metrics[2]:.4f}")
            print('- Test set:')
            print(f"  - RMSE: {model_test_metrics[1]:.4f}, MAE: {model_test_metrics[0]:.4f}, R2: {model_test_metrics[2]:.4f}")
            print('=' * 30)

            # Check for the best model based on the R² score of the test set
            if model_test_metrics[2] > best_r2_score:
                best_r2_score = model_test_metrics[2]
                best_model_name = model_name

        except Exception as e:
            print(f"Error training {model_name}: {e}")

    # Set the best model variable
    best_model = models[best_model_name] if best_model_name else None

    print(f"The best model is: {best_model_name} with an R² score of {best_r2_score:.4f}")
    
    return results, best_model

# Call the function
results, best_model = train_and_evaluate(models, X_train, y_train, X_test, y_test)


Training Linear Regression...
Linear Regression Performance:
- Training set:
  - RMSE: 2.8582, MAE: 2.3105, R2: 0.6413
- Test set:
  - RMSE: 2.9162, MAE: 2.3814, R2: 0.6222
Training Decision Tree Regressor...
Decision Tree Regressor Performance:
- Training set:
  - RMSE: 0.0787, MAE: 0.0062, R2: 0.9997
- Test set:
  - RMSE: 2.4037, MAE: 1.4561, R2: 0.7085
Training Random Forest Regressor...
Random Forest Regressor Performance:
- Training set:
  - RMSE: 0.7239, MAE: 0.5048, R2: 0.9740
- Test set:
  - RMSE: 1.9703, MAE: 1.3485, R2: 0.8031
Training XGBRegressor...
XGBRegressor Performance:
- Training set:
  - RMSE: 0.2832, MAE: 0.1995, R2: 0.9962
- Test set:
  - RMSE: 2.1745, MAE: 1.4892, R2: 0.7614
The best model is: Random Forest Regressor with an R² score of 0.8031


In [83]:
best_model

In [84]:
X

Unnamed: 0,Air Temp C,Rainfall(inch),day_of_year
0,24.388889,0.0,277
1,25.888889,0.0,278
4,22.388889,0.0,281
5,18.666667,0.0,282
6,18.611111,0.0,283
...,...,...,...
1158,20.302811,0.0,339
1159,20.302811,0.0,340
1160,20.302811,0.0,341
1161,20.302811,0.0,342


In [86]:
# Making predictions for a new sample
#inputs order - AirTemp C, Solar radiation, Precipitation, day_of_year

new_data = np.array([[24.388889,0.0,277]])  # Example values
new_data_scaled = scaler.transform(new_data)
predicted_values = best_model.predict(new_data)
predicted_values

array([[24.24898   , 23.69818897]])

In [87]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

In [88]:

input_features = 3
output_features = 2

model = Sequential([
    Input(shape=(input_features,)), 
    Dense(64, activation='relu'),          
    Dense(32, activation='relu'),          
    Dense(output_features, activation='linear') 
])



In [89]:

model.compile(optimizer='adam', loss='mse', metrics=['mae'])


In [90]:

model.summary()

In [91]:

model.fit(X_train, y_train, epochs=50, batch_size=1, verbose=1)

Epoch 1/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 250.0850 - mae: 12.7161
Epoch 2/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 10.9515 - mae: 2.6704
Epoch 3/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 9.4716 - mae: 2.5086
Epoch 4/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 8.6781 - mae: 2.3249
Epoch 5/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7.5455 - mae: 2.1739
Epoch 6/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7.9113 - mae: 2.1212
Epoch 7/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6.5892 - mae: 2.0126
Epoch 8/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7.6555 - mae: 2.1336
Epoch 9/50
[1m634/634[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m

<keras.src.callbacks.history.History at 0x173fcfd82f0>

In [92]:
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {loss}, Test MAE: {mae}')

Test Loss: 4.230185508728027, Test MAE: 1.6051833629608154


In [93]:
y_pred = model.predict(X_test)
plt.figure(figsize=(5, 5))
plt.scatter(y_test, y_pred, alpha=0.5)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.title(f"ANN\nR^2: {r2_score(y_test, y_pred):.4f}, MSE: {mean_squared_error(y_test, y_pred):.4f}")
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")

plt.savefig('ANN_comparison.png')
plt.close()

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


In [41]:
X

Unnamed: 0,Air Temp C,Rainfall(inch),day_of_year
0,24.388889,0.0,277
1,25.888889,0.0,278
4,22.388889,0.0,281
5,18.666667,0.0,282
6,18.611111,0.0,283
...,...,...,...
1158,20.302811,0.0,339
1159,20.302811,0.0,340
1160,20.302811,0.0,341
1161,20.302811,0.0,342


In [94]:
def expand_intermediate_equation(model):
    input_vars = ['Air Temp', 'Rainfall', 'day_of_year']  # Air Temp, Rainfall, day_of_year
    equation = input_vars
    
    final_equation = ""

    for i, layer in enumerate(model.layers):
        weights, biases = layer.get_weights()
        output_vars = []

        layer_equation = []
        for j in range(weights.shape[1]):
            terms = [f"({weights[k, j]:.3f} * {equation[k]})" for k in range(len(equation))]
            layer_equation.append(f"({' + '.join(terms)} + {biases[j]:.3f})")

        if layer.activation.__name__ == 'relu':
            layer_equation = [f"ReLU({eq})" for eq in layer_equation]
        equation = layer_equation
        if i == len(model.layers) - 1:
            final_equation = equation[0]

    print(f"Final Expanded Equation: \nOutput = {final_equation}")

expand_intermediate_equation(model)


Final Expanded Equation: 
Output = ((-0.022 * ReLU(((0.005 * ReLU(((-0.158 * Air Temp) + (0.192 * Rainfall) + (-0.242 * day_of_year) + 0.000))) + (-0.209 * ReLU(((-0.323 * Air Temp) + (-0.251 * Rainfall) + (0.160 * day_of_year) + 0.307))) + (-0.197 * ReLU(((-0.022 * Air Temp) + (0.123 * Rainfall) + (0.211 * day_of_year) + 0.272))) + (0.031 * ReLU(((-0.001 * Air Temp) + (0.153 * Rainfall) + (-0.197 * day_of_year) + 0.000))) + (0.071 * ReLU(((0.209 * Air Temp) + (0.249 * Rainfall) + (-0.082 * day_of_year) + 0.366))) + (0.049 * ReLU(((-0.353 * Air Temp) + (0.029 * Rainfall) + (0.086 * day_of_year) + 0.363))) + (-0.030 * ReLU(((-0.136 * Air Temp) + (0.189 * Rainfall) + (-0.223 * day_of_year) + 0.000))) + (-0.242 * ReLU(((0.058 * Air Temp) + (-0.252 * Rainfall) + (-0.124 * day_of_year) + 0.142))) + (-0.208 * ReLU(((0.038 * Air Temp) + (0.077 * Rainfall) + (-1.476 * day_of_year) + 0.821))) + (0.106 * ReLU(((-0.244 * Air Temp) + (-0.054 * Rainfall) + (-0.065 * day_of_year) + 0.000))) + (-0.01

In [95]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R² Score: {r2:.4f}')

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Mean Squared Error: 4.2302
Mean Absolute Error: 1.6052
R² Score: 0.7915


In [96]:
#inputs order - AirTemp C,radiation, day_of_year

new_data = np.array([[24.388889,0.039,277]])  # Example values
new_data_scaled = scaler.transform(new_data)
predicted_values = best_model.predict(new_data_scaled)
predicted_values

array([[25.80761, 26.34639]])