In [173]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings('ignore')

In [174]:
data = pd.read_csv('Temp Plot(2).csv')

In [175]:
data.head()

Unnamed: 0,Date,Rainfall(inch),Temperature(0.9m),Temperature(1.8m),Air Temp F,Air Temp C
0,10/3/2016,0.0,25.567,26.171,75.9,24.388889
1,10/4/2016,0.0,25.713,26.1,78.6,25.888889
2,10/5/2016,0.06,25.946,26.1,82.6,28.111111
3,10/6/2016,0.39,26.446,26.158,82.8,28.222222
4,10/7/2016,0.0,26.663,26.3,72.3,22.388889


In [176]:
data.isna().sum()

Date                   0
Rainfall(inch)         1
Temperature(0.9m)     12
Temperature(1.8m)    353
Air Temp F            11
Air Temp C            11
dtype: int64

In [177]:
#Handling Missing values
# Mean imputation for missing values
data['Rainfall(inch)'].fillna(data['Rainfall(inch)'].mean(), inplace=True)
data['Temperature(0.9m)'].fillna(data['Temperature(0.9m)'].mean(), inplace=True)
data['Temperature(1.8m)'].fillna(data['Temperature(1.8m)'].mean(), inplace=True)
data['Air Temp F'].fillna(data['Air Temp F'].mean(), inplace=True)
data['Air Temp C'].fillna(data['Air Temp C'].mean(), inplace=True)

In [178]:
data.isna().sum()

Date                 0
Rainfall(inch)       0
Temperature(0.9m)    0
Temperature(1.8m)    0
Air Temp F           0
Air Temp C           0
dtype: int64

In [179]:
#creating day of year column

data['Date'] = pd.to_datetime(data['Date'])
data['day_of_year'] = data['Date'].dt.dayofyear

In [180]:
#Droping irrelevant features
data.drop(columns=['Date','Air Temp F'],axis=1,inplace=True)

In [181]:
data.describe()

Unnamed: 0,Rainfall(inch),Temperature(0.9m),Temperature(1.8m),Air Temp C,day_of_year
count,1165.0,1165.0,1165.0,1165.0,1165.0
mean,0.071375,20.59214,21.291655,20.302811,190.738197
std,0.252654,5.86034,4.164601,8.746264,106.805718
min,0.0,8.504,11.2,-4.888889,1.0
25%,0.0,15.533,19.2,13.777778,98.0
50%,0.0,20.337,21.291655,21.222222,195.0
75%,0.0,25.975,24.283,27.888889,288.0
max,4.14,30.808,29.7,36.111111,366.0


In [182]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1165 entries, 0 to 1164
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Rainfall(inch)     1165 non-null   float64
 1   Temperature(0.9m)  1165 non-null   float64
 2   Temperature(1.8m)  1165 non-null   float64
 3   Air Temp C         1165 non-null   float64
 4   day_of_year        1165 non-null   int32  
dtypes: float64(4), int32(1)
memory usage: 41.1 KB


In [183]:
def remove_outliers(df, columns):
    Q1 = df[columns].quantile(0.25)
    Q3 = df[columns].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    return df[~((df[columns] < lower_bound) | (df[columns] > upper_bound)).any(axis=1)]

In [184]:
cols=['Rainfall(inch)','Temperature(0.9m)','Temperature(1.8m)','Air Temp C','day_of_year']
data = remove_outliers(data, cols)

In [185]:
data.shape

(906, 5)

In [147]:
X = data[['Air Temp C', 'Rainfall(inch)', 'day_of_year']]
y = data[['Temperature(0.9m)', 'Temperature(1.8m)']]

In [148]:
X.head()

Unnamed: 0,Air Temp C,Rainfall(inch),day_of_year
0,24.388889,0.0,277
1,25.888889,0.0,278
4,22.388889,0.0,281
5,18.666667,0.0,282
6,18.611111,0.0,283


In [149]:
y.head()

Unnamed: 0,Temperature(0.9m),Temperature(1.8m)
0,25.567,26.171
1,25.713,26.1
4,26.663,26.3
5,25.85,26.258
6,25.046,25.929


In [150]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [151]:
X.shape,X_train.shape,X_test.shape

((906, 3), (724, 3), (182, 3))

In [152]:
#Starndardization (Scaling the data)

# from sklearn.preprocessing import StandardScaler
scaler = MinMaxScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [153]:
X_train

array([[0.35501355, 0.        , 0.03296703],
       [0.7804878 , 0.        , 0.79395604],
       [0.24932249, 0.        , 0.90384615],
       ...,
       [0.84823848, 0.        , 0.75274725],
       [0.68292683, 0.        , 0.26098901],
       [0.33739837, 0.        , 0.09340659]])

In [154]:
# Machine Learning Libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error, r2_score
import numpy as np

In [155]:
def evaluate_model(y_true, y_pred):
    """Calculate evaluation metrics."""
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    return mae, rmse, r2

In [156]:
# Define models
models = {
    "Linear Regression": LinearRegression(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "Random Forest Regressor": RandomForestRegressor(),
    'XGBRegressor':XGBRegressor(),
    # "Support Vector Regressor": SVR(kernel='linear')
}

In [157]:
def train_and_evaluate(models, X_train, y_train, X_test, y_test):
    """Train and evaluate models, and identify the best model based on R² score."""
    results = {}
    best_model_name = None
    best_r2_score = -float('inf')  # Initialize to a very low value

    for model_name, model in models.items():
        print(f"Training {model_name}...")

        try:
            model.fit(X_train, y_train)  # Train the model

            # Make predictions
            y_train_pred = model.predict(X_train)
            y_test_pred = model.predict(X_test)

            # Evaluate Train and Test dataset
            model_train_metrics = evaluate_model(y_train, y_train_pred)
            model_test_metrics = evaluate_model(y_test, y_test_pred)

            # Store results
            results[model_name] = {
                "train_metrics": model_train_metrics,
                "test_metrics": model_test_metrics
            }

            # Print results
            print(f'{model_name} Performance:')
            print('- Training set:')
            print(f"  - RMSE: {model_train_metrics[1]:.4f}, MAE: {model_train_metrics[0]:.4f}, R2: {model_train_metrics[2]:.4f}")
            print('- Test set:')
            print(f"  - RMSE: {model_test_metrics[1]:.4f}, MAE: {model_test_metrics[0]:.4f}, R2: {model_test_metrics[2]:.4f}")
            print('=' * 30)

            # Check for the best model based on the R² score of the test set
            if model_test_metrics[2] > best_r2_score:
                best_r2_score = model_test_metrics[2]
                best_model_name = model_name

        except Exception as e:
            print(f"Error training {model_name}: {e}")

    # Set the best model variable
    best_model = models[best_model_name] if best_model_name else None

    print(f"The best model is: {best_model_name} with an R² score of {best_r2_score:.4f}")
    
    return results, best_model

# Call the function
results, best_model = train_and_evaluate(models, X_train, y_train, X_test, y_test)


Training Linear Regression...
Linear Regression Performance:
- Training set:
  - RMSE: 2.8276, MAE: 2.2957, R2: 0.6515
- Test set:
  - RMSE: 3.0553, MAE: 2.4713, R2: 0.5688
Training Decision Tree Regressor...
Decision Tree Regressor Performance:
- Training set:
  - RMSE: 0.0737, MAE: 0.0055, R2: 0.9998
- Test set:
  - RMSE: 2.4602, MAE: 1.4687, R2: 0.6732
Training Random Forest Regressor...
Random Forest Regressor Performance:
- Training set:
  - RMSE: 0.7131, MAE: 0.4874, R2: 0.9748
- Test set:
  - RMSE: 2.0502, MAE: 1.4138, R2: 0.7755
Training XGBRegressor...
XGBRegressor Performance:
- Training set:
  - RMSE: 0.3339, MAE: 0.2245, R2: 0.9947
- Test set:
  - RMSE: 2.2945, MAE: 1.5552, R2: 0.7206
The best model is: Random Forest Regressor with an R² score of 0.7755


In [158]:
best_model

In [159]:
X

Unnamed: 0,Air Temp C,Rainfall(inch),day_of_year
0,24.388889,0.0,277
1,25.888889,0.0,278
4,22.388889,0.0,281
5,18.666667,0.0,282
6,18.611111,0.0,283
...,...,...,...
1158,20.302811,0.0,339
1159,20.302811,0.0,340
1160,20.302811,0.0,341
1161,20.302811,0.0,342


In [160]:
# Making predictions for a new sample
#inputs order - AirTemp C, Solar radiation, Precipitation, day_of_year

new_data = np.array([[24.388889,0.0,277]])  # Example values
new_data_scaled = scaler.transform(new_data)
predicted_values = best_model.predict(new_data)
predicted_values

array([[24.91204   , 24.03318862]])

### ANN

In [161]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

In [162]:

input_features = 3
output_features = 2

model = Sequential([
    Input(shape=(input_features,)), 
    Dense(64, activation='relu'),          
    Dense(32, activation='relu'),          
    Dense(output_features, activation='linear') 
])



In [163]:

model.compile(optimizer='adam', loss='mse', metrics=['mae'])


In [164]:

model.summary()

In [165]:

model.fit(X_train, y_train, epochs=70, batch_size=1, verbose=1)

Epoch 1/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - loss: 165.7518 - mae: 9.9150 
Epoch 2/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 9.8961 - mae: 2.5529
Epoch 3/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 8.1935 - mae: 2.2509
Epoch 4/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 7.6509 - mae: 2.1956
Epoch 5/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 7.3221 - mae: 2.1217
Epoch 6/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6.9479 - mae: 2.0609
Epoch 7/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 5.8205 - mae: 1.8820
Epoch 8/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 6.1997 - mae: 1.9235
Epoch 9/70
[1m724/724[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 

<keras.src.callbacks.history.History at 0x173ff45b3b0>

In [166]:
loss, mae = model.evaluate(X_test, y_test, verbose=0)
print(f'Test Loss: {loss}, Test MAE: {mae}')

Test Loss: 3.781299591064453, Test MAE: 1.4791103601455688


In [167]:
y_pred = model.predict(X_test)
plt.figure(figsize=(5, 5))
plt.scatter(y_test, y_pred, alpha=0.5)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
plt.title(f"ANN\nR^2: {r2_score(y_test, y_pred):.4f}, MSE: {mean_squared_error(y_test, y_pred):.4f}")
plt.xlabel("Actual Values")
plt.ylabel("Predicted Values")

plt.savefig('ANN_comparison.png')
plt.close()

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


In [168]:
X

Unnamed: 0,Air Temp C,Rainfall(inch),day_of_year
0,24.388889,0.0,277
1,25.888889,0.0,278
4,22.388889,0.0,281
5,18.666667,0.0,282
6,18.611111,0.0,283
...,...,...,...
1158,20.302811,0.0,339
1159,20.302811,0.0,340
1160,20.302811,0.0,341
1161,20.302811,0.0,342


In [169]:
def expand_intermediate_equation(model):
    input_vars = ['Air Temp', 'Rainfall', 'day_of_year']  # Air Temp, Rainfall, day_of_year
    equation = input_vars
    
    final_equation = ""

    for i, layer in enumerate(model.layers):
        weights, biases = layer.get_weights()
        output_vars = []

        layer_equation = []
        for j in range(weights.shape[1]):
            terms = [f"({weights[k, j]:.3f} * {equation[k]})" for k in range(len(equation))]
            layer_equation.append(f"({' + '.join(terms)} + {biases[j]:.3f})")

        if layer.activation.__name__ == 'relu':
            layer_equation = [f"ReLU({eq})" for eq in layer_equation]
        equation = layer_equation
        if i == len(model.layers) - 1:
            final_equation = equation[0]

    print(f"Final Expanded Equation: \nOutput = {final_equation}")

expand_intermediate_equation(model)


Final Expanded Equation: 
Output = ((0.097 * ReLU(((-0.046 * ReLU(((-0.291 * Air Temp) + (-0.087 * Rainfall) + (0.238 * day_of_year) + 0.348))) + (-0.232 * ReLU(((0.144 * Air Temp) + (-0.275 * Rainfall) + (0.204 * day_of_year) + 0.313))) + (0.188 * ReLU(((-0.017 * Air Temp) + (-0.059 * Rainfall) + (-0.285 * day_of_year) + 0.000))) + (-0.146 * ReLU(((0.672 * Air Temp) + (0.096 * Rainfall) + (0.129 * day_of_year) + -0.322))) + (0.026 * ReLU(((0.109 * Air Temp) + (0.256 * Rainfall) + (0.217 * day_of_year) + 0.143))) + (-0.053 * ReLU(((0.201 * Air Temp) + (0.224 * Rainfall) + (-0.030 * day_of_year) + 0.267))) + (0.130 * ReLU(((0.091 * Air Temp) + (0.135 * Rainfall) + (0.075 * day_of_year) + 0.313))) + (-0.026 * ReLU(((-0.121 * Air Temp) + (0.214 * Rainfall) + (0.014 * day_of_year) + 0.147))) + (-0.181 * ReLU(((0.059 * Air Temp) + (-0.195 * Rainfall) + (-0.513 * day_of_year) + 0.433))) + (0.009 * ReLU(((0.620 * Air Temp) + (-0.107 * Rainfall) + (-0.466 * day_of_year) + 0.055))) + (-0.126 * 

In [170]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:.4f}')
print(f'Mean Absolute Error: {mae:.4f}')
print(f'R² Score: {r2:.4f}')

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 186us/step
Mean Squared Error: 3.7813
Mean Absolute Error: 1.4791
R² Score: 0.8052


In [171]:
#inputs order - AirTemp C,radiation, day_of_year

new_data = np.array([[24.388889,0.039,277]])  # Example values
new_data_scaled = scaler.transform(new_data)
predicted_values = best_model.predict(new_data_scaled)
predicted_values

array([[25.80622, 26.3011 ]])