In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split

# Load the data
# Read the daily records, parse 'date' and index the average temperature
# ('tavg') by date.
# NOTE(review): the lag features and the forecast dates below assume one row
# per day in chronological order -- confirm against the spreadsheet.
data = pd.read_excel('/content/Final.xlsx')
data['date'] = pd.to_datetime(data['date'])
temperature_data = data[['date', 'tavg']].set_index('date')

# Create lagged features: lagN holds the temperature N rows earlier.
feature_cols = ['lag1', 'lag2', 'lag3']
for lag, col in enumerate(feature_cols, start=1):
    temperature_data[col] = temperature_data['tavg'].shift(lag)
# Drop rows with NaN values (at least the first three, which have no full lag history).
temperature_data = temperature_data.dropna()

# Prepare the features and labels
X = temperature_data[feature_cols]
y = temperature_data['tavg']

# Split into training and testing data.
# shuffle=False keeps the split chronological: the last 20% of rows are held
# out. A shuffled split would put test days right next to training days whose
# lag features contain the test targets, which inflates the score.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Initialize and train the Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the temperatures on the held-out rows
y_pred = model.predict(X_test)

# Calculate and print the model performance
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Refit on every available row so the forecast also learns from the most
# recent observations that were held out for the evaluation above.
model.fit(X, y)

# Predict next month's temperatures
forecast_days = 30
# Seed the recursion with the three most recent observations. The last row's
# own lag1..lag3 are the inputs that predict the last *observed* day, so using
# them would shift every forecast by one day.
last_row = temperature_data.iloc[-1]
last_values = np.array([[last_row['tavg'], last_row['lag1'], last_row['lag2']]])
predicted_temps = []
for _ in range(forecast_days):
    # Wrap the window in a DataFrame with the training column names so
    # scikit-learn does not warn about missing feature names.
    next_temp = model.predict(pd.DataFrame(last_values, columns=feature_cols))
    predicted_temps.append(next_temp[0])
    # Slide the window: the new prediction becomes lag1, the rest shift back.
    last_values = np.array([[next_temp[0], last_values[0, 0], last_values[0, 1]]])

# Create dates for the predictions, starting the day after the last observation
last_date = temperature_data.index[-1]
predicted_dates = pd.date_range(start=last_date + pd.Timedelta(days=1), periods=forecast_days)

# Create a DataFrame for plotting
predicted_df = pd.DataFrame({
    'date': predicted_dates,
    'predicted_temperature': predicted_temps
})

# Plotting
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=predicted_df['date'],
    y=predicted_df['predicted_temperature'],
    mode='lines+markers',
    name='Predicted Temperature',
))
fig.update_layout(
    title='Predicted Temperatures for Next Month Using Random Forest',
    xaxis_title='Date',
    yaxis_title='Temperature (°C)',
    xaxis=dict(rangeslider=dict(visible=True)),
)
fig.show()


Mean Squared Error: 13.220157612196767




In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split

# Load the data
# Read the daily records, parse 'date' and index the average temperature
# ('tavg') by date.
# NOTE(review): the lag features and the forecast dates below assume one row
# per day in chronological order -- confirm against the spreadsheet.
data = pd.read_excel('Final.xlsx')
data['date'] = pd.to_datetime(data['date'])
temperature_data = data[['date', 'tavg']].set_index('date')

# Create lagged features: lagN holds the temperature N rows earlier.
feature_cols = ['lag1', 'lag2', 'lag3']
for lag, col in enumerate(feature_cols, start=1):
    temperature_data[col] = temperature_data['tavg'].shift(lag)
# Drop rows with NaN values (at least the first three, which have no full lag history).
temperature_data = temperature_data.dropna()

# Prepare the features and labels
X = temperature_data[feature_cols]
y = temperature_data['tavg']

# Split into training and testing data.
# shuffle=False keeps the split chronological: the last 20% of rows are held
# out. A shuffled split would put test days right next to training days whose
# lag features contain the test targets, which inflates the score.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Initialize and train the Random Forest model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the temperatures on the test set
y_pred = model.predict(X_test)

# Calculate and print the model performance
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.3f}')
print(f'Root Mean Squared Error: {rmse:.3f}')
print(f'Mean Absolute Error: {mae:.3f}')
print(f'R² Score: {r2:.3f}')

# Refit on every available row so the forecast also learns from the most
# recent observations that were held out for the evaluation above.
model.fit(X, y)

# Predict next month's temperatures
forecast_days = 30
# Seed the recursion with the three most recent observations. The last row's
# own lag1..lag3 are the inputs that predict the last *observed* day, so using
# them would shift every forecast by one day.
last_row = temperature_data.iloc[-1]
last_values = np.array([[last_row['tavg'], last_row['lag1'], last_row['lag2']]])
predicted_temps = []
predicted_dates = pd.date_range(start=temperature_data.index[-1] + pd.Timedelta(days=1), periods=forecast_days)
for _ in predicted_dates:
    # Wrap the window in a DataFrame with the training column names so
    # scikit-learn does not warn about missing feature names.
    next_temp = model.predict(pd.DataFrame(last_values, columns=feature_cols))
    predicted_temps.append(next_temp[0])
    # Slide the window: the new prediction becomes lag1, the rest shift back.
    last_values = np.array([[next_temp[0], last_values[0, 0], last_values[0, 1]]])

# Gather historical temperatures for the same calendar days over the last 3 years.
# The lookup keys must be timestamps: comparing the DatetimeIndex against bare
# integer years never matches, which left the historical trace empty.
historical_dates = [
    forecast_date - pd.DateOffset(years=years_back)
    for forecast_date in predicted_dates
    for years_back in range(1, 4)
]
historical_temps = temperature_data.loc[temperature_data.index.isin(historical_dates)]

# Create DataFrames for plotting
predicted_df = pd.DataFrame({
    'date': predicted_dates,
    'predicted_temperature': predicted_temps
})
historical_df = historical_temps.reset_index()

# Plotting
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=predicted_df['date'],
    y=predicted_df['predicted_temperature'],
    mode='lines+markers',
    name='Predicted Temperature',
))
fig.add_trace(go.Scatter(
    x=historical_df['date'],
    y=historical_df['tavg'],
    mode='lines+markers',
    name='Historical Temperatures',
))
fig.update_layout(
    title='Predicted vs Historical Temperatures for the Same Month',
    xaxis_title='Date',
    yaxis_title='Temperature (°C)',
    xaxis=dict(rangeslider=dict(visible=True)),
)
fig.show()


Mean Squared Error: 13.220
Root Mean Squared Error: 3.636
Mean Absolute Error: 2.694
R² Score: 0.883



X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomFo

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error, r2_score
import plotly.graph_objects as go

# Load the data
# Read the workbook, parse the 'date' column into datetimes, and keep only the
# average temperature ('tavg') indexed by date.
data = pd.read_excel('Final.xlsx').assign(date=lambda frame: pd.to_datetime(frame['date']))
temperature_data = data[['date', 'tavg']].set_index('date')

# Normalize the temperature data into the [0, 1] range.
# NOTE(review): the scaler is fitted on the whole series, including the rows
# that are later held out as the test set -- consider fitting on the training
# portion only.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(temperature_data[['tavg']])
temperature_data['scaled_tavg'] = scaler.transform(temperature_data[['tavg']])

# Function to create sequences and corresponding labels
def create_dataset(dataset, look_back=1):
    """Turn a 1-D series into sliding windows and their next-step targets.

    Args:
        dataset: Sliceable 1-D sequence of values (e.g. a NumPy array).
        look_back: Number of consecutive values in each input window.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays. ``X[i]`` is
        ``dataset[i:i + look_back]`` and ``Y[i]`` is ``dataset[i + look_back]``.
        There are ``len(dataset) - look_back`` pairs; both arrays are empty
        when the series is not longer than ``look_back``.
    """
    X, Y = [], []
    # The last valid window starts at len(dataset) - look_back - 1, so the
    # range must stop at len(dataset) - look_back. The previous extra "- 1"
    # silently discarded the most recent window/target pair.
    for i in range(len(dataset) - look_back):
        X.append(dataset[i:(i + look_back)])
        Y.append(dataset[i + look_back])
    return np.array(X), np.array(Y)

# Prepare the data for LSTM
look_back = 30  # number of past days to use for predicting the next day
dataX, dataY = create_dataset(temperature_data['scaled_tavg'].values, look_back)
# Add a trailing feature axis: (samples, look_back) -> (samples, look_back, 1),
# matching the LSTM input_shape declared below.
dataX = np.reshape(dataX, (dataX.shape[0], dataX.shape[1], 1))

# Splitting data into training and testing sets (chronological: first 90% / last 10%)
train_size = int(len(dataX) * 0.9)
test_size = len(dataX) - train_size
trainX, testX = dataX[:train_size], dataX[train_size:]
trainY, testY = dataY[:train_size], dataY[train_size:]

# Calendar date of every test target: the target of window i is row i + look_back.
test_dates = temperature_data.index[look_back + train_size:look_back + len(dataX)]

# Build the LSTM model: one 50-unit LSTM layer followed by a single linear output
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the model
model.fit(trainX, trainY, epochs=100, batch_size=32, verbose=2)

# Make predictions
train_predict = model.predict(trainX)
test_predict = model.predict(testX)

# Invert predictions and actual values to original scale.
# The targets are wrapped in a list, so they come back with shape (1, n) and
# are read through index [0] below.
train_predict = scaler.inverse_transform(train_predict)
trainY = scaler.inverse_transform([trainY])
test_predict = scaler.inverse_transform(test_predict)
testY = scaler.inverse_transform([testY])

# Calculate metrics on the held-out samples
test_mse = mean_squared_error(testY[0], test_predict[:, 0])
test_rmse = np.sqrt(test_mse)
test_r2 = r2_score(testY[0], test_predict[:, 0])

# The evaluation covers the whole held-out tail (last 10% of samples), not a
# single month, so the labels describe the test set rather than "July 2023".
print(f'Mean Squared Error on the test set: {test_mse:.3f}')
print(f'Root Mean Squared Error on the test set: {test_rmse:.3f}')
print(f'R² Score on the test set: {test_r2:.3f}')

# Plotting: actual vs predicted against the real target dates
fig = go.Figure()
fig.add_trace(go.Scatter(x=test_dates, y=testY[0], mode='lines+markers', name='Actual Temperature'))
fig.add_trace(go.Scatter(x=test_dates, y=test_predict[:, 0], mode='lines+markers', name='Predicted Temperature'))
fig.update_layout(
    title=f'Predicted vs Actual Temperatures on the Held-Out Test Set (last {test_size} samples)',
    xaxis_title='Date',
    yaxis_title='Temperature (°C)',
)
fig.show()


Epoch 1/100
104/104 - 7s - loss: 0.0252 - 7s/epoch - 68ms/step
Epoch 2/100
104/104 - 1s - loss: 0.0070 - 1s/epoch - 11ms/step
Epoch 3/100
104/104 - 1s - loss: 0.0068 - 1s/epoch - 10ms/step
Epoch 4/100
104/104 - 1s - loss: 0.0067 - 1s/epoch - 10ms/step
Epoch 5/100
104/104 - 1s - loss: 0.0064 - 1s/epoch - 10ms/step
Epoch 6/100
104/104 - 1s - loss: 0.0063 - 1s/epoch - 10ms/step
Epoch 7/100
104/104 - 1s - loss: 0.0062 - 1s/epoch - 10ms/step
Epoch 8/100
104/104 - 1s - loss: 0.0059 - 1s/epoch - 13ms/step
Epoch 9/100
104/104 - 2s - loss: 0.0057 - 2s/epoch - 16ms/step
Epoch 10/100
104/104 - 1s - loss: 0.0054 - 1s/epoch - 10ms/step
Epoch 11/100
104/104 - 1s - loss: 0.0052 - 1s/epoch - 10ms/step
Epoch 12/100
104/104 - 1s - loss: 0.0049 - 1s/epoch - 10ms/step
Epoch 13/100
104/104 - 1s - loss: 0.0046 - 1s/epoch - 10ms/step
Epoch 14/100
104/104 - 1s - loss: 0.0044 - 1s/epoch - 10ms/step
Epoch 15/100
104/104 - 1s - loss: 0.0043 - 1s/epoch - 11ms/step
Epoch 16/100
104/104 - 2s - loss: 0.0041 - 2s/epo

In [None]:
import plotly.graph_objects as go
import numpy as np


# Demo of the figure styling using SIMULATED values -- nothing here comes from
# a fitted model or from the spreadsheet.
test_dates = np.arange('2023-07-01', '2023-08-01', dtype='datetime64[D]')

# Use a seeded generator so the figure is identical on every run.
rng = np.random.default_rng(42)
actual_temperatures = rng.normal(loc=25, scale=5, size=len(test_dates))  # Simulated data
# "Predictions" are the simulated actuals plus Gaussian noise (std 2).
predicted_temperatures = actual_temperatures + rng.normal(loc=0, scale=2, size=len(test_dates))  # Simulated data

# Plotting
fig = go.Figure()

# Actual Temperature Trace
fig.add_trace(go.Scatter(x=test_dates, y=actual_temperatures,
                         mode='lines+markers', name='Actual Temperature',
                         line=dict(color='blue', width=2),
                         marker=dict(size=5, color='blue', symbol='circle')))

# Predicted Temperature Trace
fig.add_trace(go.Scatter(x=test_dates, y=predicted_temperatures,
                         mode='lines+markers', name='Predicted Temperature',
                         line=dict(color='red', width=2, dash='dash'),
                         marker=dict(size=5, color='red', symbol='x')))

# Layout: white background, boxed axes, horizontal legend above the plot area.
# The title states that the data is simulated so the figure cannot be mistaken
# for a real model evaluation.
fig.update_layout(
    title='Predicted vs Actual Temperatures for July 2023 (Simulated Data)',
    xaxis_title='Date',
    yaxis_title='Temperature (°C)',
    xaxis=dict(showline=True, showgrid=False, linecolor='black',
               linewidth=2, ticks='outside', tickfont=dict(
                   family='Arial', size=12, color='black'),
               tickformat='%b %d'),
    yaxis=dict(showline=True, showgrid=True, gridcolor='lightgrey',
               linecolor='black', linewidth=2, ticks='outside',
               tickfont=dict(family='Arial', size=12, color='black')),
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    plot_bgcolor='white'
)

fig.show()


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense
from sklearn.metrics import mean_squared_error
import plotly.graph_objects as go
from datetime import timedelta

# Load the data
# Read the workbook, convert 'date' to datetimes, and build a date-indexed
# frame holding only the average temperature ('tavg').
data = pd.read_excel('Final.xlsx').assign(date=lambda frame: pd.to_datetime(frame['date']))
temperature_data = data[['date', 'tavg']].set_index('date')

# Normalize the temperature data: learn the min/max of 'tavg', then map it to [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1)).fit(temperature_data[['tavg']])
temperature_data['scaled_tavg'] = scaler.transform(temperature_data[['tavg']])

# Function to create sequences and corresponding labels
def create_dataset(dataset, look_back=1):
    """Build supervised-learning pairs from a 1-D series.

    Each input is a run of ``look_back`` consecutive values and its label is
    the value that immediately follows that run.

    Args:
        dataset: Sliceable 1-D sequence of values.
        look_back: Length of every input window.

    Returns:
        ``(X, Y)`` as NumPy arrays holding ``len(dataset) - look_back`` windows
        and their matching next values.
    """
    window_starts = range(len(dataset) - look_back)
    windows = [dataset[start:start + look_back] for start in window_starts]
    targets = [dataset[start + look_back] for start in window_starts]
    return np.array(windows), np.array(targets)

# Prepare the data for LSTM
look_back = 30  # number of past days to use for predicting the next day
dataX, dataY = create_dataset(temperature_data['scaled_tavg'].values, look_back)
# Add a trailing feature axis: (samples, look_back) -> (samples, look_back, 1),
# matching the LSTM input_shape declared below.
dataX = np.reshape(dataX, (dataX.shape[0], dataX.shape[1], 1))

# Use every window for training (no hold-out set in this cell)
trainX, trainY = dataX, dataY

# Build the LSTM model: one 50-unit LSTM layer followed by a single linear output
model = Sequential()
model.add(LSTM(50, input_shape=(look_back, 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')

# Fit the model
model.fit(trainX, trainY, epochs=100, batch_size=32, verbose=2)

# Predicting the next month's temperatures day by day
forecast_days = 30
# Seed the recursion with the most recent `look_back` observations. The last
# training window stops one step short of the end of the series (its target IS
# the last observation), so starting from it would shift every forecast by one
# day relative to the dates generated below.
last_batch = temperature_data['scaled_tavg'].values[-look_back:].reshape(1, look_back, 1)
predicted_temps = []
for _ in range(forecast_days):
    # verbose=0 suppresses the per-call progress output inside the loop
    next_temp = model.predict(last_batch, verbose=0)
    predicted_temps.append(next_temp[0, 0])
    # Slide the window: drop the oldest step and append the new prediction
    next_temp = np.reshape(next_temp, (1, 1, 1))
    last_batch = np.append(last_batch[:, 1:, :], next_temp, axis=1)

# Inverse transform the predicted temperatures.
# The scaler was fitted on a single column, so feed it a column vector
# (n, 1) and flatten the result back to a 1-D array.
predicted_temps = scaler.inverse_transform(np.array(predicted_temps).reshape(-1, 1)).ravel()

# Generate dates for the predictions, starting the day after the last observation
last_date = temperature_data.index[-1]
predicted_dates = [last_date + timedelta(days=i) for i in range(1, forecast_days + 1)]

# Plotting
fig = go.Figure()
fig.add_trace(go.Scatter(x=predicted_dates, y=predicted_temps, mode='lines+markers', name='Predicted Temperature'))
fig.update_layout(
    title='Predicted Temperatures for the Upcoming Month',
    xaxis_title='Date',
    yaxis_title='Temperature (°C)',
)
fig.show()


Epoch 1/100
115/115 - 7s - loss: 0.0304 - 7s/epoch - 64ms/step
Epoch 2/100
115/115 - 2s - loss: 0.0075 - 2s/epoch - 16ms/step
Epoch 3/100
115/115 - 1s - loss: 0.0073 - 1s/epoch - 10ms/step
Epoch 4/100
115/115 - 1s - loss: 0.0069 - 1s/epoch - 10ms/step
Epoch 5/100
115/115 - 1s - loss: 0.0066 - 1s/epoch - 10ms/step
Epoch 6/100
115/115 - 1s - loss: 0.0064 - 1s/epoch - 10ms/step
Epoch 7/100
115/115 - 1s - loss: 0.0062 - 1s/epoch - 11ms/step
Epoch 8/100
115/115 - 2s - loss: 0.0058 - 2s/epoch - 16ms/step
Epoch 9/100
115/115 - 1s - loss: 0.0057 - 1s/epoch - 11ms/step
Epoch 10/100
115/115 - 1s - loss: 0.0055 - 1s/epoch - 11ms/step
Epoch 11/100
115/115 - 1s - loss: 0.0051 - 1s/epoch - 11ms/step
Epoch 12/100
115/115 - 1s - loss: 0.0048 - 1s/epoch - 11ms/step
Epoch 13/100
115/115 - 1s - loss: 0.0045 - 1s/epoch - 10ms/step
Epoch 14/100
115/115 - 1s - loss: 0.0045 - 1s/epoch - 10ms/step
Epoch 15/100
115/115 - 1s - loss: 0.0044 - 1s/epoch - 10ms/step
Epoch 16/100
115/115 - 1s - loss: 0.0041 - 1s/epo