In [2]:
#import data 
import datetime
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense


data = pd.read_csv("weather_data.csv")

# Rename the columns
data = data.rename(columns={
    'LST_DATE': 'Date',
    'T_DAILY_MAX': 'MaxTemperature',
    'T_DAILY_MIN': 'MinTemperature',
    'T_DAILY_AVG': 'AvgTemperature',
    'P_DAILY_CALC': 'Precipitation'
})
# Remove the unnamed column
data = data.loc[:, ~data.columns.str.contains('^Unnamed')]

# # Convert the date column to datetime format
data['Date'] = pd.to_datetime(data['Date'], format= '%Y%m%d')
data['Year'] = data['Date'].dt.year

# Replace all occurrences of -9999 with NaN
data.replace(-9999, pd.NA, inplace=True)

# Drop rows with NaN values
data.dropna(inplace=True)

data

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


Unnamed: 0,Date,MaxTemperature,MinTemperature,AvgTemperature,Precipitation,Year
0,2014-01-01,8.7,-1.8,2.9,0.0,2014
1,2014-01-02,4.0,-1.1,0.8,5.5,2014
2,2014-01-03,-0.7,-9.5,-6.4,0.0,2014
3,2014-01-04,1.0,-9.8,-4.5,0.0,2014
4,2014-01-05,5.6,-1.3,1.4,14.5,2014
...,...,...,...,...,...,...
3647,2023-12-27,10.3,7.0,8.8,12.1,2023
3648,2023-12-28,11.9,5.9,9.5,0.0,2023
3649,2023-12-29,7.5,2.6,5.2,1.6,2023
3650,2023-12-30,8.0,2.0,4.1,0.0,2023


In [11]:
# Assuming `df` is your DataFrame and it has a column 'avg_temp' for average temperatures
data = data['AvgTemperature'].values.reshape(-1,1)

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
data = scaler.fit_transform(data)

# Splitting dataset into training (first 9 years) and testing data (last year)
train_size = int(len(data) * 0.9)
test_size = len(data) - train_size
train, test = data[0:train_size, :], data[train_size:len(data), :]

# Convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    X, Y = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

# reshape into X=t and Y=t+1
look_back = 30
X_train, Y_train = create_dataset(train, look_back)
X_test, Y_test = create_dataset(test, look_back)

# reshape input to be [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

# Create and fit the LSTM network
model = Sequential()
model.add(LSTM(50, input_shape=(1, look_back)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, Y_train, epochs=100, batch_size=1, verbose=2)

# Making predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Invert predictions
train_predict = scaler.inverse_transform(train_predict)
Y_train = scaler.inverse_transform([Y_train])
test_predict = scaler.inverse_transform(test_predict)
Y_test = scaler.inverse_transform([Y_test])

# Plotting
trainPredictPlot = np.empty_like(data)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict

testPredictPlot = np.empty_like(data)
testPredictPlot[:, :] = np.nan
testPredictPlot[len(train_predict)+(look_back*2)+1:len(data)-1, :] = test_predict

plt.plot(scaler.inverse_transform(data))
plt.plot(trainPredictPlot)
plt.plot(testPredictPlot)
plt.show()

# Predicting 50 years into the

# Assuming each year has roughly 365.25 days on average (accounting for leap years)
days_in_future = 50 * 365.25
future_predictions = test_predict[-1].reshape(1, 1, look_back)  # Start prediction from the last test data point

# Generating future data points
future_values = []
for i in range(int(days_in_future)):
    prediction = model.predict(future_predictions)
    future_predictions = np.append(future_predictions[:, :, 1:], prediction.reshape(1, 1, 1), axis=2)
    future_values.append(prediction)

# Reshape future values for inverse transformation
future_values = np.array(future_values).reshape(-1, 1)
future_values = scaler.inverse_transform(future_values)

# Plotting future predictions
plt.plot(range(len(data)), scaler.inverse_transform(data), label="Historical Data")
plt.plot(range(len(data), len(data) + len(future_values)), future_values, label="Future Predictions")
plt.legend()
plt.show()

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices