In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.impute import SimpleImputer

In [16]:
historical_weather = pd.read_csv(r"C:\0A___________________________________\Predictor 1.0\historical_weather.csv")
test_data = pd.read_csv(r"C:\0A___________________________________\Predictor 1.0\submission_key.csv")

In [17]:
historical_weather.drop(columns=['snow_depth_mm'], inplace=True)

In [18]:
historical_weather['date'] = pd.to_datetime(historical_weather['date'])
test_data['date'] = pd.to_datetime(test_data['date'])

In [19]:
historical_weather['city_id'] = historical_weather['city_id'].str.extract('(\d+)').astype(int)
test_data['city_id'] = test_data['city_id'].str.extract('(\d+)').astype(int)

In [20]:
imputer = SimpleImputer(strategy='mean')

In [21]:
historical_weather[['avg_temp_c', 'min_temp_c', 'max_temp_c', 'precipitation_mm','avg_wind_dir_deg', 'avg_wind_speed_kmh']] = imputer.fit_transform(historical_weather[['avg_temp_c', 'min_temp_c', 'max_temp_c', 'precipitation_mm','avg_wind_dir_deg', 'avg_wind_speed_kmh']])

In [22]:
# scaler = StandardScaler()
# scaler = scaler.fit(historical_weather[['avg_temp_c', 'min_temp_c', 'max_temp_c', 'precipitation_mm','avg_wind_dir_deg', 'avg_wind_speed_kmh']])
# historical_weather_scaled = scaler.transform(historical_weather[['avg_temp_c', 'min_temp_c', 'max_temp_c', 'precipitation_mm','avg_wind_dir_deg', 'avg_wind_speed_kmh']])

In [23]:
# def prepare_data_for_lstm(data, n_past=14, n_future=7):
#     X, Y = [], []
#     for i in range(n_past, len(data) - n_future + 1):
#         X.append(data[i - n_past:i, :])  # past days data
#         Y.append(data[i:i + n_future, 0])  # future days target (avg_temp_c)
#     return np.array(X), np.array(Y)

# trainX, trainY = prepare_data_for_lstm(historical_weather_scaled)

In [24]:
# # # Define the LSTM model
# model = Sequential()
# model.add(LSTM(64, activation='relu', input_shape=(trainX.shape[1], trainX.shape[2]), return_sequences=True))
# model.add(LSTM(32, activation='relu', return_sequences=False))
# model.add(Dropout(0.2))
# model.add(Dense(trainY.shape[1]))  # Output layer for predicting avg_temp_c

# model.compile(optimizer='adam', loss='mse')
# model.summary()

In [25]:
# history = model.fit(trainX, trainY, epochs=10, batch_size=16, validation_split=0.1, verbose=1)

In [26]:
# plt.plot(history.history['loss'], label='Training loss')
# plt.plot(history.history['val_loss'], label='Validation loss')
# plt.legend()

In [27]:
# predictions = []
# for index, row in test_data.iterrows():
#     city_id = row['city_id']
#     date = row['date']
#     city_data = historical_weather[(historical_weather['city_id'] == city_id) & (historical_weather['date'] <= date)]
#     city_data_scaled = scaler.transform(city_data[['avg_temp_c', 'min_temp_c', 'max_temp_c', 'precipitation_mm','avg_wind_dir_deg', 'avg_wind_speed_kmh']])
    
#     # Reshape data for LSTM input
#     X = np.expand_dims(city_data_scaled[-14:, :], axis=0)  # Using last 14 days data
#     prediction = model.predict(X)
#     predictions.append(prediction[0][0])  # Predicted avg_temp_c for the date

# # Create submission dataframe
# submission = pd.DataFrame({
#     'submission_ID': test_data['submission_ID'],
#     'avg_temp_c': predictions
# })

# # Save submission to CSV file
# submission.to_csv('my_submission.csv', index=False)

In [29]:
# Load and preprocess the data
# df = pd.read_csv('/mnt/data/historical_weather.csv')
# df['date'] = pd.to_datetime(df['date'])
# df = create_features(df)

from sklearn.preprocessing import MinMaxScaler

# Function to prepare data for LSTM
def prepare_lstm_data(df, city_id, sequence_length=30):
    city_df = df[df['city_id'] == city_id].sort_values('date')
    features = ['avg_temp_c', 'min_temp_c', 'max_temp_c', 'precipitation_mm','avg_wind_dir_deg', 'avg_wind_speed_kmh']
    city_df = city_df[features].dropna()
    
    scaler = MinMaxScaler()
    city_scaled = scaler.fit_transform(city_df)
    
    X, y = [], []
    for i in range(sequence_length, len(city_scaled) - 7):
        X.append(city_scaled[i-sequence_length:i])
        y.append(city_scaled[i:i+7, 0])  # Predict next 7 days of avg_temp_c

    X, y = np.array(X), np.array(y)
    return X, y, scaler

# Function to build and train LSTM model
def train_lstm(X_train, y_train):
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(units=50))
    model.add(Dense(7))  # Predicting 7 days
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)
    return model

# Function to make predictions
def predict_next_7_days(model, X, scaler, num_features):
    prediction_scaled = model.predict(X[-1].reshape(1, X.shape[1], X.shape[2]))
    padding = np.zeros((7, num_features - 1))
    prediction = scaler.inverse_transform(np.hstack([prediction_scaled.reshape(-1, 1), padding]))[:, 0]
    return prediction

# List to store predictions for all cities
all_cities_predictions = []

# Iterate through all unique city IDs
unique_city_ids = historical_weather['city_id'].unique()

for city_id in unique_city_ids:
    X, y, scaler = prepare_lstm_data(historical_weather, city_id)
    if X.shape[0] > 0:  # Ensure there is enough data for training
        model = train_lstm(X, y)
        num_features = X.shape[2]
        predictions = predict_next_7_days(model, X, scaler, num_features)
        all_cities_predictions.append({
            'city_id': city_id,
            'predictions': predictions
        })

# Display the predictions
all_cities_predictions

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20

[{'city_id': 1,
  'predictions': array([8.24363872, 7.97343735, 8.07268351, 6.98694178, 7.80995498,
         7.74650383, 7.34193583])},
 {'city_id': 2,
  'predictions': array([15.82902601, 15.5604712 , 15.95413749, 15.9443984 , 15.77990558,
         15.85306516, 15.83451233])},
 {'city_id': 3,
  'predictions': array([26.35204153, 26.44014103, 26.47615368, 26.40870122, 26.38453786,
         26.30963315, 26.3767491 ])},
 {'city_id': 4,
  'predictions': array([0.32659196, 0.21409788, 0.27249292, 0.47727064, 0.57711587,
         0.73731192, 1.20751881])},
 {'city_id': 5,
  'predictions': array([24.46372275, 24.13047335, 24.24264855, 24.24129356, 24.43847448,
         24.50198047, 24.72625821])},
 {'city_id': 7,
  'predictions': array([16.51430912, 15.74073866, 15.72890454, 15.52878914, 15.73087204,
         15.89990719, 14.974761  ])},
 {'city_id': 8,
  'predictions': array([2.25863271, 1.69274159, 1.6540963 , 1.46613833, 1.41331029,
         1.40654589, 1.74969977])},
 {'city_id': 9,
  'p

In [31]:
print(len(all_cities_predictions))

100
