In [1]:
import pandas as pd
import numpy as np
import shapely
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, concatenate, Dropout, UpSampling2D, Reshape
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)
from sklearn.preprocessing import StandardScaler, MinMaxScaler


In [2]:
df = pd.read_pickle('./final.pkl')

In [3]:
mb_am_temp = 56
mb_af_temp = 63
mb_pm_temp = 60

In [4]:
# Function to map AMafPM to ref_temp
def map_to_ref_temp(row):
    if row['AMafPM'] == [1,0,0]:
        return 56
    elif row['AMafPM'] == [0,1,0]:
        return 63
    elif row['AMafPM'] == [0,0,1]:
        return 60
    else:
        return None  # or a default value

# Apply the function to create the new column
df['ref_temp'] = df.apply(map_to_ref_temp, axis=1)

In [5]:
landuse_dim, water_dim, natural_dim, highway_dim  = [len(df.iloc[0,:]['feature_grids'][x][0][0]) for x in df.iloc[0,:]['feature_grids'].keys()]

In [6]:
height_input = Input(shape=(64, 64, 1), name='height')
landuse_input = Input(shape=(64, 64, landuse_dim), name='landuse')
water_input = Input(shape=(64, 64, water_dim), name='water')
natural_input = Input(shape=(64, 64, natural_dim), name='natural')
highway_input = Input(shape=(64, 64, highway_dim), name='highway')
time_input = Input(shape=(3,), name='time_of_day')  # Assuming 3 time periods: AM, Afternoon, PM
ref_temp_input = Input(shape=(1,), name='ref_temp')

# Example CNN Layer for each input (adjust as needed)
def create_cnn_block(input_layer, filters):
    x = Conv2D(filters, (3, 3), activation='relu')(input_layer)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(filters, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    return x

In [7]:
# Creating CNN blocks for each spatial input
height_cnn = create_cnn_block(height_input, 32)
landuse_cnn = create_cnn_block(landuse_input, 32)
water_cnn = create_cnn_block(water_input, 32)
natural_cnn = create_cnn_block(natural_input, 32)
highway_cnn = create_cnn_block(highway_input, 32)

In [8]:
# Concatenating all the CNN outputs
concatenated = concatenate([Flatten()(height_cnn), 
                            Flatten()(landuse_cnn), 
                            Flatten()(water_cnn), 
                            Flatten()(natural_cnn), 
                            Flatten()(highway_cnn),
                            time_input,
                            ref_temp_input])  # Adding time input here

In [9]:
# Dense layers (adjust as needed)
x = Dense(256, activation='relu')(concatenated)
x = Dense(128, activation='relu')(x)

# Reshape to get a grid structure back
x = Dense(64*64, activation='relu')(x)
x = Reshape((64, 64, 1))(x)

# Upsample (if needed, depending on the size of the output from the CNN blocks)
# x = UpSampling2D(size=(2, 2))(x)

# Output layer to get the final 64x64 grid
output = Conv2D(1, (3, 3), activation='linear', padding='same')(x)

# Create Model
model = Model(inputs=[height_input, landuse_input, water_input, natural_input, highway_input, time_input,ref_temp_input], 
              outputs=output)

# Compile Model
model.compile(optimizer='adam', loss='mse')  # Use mean squared error as loss function

# Model Summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 height (InputLayer)            [(None, 64, 64, 1)]  0           []                               
                                                                                                  
 landuse (InputLayer)           [(None, 64, 64, 10)  0           []                               
                                ]                                                                 
                                                                                                  
 water (InputLayer)             [(None, 64, 64, 4)]  0           []                               
                                                                                                  
 natural (InputLayer)           [(None, 64, 64, 7)]  0           []                           

                                                                                                  
 conv2d_10 (Conv2D)             (None, 64, 64, 1)    10          ['reshape[0][0]']                
                                                                                                  
Total params: 8,643,754
Trainable params: 8,643,754
Non-trainable params: 0
__________________________________________________________________________________________________


In [10]:
df['landuse']=df['feature_grids'].apply(lambda x: x['landuse'])
df['water']=df['feature_grids'].apply(lambda x: x['water'])
df['natural']=df['feature_grids'].apply(lambda x: x['natural'])
df['highway']=df['feature_grids'].apply(lambda x: x['highway'])


In [11]:
scaler = MinMaxScaler()

# Find global min and max
global_min = np.min([np.min(subgrid) for subgrid in df["Subgrid"]])
global_max = np.max([np.max(subgrid) for subgrid in df["Subgrid"]])

# Fit the scaler using global min and max
scaler.fit(np.array([[global_min], [global_max]]))

# Apply the same scaling to each subgrid
scaled_subgrids = []

for subgrid in df["Subgrid"]:
    scaled_subgrid = scaler.transform(subgrid.flatten().reshape(-1, 1))
    scaled_subgrids.append(scaled_subgrid.reshape(subgrid.shape))

# Assign scaled subgrids back to DataFrame
df["Subgrid"] = pd.Series(scaled_subgrids, index=df.index)

In [12]:
def rotate_and_augment(df):
    # Creating an empty DataFrame to store augmented data
    augmented_data = pd.DataFrame()
    
    # Iterating through each row in the original DataFrame
    for _, row in df.iterrows():
        # Storing original and rotated versions of each grid in a list
        all_versions = []
        
        # Original data
        original_data = {
            "Subgrid": row["Subgrid"],
            "height_grid": row["height_grid"],
            "landuse": row["landuse"],
            "water": row["water"],
            "natural": row["natural"],
            "highway": row["highway"],
            # ... include other non-rotated columns as needed
            "AMafPM": row["AMafPM"],
            "ref_temp": row["ref_temp"]
        }
        all_versions.append(original_data)
        
        # Creating rotated versions
        for angle in [90, 180, 270]:
            rotated_data = {
                "Subgrid": np.rot90(row["Subgrid"], k=angle//90),
                "height_grid": np.rot90(row["height_grid"], k=angle//90),
                "landuse": np.rot90(row["landuse"], k=angle//90, axes=(0, 1)),
                "water": np.rot90(row["water"], k=angle//90, axes=(0, 1)),
                "natural": np.rot90(row["natural"], k=angle//90, axes=(0, 1)),
                "highway": np.rot90(row["highway"], k=angle//90, axes=(0, 1)),
                # ... include other non-rotated columns as needed
                "AMafPM": row["AMafPM"],
                "ref_temp": row["ref_temp"]
            }
            all_versions.append(rotated_data)
        
        # Appending all versions to the augmented_data DataFrame
        augmented_data = augmented_data.append(all_versions, ignore_index=True)
    
    return augmented_data
df = rotate_and_augment(df)

  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.appe

  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.appe

  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.append(all_versions, ignore_index=True)
  augmented_data = augmented_data.appe

In [13]:
from sklearn.model_selection import train_test_split
test_size_ratio = 0.2  # Here, 20% of the data goes to the test set.

# Split the data
train_df, test_df = train_test_split(df, test_size=test_size_ratio, random_state=42)

In [14]:
height_grid = np.array(train_df['height_grid'].tolist())
landuse = np.array(train_df['landuse'].tolist())
water = np.array(train_df['water'].tolist())
natural = np.array(train_df['natural'].tolist())
highway = np.array(train_df['highway'].tolist())
AMafPM = np.array(train_df['AMafPM'].tolist())
ref_temp = np.array(train_df['ref_temp'].tolist())
target = np.array(train_df['Subgrid'].tolist())

In [15]:
height_grid_test = np.array(test_df['height_grid'].tolist())
landuse_test = np.array(test_df['landuse'].tolist())
water_test = np.array(test_df['water'].tolist())
natural_test = np.array(test_df['natural'].tolist())
highway_test = np.array(test_df['highway'].tolist())
AMafPM_test = np.array(test_df['AMafPM'].tolist())
ref_temp_test = np.array(test_df['ref_temp'].tolist())
target_test = np.array(test_df['Subgrid'].tolist())

In [16]:
#df['ref_temp'] = (df['ref_temp'] - df['ref_temp'].mean()) / df['ref_temp'].std()

In [17]:
%load_ext tensorboard
from datetime import datetime
from packaging import version

import tensorflow as tf
from tensorflow import keras

print("TensorFlow version: ", tf.__version__)
assert version.parse(tf.__version__).release[0] >= 2, \
    "This notebook requires TensorFlow 2.0 or above."
import tensorboard
tensorboard.__version__
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)


TensorFlow version:  2.9.2


In [18]:
train = model.fit([height_grid, landuse, water, natural, highway, AMafPM, ref_temp],
                    target,
                    epochs=100,
                    batch_size=32,
                    validation_split=0.2,
                 callbacks=[tensorboard_callback])  # Adjust as needed

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
x = model.evaluate(x=[height_grid_test, landuse_test, water_test, natural_test, highway_test, AMafPM_test,ref_temp_test], y=target_test)




In [20]:
x

0.08640959858894348

In [21]:
normalized_mse = x
original_mse = normalized_mse * (global_max - global_min)

# Optionally, compute RMSE
original_rmse = np.sqrt(original_mse)

# Display
print(f"Original Scale MSE: {original_mse}")
print(f"Original Scale RMSE: {original_rmse}")


Original Scale MSE: 1.8435454105201643
Original Scale RMSE: 1.35777222335713


In [22]:
%tensorboard --logdir logs --host localhost


Reusing TensorBoard on port 6006 (pid 11484), started 1:37:06 ago. (Use '!kill 11484' to kill it.)

In [23]:
height_flatten = height_grid.reshape(height_grid.shape[0], -1)
landuse_flatten = landuse.reshape(landuse.shape[0], -1)
water_flatten = water.reshape(water.shape[0], -1)
natural_flatten = natural.reshape(natural.shape[0], -1)
highway_flatten = highway.reshape(highway.shape[0], -1)

# Concatenate all flattened grids along with AMafPM and ref_temp
X_train = np.concatenate([
    height_flatten, 
    landuse_flatten, 
    water_flatten, 
    natural_flatten, 
    highway_flatten,
    AMafPM,  # Assuming AMafPM is already in the correct shape (n_samples, 3)
    ref_temp.reshape(-1, 1)  # Make sure ref_temp is a column vector
], axis=1)

# Flatten the target grid for regression-based models
y_train = target.reshape(target.shape[0], -1)

##### TEST #####
height_flatten_test = height_grid_test.reshape(height_grid_test.shape[0], -1)
landuse_flatten_test = landuse_test.reshape(landuse_test.shape[0], -1)
water_flatten_test = water_test.reshape(water_test.shape[0], -1)
natural_flatten_test = natural_test.reshape(natural_test.shape[0], -1)
highway_flatten_test = highway_test.reshape(highway_test.shape[0], -1)

# Concatenate all flattened grids along with AMafPM and ref_temp
X_test = np.concatenate([
    height_flatten_test, 
    landuse_flatten_test, 
    water_flatten_test, 
    natural_flatten_test, 
    highway_flatten_test,
    AMafPM_test,  # Assuming AMafPM is already in the correct shape (n_samples, 3)
    ref_temp_test.reshape(-1, 1)  # Make sure ref_temp is a column vector
], axis=1)

# Flatten the target grid for regression-based models
y_test = target_test.reshape(target_test.shape[0], -1)


# Split into train and test sets
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print shapes for confirmation
print("Shapes of X_train, X_test, y_train, y_test:", X_train.shape, X_test.shape, y_train.shape, y_test.shape)


Shapes of X_train, X_test, y_train, y_test: (2035, 94212) (509, 94212) (2035, 4096) (509, 4096)


In [24]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Assuming your data is already split into train and test
model_lr = LinearRegression()
model_lr.fit(X_train, y_train)  # Ensure X_train is appropriately reshaped if needed
predictions_lr = model_lr.predict(X_test)
mse_lr = mean_squared_error(y_test, predictions_lr) 
rmse_lr = np.sqrt(mse_lr * (global_max - global_min))
print(f"Linear Regression MSE: {mse_lr} RMSE: {rmse_lr}")

Linear Regression MSE: 2.2331363380580534e+21 RMSE: 218274773028.35608


In [None]:
from sklearn.ensemble import RandomForestRegressor

model_rf = RandomForestRegressor(n_estimators=100,max_depth=5,n_jobs=-1)

In [None]:
model_rf.fit(X_train, y_train)
predictions_rf = model_rf.predict(X_test)
mse_rf = mean_squared_error(y_test, predictions_rf)
rmse_rf = np.sqrt(mse_rf * (global_max - global_min))

print(f"Random Forest MSE: {mse_rf} RMSE: {rmse_rf}")

In [29]:
print(f"Random Forest MSE: {mse_rf} RMSE: {rmse_rf}")

Random Forest MSE: 0.0024511910524040196 RMSE: 0.22868332985460324


In [32]:
from sklearn.neighbors import KNeighborsRegressor

model_knn = KNeighborsRegressor(n_neighbors=5)
model_knn.fit(X_train, y_train)

In [33]:
predictions_knn = model_knn.predict(X_test)
mse_knn = mean_squared_error(y_test, predictions_knn)
rmse_knn = np.sqrt(mse_knn * (global_max - global_min))
print(f"k-NN MSE: {mse_knn} RMSE: {rmse_knn}")

k-NN MSE: 0.06766103208065033 RMSE: 1.2014763355255127


In [None]:
from sklearn.tree import DecisionTreeRegressor

model_dt = DecisionTreeRegressor()
model_dt.fit(X_train, y_train)
predictions_dt = model_dt.predict(X_test)
mse_dt = mean_squared_error(y_test, predictions_dt)
rmse_dt = np.sqrt(mse_dt * (global_max - global_min))
print(f"Decision Tree MSE: {mse_dt} RMSE: {rmse_dt}")


In [36]:
rmse_dt

0.31159520050745637

In [37]:
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LinearRegression

pca = PCA(n_components=10)  # Adjust the number of components based on your analysis
model_pcr = make_pipeline(pca, LinearRegression())

In [38]:
model_pcr.fit(X_train, y_train)


In [39]:
predictions_pcr = model_pcr.predict(X_test)
mse_pcr = mean_squared_error(y_test, predictions_pcr)
rmse_pcr = np.sqrt(mse_pcr * (global_max - global_min))

print(f"PCR MSE: {mse_pcr} RMSE: {rmse_pcr}")

PCR MSE: 0.057319741465772386 RMSE: 1.1058546220547494


In [41]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

def build_mlp_model(input_shape, output_units):
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_shape,)),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(output_units, activation='linear')  # linear activation for regression
    ])
    return model

# Get the number of features from the input shape and the number of targets from the output shape
input_shape = X_train.shape[1]
output_units = y_train.shape[1]  # Assuming y_train is already reshaped to (n_samples, n_outputs)

# Build the model
model = build_mlp_model(input_shape, output_units)

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Print model summary
model.summary()

# Train the model
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
test_loss = model.evaluate(X_test, y_test)
print(f'Test MSE: {test_loss}')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 128)               12059264  
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_4 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_5 (Dense)             (None, 4096)              266240    
                                                                 
Total params: 12,333,760
Trainable params: 12,333,760
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50


In [42]:
np.sqrt(test_loss * (global_max - global_min))

1.1140797188210037