In [1]:
import torch
import torch.nn as nn

# Define the LSTM model class
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, output_size)
        
        # Sigmoid activation for output
        self.sigmoid = nn.Sigmoid()
    
    def forward(self, x):
        # Initialize hidden and cell state
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Take the output of the last time step
        out = out[:, -1, :]
        
        # Pass through the fully connected layer and sigmoid activation
        out = self.fc(out)
        out = self.sigmoid(out)
        
        return out


In [6]:
# Load dataset from CSV file
file_name = 'reshaped_counties_by_year.csv'
data_df = pd.read_csv(file_name)

# Print the column names to check
print(data_df.columns)

# Once you've found the correct column, replace 'normalized_values' with the correct column name
# Example:
# data = data_df['your_column_name'].values


Index(['GeoName', 'Year', 'All industry total',
       'Personal income (thousands of dollars)', 'Population (persons) 3/',
       'Total employment', 'Wages and salaries'],
      dtype='object')


In [7]:
import torch
from torch.utils.data import DataLoader, Dataset
import pandas as pd

# Load dataset from CSV file
file_name = 'reshaped_counties_by_year.csv'
data_df = pd.read_csv(file_name)

# Select relevant columns for training
relevant_columns = [
    'All industry total',
    'Personal income (thousands of dollars)',
    'Population (persons) 3/',
    'Total employment',
    'Wages and salaries'
]

# Create a new DataFrame that only keeps the relevant columns along with Year for partitioning
data_df = data_df[['Year'] + relevant_columns]

# Split the dataset into training and testing based on the Year
train_data = data_df[data_df['Year'] < 2019][relevant_columns].values  # Data up to 2018
test_data = data_df[data_df['Year'] >= 2019][relevant_columns].values  # Data from 2019 onward

# Custom dataset class for time series data
class TimeSeriesDataset(Dataset):
    def __init__(self, data, input_sequence_length, target_sequence_length):
        self.data = data
        self.input_seq_len = input_sequence_length
        self.target_seq_len = target_sequence_length

    def __len__(self):
        return len(self.data) - self.input_seq_len - self.target_seq_len

    def __getitem__(self, index):
        # Get input sequence
        input_seq = self.data[index : index + self.input_seq_len]
        
        # Get target value (next value after the input sequence)
        target_seq = self.data[index + self.input_seq_len : index + self.input_seq_len + self.target_seq_len]
        
        return torch.tensor(input_seq, dtype=torch.float32), torch.tensor(target_seq, dtype=torch.float32)

# Hyperparameters
input_sequence_length = 12  # Predict based on the past 12 months
target_sequence_length = 1   # Predict one future value

# Create dataset and dataloader for training and testing
train_dataset = TimeSeriesDataset(train_data, input_sequence_length, target_sequence_length)
test_dataset = TimeSeriesDataset(test_data, input_sequence_length, target_sequence_length)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [25]:
# Hyperparameters
num_epochs = 1000  # Number of epochs to train
learning_rate = 0.001  # Learning rate

# Instantiate the model, define the loss function and optimizer
input_size = 5  # Number of input features
hidden_size = 64  # Size of hidden layer
num_layers = 2  # Number of LSTM layers
output_size = 5  # We are predicting one value

model = LSTMModel(input_size, hidden_size, num_layers, output_size)

# Loss function and optimizer
# Use MSELoss for multi-value regression
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


In [27]:
# Training loop
for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for inputs, targets in train_loader:
        # Check shape before forward pass
        # print(f"Original input shape: {inputs.shape}")
        # print(f"Original target shape: {targets.shape}")  # Added for debugging
        
        # Zero the gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)  # Directly pass inputs
        
        # Ensure targets have the correct shape
        targets = targets.view(-1, 5)  # Ensure shape is (batch_size, 5)
        
        # Calculate loss
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
    
    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [100/1000], Loss: 0.0003
Epoch [200/1000], Loss: 0.0000
Epoch [300/1000], Loss: 0.0033
Epoch [400/1000], Loss: 0.0001
Epoch [500/1000], Loss: 0.0001
Epoch [600/1000], Loss: 0.0002
Epoch [700/1000], Loss: 0.0001
Epoch [800/1000], Loss: 0.0001
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000


In [29]:
# Validation loop (with your test_loader as val_loader)
model.eval()  # Set model to evaluation mode
val_loss = 0.0

with torch.no_grad():  # No gradient tracking during validation
    for val_inputs, val_targets in test_loader:  # test_loader used as validation loader
        val_outputs = model(val_inputs)
        val_targets = val_targets.view(-1, 5)  # Adjust to match model's output
        loss = criterion(val_outputs, val_targets)
        val_loss += loss.item()

# Calculate average validation loss
val_loss /= len(test_loader)  # test_loader used as val_loader
print(f'Validation Loss: {val_loss:.4f}')


Validation Loss: 0.0000


In [38]:
import pickle

# Assuming 'model' is your trained model
with open('trained_model.pkl', 'wb') as file:  # Change the filename if needed
    pickle.dump(model, file)


In [39]:
# Load your trained model
with open('trained_model.pkl', 'rb') as file:  # Use the name you saved the model with
    model = pickle.load(file)


In [43]:
import numpy as np
import pickle  # Assuming you're using pickle to save/load your model

# Load your trained model (update the filename as needed)
with open('trained_model.pkl', 'rb') as file:
    model = pickle.load(file)

def normalize_prediction(prediction):
    # Assuming the model outputs a single prediction
    min_val = 0  # Assuming the minimum score is 0
    max_val = 1  # Assuming the maximum score is 1
    normalized = (prediction - min_val) / (max_val - min_val)
    return normalized

def get_county_score(county_features):
    input_data = np.array(county_features).reshape(1, -1)  # Reshape for model input
    prediction = model.predict(input_data)  # Get prediction for the county
    normalized_score = normalize_prediction(prediction)  # Normalize the prediction
    return normalized_score[0]  # Return the single score

# Function to gather user input for county features
def get_user_input():
    features = []
    num_features = 3  # Change this to the number of features your model requires
    print(f"Please enter {num_features} feature values for the county (for the year 2022):")
    for i in range(num_features):
        value = float(input(f"Feature {i + 1}: "))  # Assuming the features are numerical
        features.append(value)
    return features

# Example usage
county_name = input("Enter the county name: ")  # Get county name from user
county_features = get_user_input()  # Get features from user
score = get_county_score(county_features)

print(f"The predicted score for {county_name} in 2022 is: {score:.2f}")


Enter the county name:  Adam


Please enter 3 feature values for the county (for the year 2022):


Feature 1:  1500
Feature 2:  500
Feature 3:  1200


AttributeError: 'LSTMModel' object has no attribute 'predict'