In [29]:
from functools import reduce
from pathlib import Path

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader

In [77]:
import pandas as pd
from functools import reduce

# NOTE(review): START_YEAR / END_YEAR are not referenced anywhere in this cell;
# they are kept in case another cell reads them. The rows that survive are
# decided solely by where GINI is non-null (see below).
START_YEAR = 1963
END_YEAR = 2023

# Each CSV is expected to contribute a 'DATE' column plus exactly one value
# column; the order here must match the order of `new_columns` below.
dataframes = [
    pd.read_csv('FORINV.csv'),
    pd.read_csv('GINI.csv'),
    pd.read_csv('HCI.csv'),
    pd.read_csv('INFLATION.csv'),
    pd.read_csv('INTRATE.csv'),
    pd.read_csv('ITAX.csv'),
    # POVERTY.csv is intentionally left out of the merge.
    pd.read_csv('RCONSPEND.csv'),
    pd.read_csv('RGDP.csv'),
    pd.read_csv('RMEDEARN.csv'),
    pd.read_csv('RMEDHOUINC.csv'),
    pd.read_csv('UNRATE.csv')
]

# Left-join every series onto the first frame (FORINV) by DATE, so the set of
# dates is the one present in FORINV.csv.
merged_data = reduce(lambda left, right: pd.merge(left, right, on='DATE', how='left'), dataframes)
new_columns = ['DATE', 'FOREIGN INVESTMENT', 'GINI', 'HUMAN CAPITAL INDEX', 'INFLATION', 'INTEREST RATE', 'HIGHEST INCOME TAX', 'RCONSPEND', 'REAL GDP', 'RMEDEARN', 'RMEDHOUINC', 'UNEMPLOYMENT RATE']
merged_data.columns = new_columns

# Keep only rows where the target (GINI) is observed. `.copy()` makes the
# result an independent frame so the column assignments below do not trigger
# pandas' SettingWithCopyWarning.
merged_data = merged_data[merged_data['GINI'].notna()].copy()
merged_data['DATE'] = pd.to_datetime(merged_data['DATE'])
merged_data['YEAR'] = merged_data['DATE'].dt.year
merged_data = merged_data.drop(columns=['DATE'])
# One row per calendar year: the first dated row of each year wins.
merged_data = merged_data.drop_duplicates(subset='YEAR', keep='first')

# Fill missing values (this is a fill, not an interpolation): first copy the
# next available observation backwards, then carry the last observation
# forwards for anything still missing at the end of a series.
# `bfill()` / `ffill()` replace the deprecated
# `interpolate(method='bfill' / 'ffill')` spelling and give the same result.
merged_data = merged_data.bfill().ffill()

# Make sure the output directory exists before writing; `to_csv` does not
# create missing parent directories.
Path('dataset').mkdir(parents=True, exist_ok=True)
merged_data.to_csv('dataset/merged_data.csv', index=False)

# The last line will automatically print the DataFrame in Jupyter
merged_data

Unnamed: 0,FOREIGN INVESTMENT,GINI,HUMAN CAPITAL INDEX,INFLATION,INTEREST RATE,HIGHEST INCOME TAX,RCONSPEND,REAL GDP,RMEDEARN,RMEDHOUINC,UNEMPLOYMENT RATE,YEAR
65,276.0,37.6,2.825467,1.239669,3.0,91.0,11181.0,3893.482,335.0,56780.0,5.7,1963
69,340.0,38.1,2.861085,1.278912,3.5,77.0,11181.0,4135.553,335.0,56780.0,5.6,1964
73,720.0,37.5,2.897152,1.585169,4.0,70.0,11181.0,4362.111,335.0,56780.0,4.9,1965
77,488.0,37.8,2.928371,3.015075,4.5,70.0,11181.0,4731.888,335.0,56780.0,4.0,1966
81,836.0,36.9,2.959925,2.772786,4.5,70.0,11181.0,4870.299,335.0,56780.0,3.9,1967
85,1436.0,36.3,2.99182,4.271796,4.5,75.25,11181.0,5057.553,335.0,56780.0,3.7,1968
89,1300.0,36.0,3.024058,5.462386,5.5,77.0,11181.0,5283.597,335.0,56780.0,3.4,1969
93,2480.0,36.6,3.056644,5.838255,6.0,71.75,11181.0,5300.652,335.0,56780.0,3.9,1970
97,900.0,36.9,3.089885,4.292767,5.23,70.0,11181.0,5443.619,335.0,56780.0,5.9,1971
101,-836.0,36.7,3.123488,3.272278,4.5,70.0,11181.0,5632.649,335.0,56780.0,5.8,1972


In [83]:
# Load the merged dataset written by the data-preparation cell.
dataset_path = 'dataset/merged_data.csv'  # Adjust the file path
data = pd.read_csv(dataset_path)

# Extract features and target: every column except YEAR and GINI is a
# feature; GINI is the regression target.
features = data.drop(['YEAR', 'GINI'], axis=1)  # Adjust the target column name
target = data['GINI']  # Adjust the target column name

# Split the data into training and testing sets (80/20, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to the test split, so no test statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert data to PyTorch tensors. Targets stay 1-D here; the training and
# evaluation loops reshape them to (N, 1) right before the loss is computed.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# `batch_size` has to exist before the DataLoaders are built. It used to be
# assigned only in the hyperparameter section further down, which raised a
# NameError on a fresh kernel. Keep this value in sync with that section.
batch_size = 64

# Create the PyTorch DataLoaders (training batches are reshuffled each epoch).
train_dataset = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Define your neural network
class RegressionNN(nn.Module):
    """Feed-forward regressor with a single ReLU hidden layer.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden layer. Defaults to 50.
        output_size: Number of regression outputs per sample. Defaults to 1.
    """

    def __init__(self, input_size, hidden_size=50, output_size=1):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, output_size)``.

        No activation is applied to the output layer, so predictions are
        unbounded real values.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

# Hyperparameters
# Take the feature count from the tensor that is actually fed to the model
# rather than from a column list defined in another cell.
input_size = X_train_tensor.shape[1]
output_size = 1
learning_rate = 0.001
# NOTE: the DataLoaders are constructed before this section, so changing
# batch_size here does not affect loaders that already exist.
batch_size = 64
num_epochs = 2500

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch so weight initialisation and DataLoader shuffling are repeatable.
torch.manual_seed(42)

# Model, loss function, and optimizer
model = RegressionNN(input_size=input_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    # The loop variable is named `inputs` (not `data`) so it does not
    # overwrite the `data` DataFrame loaded earlier.
    for inputs, targets in train_loader:
        inputs = inputs.to(device=device)
        targets = targets.to(device=device)

        scores = model(inputs)
        loss = criterion(scores, targets.view(-1, 1))  # Reshape targets for MSELoss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # MSELoss returns the batch mean; weight it by the batch size so the
        # epoch figure is a true per-sample mean even if the last batch is
        # smaller than the others.
        total_loss += loss.item() * inputs.size(0)

    average_loss = total_loss / len(train_loader.dataset)
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}')

# Evaluate on test data
model.eval()
total_loss = 0.0

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device=device)
        targets = targets.to(device=device)

        scores = model(inputs)
        loss = criterion(scores, targets.view(-1, 1))

        total_loss += loss.item() * inputs.size(0)

average_loss = total_loss / len(test_loader.dataset)
print(f'Test Loss: {average_loss:.4f}')

Epoch [10/2500], Loss: 1468.1797
Epoch [20/2500], Loss: 1453.1963
Epoch [30/2500], Loss: 1437.2800
Epoch [40/2500], Loss: 1420.1334
Epoch [50/2500], Loss: 1401.2841
Epoch [60/2500], Loss: 1380.3818
Epoch [70/2500], Loss: 1357.1223
Epoch [80/2500], Loss: 1331.2559
Epoch [90/2500], Loss: 1302.5087
Epoch [100/2500], Loss: 1270.7515
Epoch [110/2500], Loss: 1235.7839
Epoch [120/2500], Loss: 1197.5741
Epoch [130/2500], Loss: 1156.1840
Epoch [140/2500], Loss: 1111.9196
Epoch [150/2500], Loss: 1065.0460
Epoch [160/2500], Loss: 1015.9326
Epoch [170/2500], Loss: 964.9894
Epoch [180/2500], Loss: 912.6486
Epoch [190/2500], Loss: 859.4214
Epoch [200/2500], Loss: 805.9230
Epoch [210/2500], Loss: 752.7167
Epoch [220/2500], Loss: 700.2767
Epoch [230/2500], Loss: 649.0504
Epoch [240/2500], Loss: 599.3295
Epoch [250/2500], Loss: 551.4318
Epoch [260/2500], Loss: 505.7492
Epoch [270/2500], Loss: 462.5533
Epoch [280/2500], Loss: 421.9806
Epoch [290/2500], Loss: 384.0988
Epoch [300/2500], Loss: 348.9368
Epo

'/content'