In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Load data
data = pd.read_csv('yield.csv')

# # Define the mappings
# state_mapping = {
#     'Jammu and Kashmir':0, 'Jharkhand':1, 'Bihar':2, 'Uttarakhand':3, 'Chhattisgarh':4, 'Punjab':5, 'Arunachal Pradesh':6, 'Odisha':7, 
#     'Kerala':8, 'Himachal Pradesh':9, 'Tamil Nadu':10, 'Andaman and Nicobar Islands':11
# }

In [10]:
# crop_mapping = {
#     'Black pepper': 0, 'Mesta': 1, 'Sannhamp': 2, 'Cauliflower': 3, 'Tomato': 4, 'Citrus Fruit': 5, 'Lentil': 6, 'Dry ginger': 7, 
#     'Drum Stick': 8, 'Bottle Gourd': 9, 'Beet Root': 10, 'Cardamom': 11, 'Onion': 12, 'Yam': 13, 'Samai': 14, 'other oilseeds': 15, 
#     'Bhindi': 16, 'Groundnut': 17, 'Turnip': 18, 'Cowpea(Lobia)': 19, 'Sesamum': 20, 'Pear': 21, 'Ginger': 22, 'Urad': 23, 
#     'Cashewnut Raw': 24, 'Papaya': 25, 'Brinjal': 26, 'Rubber': 27, 'Khesari': 28, 'Soyabean': 29, 'Maize': 30, 'Cabbage': 31, 
#     'Castor seed': 32, 'Tea': 33, 'Other Rabi pulses': 34, 'Dry chillies': 35, 'Other Cereals & Millets': 36, 'Linseed': 37, 
#     'Jack Fruit': 38, 'Rice': 39, 'Beans & Mutter(Vegetable)': 40, 'Moong(Green Gram)': 41, 'Coconut ': 42, 'Plums': 43, 'Tapioca': 44, 
#     'Cotton(lint)': 45, 'Korra': 46, 'Potato': 47, 'Pulses total': 48, 'Coriander': 49, 'Wheat': 50, 'Ber': 51, 'Sugarcane': 52, 
#     'Pome Granet': 53, 'Coffee': 54, 'Pome Fruit': 55, 'Niger seed': 56, 'Bajra': 57, 'Varagu': 58, 'Moth': 59, 'Redish': 60, 'Jute': 61, 
#     'Water Melon': 62, 'other misc. pulses': 63, 'Arecanut': 64, 'Safflower': 65, 'Jowar': 66, 'Sweet potato': 67, 'Carrot': 68, 
#     'Grapes': 69, 'Guar seed': 70, 'Other Kharif pulses': 71, 'Lab-Lab': 72, 'Cond-spcs other': 73, 'Oilseeds total': 74, 'Pineapple': 75, 
#     'Apple': 76, 'Total foodgrain': 77, 'Arhar/Tur': 78, 'Horse-gram': 79, 'Bitter Gourd': 80, 'Peach': 81, 'Cucumber': 82, 'Ragi': 83, 
#     'Barley': 84, 'Paddy': 85, 'Tobacco': 86, 'Ribed Guard': 87, 'Orange': 88, 'Turmeric': 89, 'Garlic': 90, 'Small millets': 91, 
#     'Snak Guard': 92, 'Litchi': 93, 'Mango': 94, 'Other Vegetables': 95, 'Peas & beans (Pulses)': 96, 'Masoor': 97, 'Blackgram': 98, 
#     'Banana': 99, 'Other Fresh Fruits': 100, 'Gram': 101, 'Cashewnut': 102, 'Sunflower': 103, 'Other Citrus Fruit': 104, 
#     'Rapeseed &Mustard': 105, 'Pump Kin': 106, 'Ash Gourd': 107
# }

# # Map the categorical columns to their respective labels
# data['State'] = data['State'].map(state_mapping)
# data['Crop'] = data['Crop'].map(crop_mapping)

# Split the data into features and target
X = data[['State', 'Year', 'Crop', 'Area', 'Rain']]
y = data['Production']

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [11]:
class YieldPredictor(nn.Module):
    def __init__(self):
        super(YieldPredictor, self).__init__()
        self.fc1 = nn.Linear(5, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 1)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x

model = YieldPredictor()


In [12]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs.squeeze(), y_batch)
        loss.backward()
        optimizer.step()
    
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(), y_batch)
            val_loss += loss.item()
    
    val_loss /= len(val_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}')


Epoch 1/50, Loss: 191374622720.0000, Val Loss: 534989707654600.0625
Epoch 2/50, Loss: 5240495013888.0000, Val Loss: 527097772428658.0000
Epoch 3/50, Loss: 12929270284288.0000, Val Loss: 525137119754773.5000
Epoch 4/50, Loss: 17196319244288.0000, Val Loss: 524189577766998.0625
Epoch 5/50, Loss: 21153124974592.0000, Val Loss: 523838786385627.4375


KeyboardInterrupt: 

In [6]:
# Check for missing values
print(data.isnull().sum())

State         76103
Year              0
Crop          76103
Area              0
Rain              0
Production        0
dtype: int64


In [7]:
data.shape

(76103, 6)

In [8]:
data.head()

Unnamed: 0,State,Year,Crop,Area,Rain,Production
0,,2000,,1254.0,2763.2,2000.0
1,,2000,,2.0,2763.2,1.0
2,,2000,,102.0,2763.2,321.0
3,,2000,,176.0,2763.2,641.0
4,,2000,,720.0,2763.2,165.0
