In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from imblearn.over_sampling import SMOTE
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import joblib

# อ่านข้อมูลจาก CSV
df = pd.read_csv('placementdata_miss.csv')  # เปลี่ยน path ตามไฟล์ของคุณ

# จัดการข้อมูลที่หายไป
numerical_cols = ['CGPA', 'Internships', 'Projects', 'Workshops/Certifications',
                  'AptitudeTestScore', 'SoftSkillsRating', 'SSC_Marks', 'HSC_Marks']
for col in numerical_cols:
    df[col] = df[col].fillna(df[col].mean())

df['ExtracurricularActivities'] = df['ExtracurricularActivities'].fillna('No')
df['PlacementTraining'] = df['PlacementTraining'].fillna('No')
df = df.dropna(subset=['PlacementStatus'])

# แปลง categorical เป็น numerical ด้วย LabelEncoder
# ใช้ LabelEncoder สำหรับ ExtracurricularActivities และ PlacementTraining
label_encoder_features = LabelEncoder()
label_encoder_features.fit(['No', 'Yes'])  # บังคับให้รู้จัก 'No' และ 'Yes'
df['ExtracurricularActivities'] = label_encoder_features.transform(df['ExtracurricularActivities'])
df['PlacementTraining'] = label_encoder_features.transform(df['PlacementTraining'])

# ใช้ LabelEncoder แยกสำหรับ PlacementStatus
label_encoder_target = LabelEncoder()
df['PlacementStatus'] = label_encoder_target.fit_transform(df['PlacementStatus'])  # Placed=1, NotPlaced=0

# กำหนด Input และ Output
X = df[numerical_cols + ['ExtracurricularActivities', 'PlacementTraining']].values
y = df['PlacementStatus'].values

# Feature Scaling
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Oversampling ด้วย SMOTE
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

# แบ่งข้อมูล
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# แปลงข้อมูลเป็น Tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# สร้าง DataLoader
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

# สร้างโมเดล
class PlacementNN(nn.Module):
    def __init__(self):
        super(PlacementNN, self).__init__()
        self.fc1 = nn.Linear(10, 128)
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(64, 32)
        self.dropout3 = nn.Dropout(0.3)
        self.fc4 = nn.Linear(32, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.relu(self.fc3(x))
        x = self.dropout3(x)
        x = self.fc4(x)
        return x

model = PlacementNN()

# คำนวณ class weights
num_not_placed = len(df[df['PlacementStatus'] == 0])
num_placed = len(df[df['PlacementStatus'] == 1])
weight_for_not_placed = 1.0
weight_for_placed = (num_not_placed / num_placed) * 1.5

# Loss function และ Optimizer
pos_weight = torch.tensor([weight_for_placed], dtype=torch.float32)
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# ฝึกโมเดล
num_epochs = 50
best_loss = float('inf')
patience = 5
patience_counter = 0

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            val_loss += criterion(outputs, labels).item()
    val_loss /= len(test_loader)

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {running_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')

    if val_loss < best_loss:
        best_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'placement_model.pkl')
        print(f"New best model saved with Val Loss: {val_loss:.4f}")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print("Early stopping triggered!")
            break

# บันทึก Scaler และ LabelEncoder (สำหรับ features เท่านั้น)
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoder_features, 'label_encoder.pkl')  # บันทึกตัวที่รู้จัก 'No' และ 'Yes'

# ตรวจสอบคลาสของ LabelEncoder
print("LabelEncoder classes:", label_encoder_features.classes_)

print("Model, Scaler, and LabelEncoder have been saved as 'placement_model.pkl', 'scaler.pkl', and 'label_encoder.pkl'")

Epoch 1/50, Train Loss: 0.6878, Val Loss: 0.6300
New best model saved with Val Loss: 0.6300
Epoch 2/50, Train Loss: 0.6412, Val Loss: 0.6349
Epoch 3/50, Train Loss: 0.6394, Val Loss: 0.6239
New best model saved with Val Loss: 0.6239
Epoch 4/50, Train Loss: 0.6389, Val Loss: 0.6195
New best model saved with Val Loss: 0.6195
Epoch 5/50, Train Loss: 0.6360, Val Loss: 0.6230
Epoch 6/50, Train Loss: 0.6313, Val Loss: 0.6205
Epoch 7/50, Train Loss: 0.6303, Val Loss: 0.6235
Epoch 8/50, Train Loss: 0.6283, Val Loss: 0.6186
New best model saved with Val Loss: 0.6186
Epoch 9/50, Train Loss: 0.6270, Val Loss: 0.6236
Epoch 10/50, Train Loss: 0.6276, Val Loss: 0.6208
Epoch 11/50, Train Loss: 0.6303, Val Loss: 0.6246
Epoch 12/50, Train Loss: 0.6238, Val Loss: 0.6213
Epoch 13/50, Train Loss: 0.6254, Val Loss: 0.6238
Early stopping triggered!
LabelEncoder classes: ['No' 'Yes']
Model, Scaler, and LabelEncoder have been saved as 'placement_model.pkl', 'scaler.pkl', and 'label_encoder.pkl'
