In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import joblib

# อ่านข้อมูลจาก CSV และจัดการข้อมูลที่หายไป
df = pd.read_csv('/content/YoutubeCommentsDataSet.csv')
df = df.dropna(subset=['Comment'])
df['Comment'] = df['Comment'].fillna('').astype(str)

# แปลง Labels เป็นตัวเลข
label_encoder = LabelEncoder()
label_encoder.fit(['negative', 'neutral', 'positive'])
labels = label_encoder.transform(df['Sentiment'].values)

# แปลงข้อความเป็นเวกเตอร์ TF-IDF
tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(df['Comment'].values).toarray()

# แบ่งข้อมูลเป็น train และ test
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)

# แปลงข้อมูลเป็น Tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# สร้าง DataLoader
train_data = TensorDataset(X_train_tensor, y_train_tensor)
test_data = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

# สร้าง Neural Network Model
class SentimentNN(nn.Module):
    def __init__(self):
        super(SentimentNN, self).__init__()
        self.fc1 = nn.Linear(5000, 128)
        self.fc2 = nn.Linear(128, 3)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return self.softmax(x)

# กำหนด Loss Function และ Optimizer
model = SentimentNN()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# ฝึกโมเดล
num_epochs = 60
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

# ประเมินโมเดล
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total}%')

# บันทึกโมเดลและตัวแปลงข้อความ
torch.save(model.state_dict(), 'sentiment_model.pth')
joblib.dump(tfidf, 'tfidf_vectorizer.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

print("Model and Vectorizer saved successfully!")


Epoch 1/60, Loss: 0.8850762781889542
Epoch 2/60, Loss: 0.7918576345495556
Epoch 3/60, Loss: 0.7623044109862783
Epoch 4/60, Loss: 0.7227088105419408
Epoch 5/60, Loss: 0.6791782508725706
Epoch 6/60, Loss: 0.6543883915828622
Epoch 7/60, Loss: 0.6372234897769016
Epoch 8/60, Loss: 0.6260948017887448
Epoch 9/60, Loss: 0.6177899328262909
Epoch 10/60, Loss: 0.6120253850584445
Epoch 11/60, Loss: 0.6080542954413787
Epoch 12/60, Loss: 0.6049253144989843
Epoch 13/60, Loss: 0.602858904392823
Epoch 14/60, Loss: 0.6014519328656404
Epoch 15/60, Loss: 0.6001349305329116
Epoch 16/60, Loss: 0.5990585832492165
Epoch 17/60, Loss: 0.5985308079615883
Epoch 18/60, Loss: 0.598149137393288
Epoch 19/60, Loss: 0.5975649536951728
Epoch 20/60, Loss: 0.5982252781805785
Epoch 21/60, Loss: 0.5977700791929079
Epoch 22/60, Loss: 0.5969278036252312
Epoch 23/60, Loss: 0.5968717518060104
Epoch 24/60, Loss: 0.5963890481254329
Epoch 25/60, Loss: 0.5958492766255917
Epoch 26/60, Loss: 0.5954787157152011
Epoch 27/60, Loss: 0.59