<a href="https://colab.research.google.com/github/samyarsworld/Language-classification-RNN/blob/main/Language_classification_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Language Detection

### 1. Download the project repository:

Repository address: https://github.com/samyarsworld/Language-classification-RNN/tree/main

The repository includes the data directory, a Python utils file containing helper functions, and a Python version of the code.

In [None]:
# Fetch the project repository into the current working directory.
!git clone https://github.com/samyarsworld/Language-classification-RNN
# Switch into the clone so later cells can use relative paths
# (requirements.txt, the utils module, ./model.pt).
%cd Language-classification-RNN

Cloning into 'Language-classification-RNN'...
remote: Enumerating objects: 43, done.[K
remote: Counting objects:   2% (1/43)[Kremote: Counting objects:   4% (2/43)[Kremote: Counting objects:   6% (3/43)[Kremote: Counting objects:   9% (4/43)[Kremote: Counting objects:  11% (5/43)[Kremote: Counting objects:  13% (6/43)[Kremote: Counting objects:  16% (7/43)[Kremote: Counting objects:  18% (8/43)[Kremote: Counting objects:  20% (9/43)[Kremote: Counting objects:  23% (10/43)[Kremote: Counting objects:  25% (11/43)[Kremote: Counting objects:  27% (12/43)[Kremote: Counting objects:  30% (13/43)[Kremote: Counting objects:  32% (14/43)[Kremote: Counting objects:  34% (15/43)[Kremote: Counting objects:  37% (16/43)[Kremote: Counting objects:  39% (17/43)[Kremote: Counting objects:  41% (18/43)[Kremote: Counting objects:  44% (19/43)[Kremote: Counting objects:  46% (20/43)[Kremote: Counting objects:  48% (21/43)[Kremote: Counting objects:  51% (22/43)

### 2. Install the necessary libraries:
Make sure Python (recommended 3.10) and pip (recommended 23.3.1) are installed.

In [None]:
!pip install -r requirements.txt

### 3. Import Required Libraries and Functions:

In [None]:
import torch
from torch import nn
import matplotlib.pyplot as plt

from utils import load_data, letter_to_tensor, sentence_to_tensor, LETTERS, get_random_sample
from pathlib import Path

### 4. Setup Initial Configurations:

In [None]:
# Tunable settings
MODEL_PATH = Path("./model.pt")  # Checkpoint file for the model weights
N_HIDDEN = 128                   # Number of hidden units
EPOCHS = 300_000                 # Number of training-loop iterations
STEPS = 1_000                    # Interval (in iterations) between progress reports

# Load the data
languages, language_names = load_data()

### 5. Create RNN Model:

In [None]:
class RNN(nn.Module):
    """Single-cell recurrent classifier built from two linear layers.

    At every step the current input and the previous hidden state are
    concatenated; one linear layer maps that to the next hidden state and
    another maps it to log-probabilities over the output classes.

    Args:
        input_size: Number of features of one input step.
        hidden_size: Number of hidden units.
        output_size: Number of output classes.
    """

    def __init__(self, input_size, hidden_size, output_size) -> None:
        super().__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size)
        self.i2o = nn.Linear(in_features=input_size + hidden_size, out_features=output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x: torch.Tensor, h: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
        """Process one step of a sequence.

        Args:
            x: Input for the current step; must be 2-D so it can be
                concatenated with ``h`` along dim 1.
            h: Previous hidden state with the same leading dimension as ``x``.

        Returns:
            A ``(output, hidden)`` tuple: log-probabilities over the classes
            and the hidden state to feed into the next step.
        """
        combined_tensor = torch.cat((x, h), dim=1)
        return self.softmax(self.i2o(combined_tensor)), self.i2h(combined_tensor)

    def init_hidden(self) -> torch.Tensor:
        """Return an all-zero initial hidden state of shape ``(1, hidden_size)``.

        The training and prediction code call this before feeding the first
        step of a sequence. The leading dimension of 1 assumes each step input
        is a single row, i.e. shape ``(1, input_size)`` -- TODO confirm against
        the tensors produced by the utils helpers.
        """
        # Allocate on the same device as the parameters so the concatenation
        # in forward() does not mix devices.
        return torch.zeros(1, self.hidden_size, device=self.i2h.weight.device)

In [None]:
# Build the classifier: one input feature per letter, one output per language
RNN_model = RNN(
    input_size=len(LETTERS),
    hidden_size=N_HIDDEN,
    output_size=len(languages),
)

### 6. Training Setup:

In [None]:
# Plain SGD over all model parameters
LR = 0.005
optimizer = torch.optim.SGD(RNN_model.parameters(), lr=LR)

# Negative log-likelihood loss
criterion = nn.NLLLoss()

# Loss bookkeeping: running sum for the current window and the recorded history
all_losses = []
current_loss = 0.0

In [None]:
# Training setup
def train(name_tensor, language_tensor):
    """Run a single optimisation step on one sample.

    Args:
        name_tensor: Sequence tensor; it is consumed one slice at a time
            along its first dimension.
        language_tensor: Target passed to the loss function.

    Returns:
        A tuple ``(output, loss_value)`` with the model output after the last
        step of the sequence and the loss as a Python float.
    """
    hidden_state = RNN_model.init_hidden()

    # Feed the sequence step by step, carrying the hidden state forward.
    for step_input in name_tensor:
        output, hidden_state = RNN_model(step_input, hidden_state)

    # Only the output of the final step is scored.
    loss = criterion(output, language_tensor)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return output, loss.item()

### 7. Train the Model:

In [None]:
# Load model if exists
# (The original used `assert("...")`, which always passes on a non-empty
# string and prints nothing; print the status instead.)
if MODEL_PATH.exists():
    RNN_model.load_state_dict(torch.load(MODEL_PATH))
    print("Model was SUCCESSFULLY loaded.")
else:
    print("Model does NOT exist.")


In [None]:
# Run training if the model doesn't exist or needs more training.
# Each iteration draws one random sample and performs one optimisation step,
# so EPOCHS here counts iterations of this loop.
for epoch in range(EPOCHS):
    language, language_tensor, name, name_tensor = get_random_sample(languages, language_names)
    output, loss = train(name_tensor, language_tensor)
    current_loss += loss

    # Every STEPS iterations: record the windowed loss and report progress.
    # NOTE: the entry recorded at epoch 0 covers only a single sample, so it
    # is not a full-window average.
    if epoch % STEPS == 0:
        all_losses.append(current_loss / STEPS)
        current_loss = 0

        prediction = languages[torch.argmax(output).item()]
        correct = "CORRECT" if prediction == language else f"WRONG -> {language}"
        print(f"{epoch} {epoch / EPOCHS * 100:.0f} {loss:.5f} {name} / {prediction} {correct}")

# Save the model to local disk (once, after the loop has finished)
torch.save(RNN_model.state_dict(), MODEL_PATH)

### 8. Plot Training Losses:

In [None]:
# Plot the recorded training losses (one point per STEPS training iterations).
# The first and last recorded entries are left out, as in the original cell;
# the first one is recorded at iteration 0 and covers a single sample only.
fig, ax = plt.subplots()
ax.plot(all_losses[1:-1], label='Loss', marker='o', linestyle='-')

# Titles and labels that describe what is actually plotted
ax.set_title('Training Loss')
ax.set_xlabel(f'Recorded window ({STEPS} iterations each)')
ax.set_ylabel('Average NLL loss')

# Adding a legend
ax.legend()

# Display the plot
plt.show()

### 9. Perform Predictions:

In [None]:
def predict(sentence):
    """Print the language predicted for ``sentence``.

    The sentence is echoed, converted with ``sentence_to_tensor``, fed through
    ``RNN_model`` one step at a time, and the entry of ``languages`` at the
    arg-max index of the final output is printed.

    Args:
        sentence: Text to classify.
    """
    print(f"\n> {sentence}")
    with torch.inference_mode():
        name_tensor = sentence_to_tensor(sentence)

        # With no steps to process the loop below would never assign `output`
        # and the lookup would raise UnboundLocalError (e.g. when the user
        # just presses Enter). Report it instead of crashing.
        # Assumes sentence_to_tensor yields a tensor with a zero-length first
        # dimension for empty input -- TODO confirm.
        if name_tensor.size()[0] == 0:
            print("Cannot predict: input is empty.")
            return

        next_hidden = RNN_model.init_hidden()

        for i in range(name_tensor.size()[0]):
            output, next_hidden = RNN_model(name_tensor[i], next_hidden)

        prediction = languages[torch.argmax(output).item()]
        print(prediction)

# Prediction state: keep classifying entered lines until the user types "q"
while (sentence := input("Input:")) != "q":
    predict(sentence)