# **CLI: Conversational Language Interface**
### **Converting English into Linux commands**

**Author: Partha Seetala**

**Video Tutorial: https://www.youtube.com/watch?v=eTknarEWVm8**

# **Import relevant Python modules**

In [1]:
import random
import numpy as np
import string
import os
import pickle
import json
import tensorflow as tf
from tensorflow.keras.layers import (Input, Embedding, LSTM, Attention, Concatenate, Dense, Masking)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# **Helper function to generate training data**

In [2]:
def generate_english_to_cli_sentences():

    def rand_str(minlen=3, maxlen=6):
        val = ''.join(random.choice(string.ascii_uppercase) for _ in range(random.randint(minlen, maxlen)))
        return val

    pleasantries = ["", "please", "kindly", "hey", "could you"]
    words = [rand_str() for _ in range(5)]
    filenames = [rand_str() for _ in range(5)]
    dirnames = [rand_str() for _ in range(5)]
    oldnames = [rand_str() for _ in range(5)]
    newnames = [rand_str() for _ in range(5)]
    srcs = [rand_str() for _ in range(5)]
    dests = [rand_str() for _ in range(5)]

    def create_cli():
        english = []
        cli = []
        for pleasantry in pleasantries:
            for action in ["create", "make", "build"]:
                for pronoun in ["", "a", "the", "me a"]:
                    for name in ["dir", "folder", "directory"]:
                        for extra in ["", "called", "calling it", "named", "naming it", "label it", "tag it"]:
                            for dirname in dirnames:
                                eng = f"{pleasantry} {action} {pronoun} {name} {extra} {dirname}"
                                cmd = f"mkdir {dirname}"
                                english.append(eng.strip())
                                cli.append(cmd.strip())
        return english, cli

    def delete_cli():
        english = []
        cli = []
        for pleasantry in pleasantries:
            for action in ["delete", "del", "purge", "nuke", "trash", "remove"]:
                for pronoun in ["", "a", "the"]:
                    for obj in ["file", "dir", "folder", "directory"]:
                        for extra in ["", "named", "by the name", "with name"]:
                            if obj == "file":
                                args = ""
                                objnames = filenames
                            else:
                                args = "-r"
                                objnames = dirnames
                            for objname in objnames:
                                eng = f"{pleasantry} {action} {pronoun} {obj} {extra} {objname}"
                                cmd = f"rm {args} {objname}"
                                english.append(eng.strip())
                                cli.append(cmd.strip())
        return english, cli

    def rename_cli():
        english = []
        cli = []
        for pleasantry in pleasantries:
            for action in ["rename", "move", "change", "replace"]:
                for name in ["", "file", "the file", "dir", "the dir" "folder", "the folder"]:
                    for extra in ["to", "as", "and call it", "to new name", "with", "and name it"]:
                        for src in oldnames:
                            for dst in newnames:
                                eng = f"{pleasantry} {action} {name} {src} {extra} {dst}"
                                cmd = f"mv {src} {dst}"
                                english.append(eng.strip())
                                cli.append(cmd.strip())
        return english, cli

    def copy_cli():
        english = []
        cli = []
        for pleasantry in pleasantries:
            for action in ["copy", "clone", "duplicate", "cp"]:
                for name in ["", "file", "the file", "dir", "the dir" "folder", "the folder", "directory", "the directory"]:
                    for extra in ["to", "as"]:
                        for src in srcs:
                            for dst in dests:
                                eng = f"{pleasantry} {action} {name} {src} {extra} {dst}"
                                cmd = f"cp {src} {dst}"
                                english.append(eng.strip())
                                cli.append(cmd.strip())
        return english, cli


    def list_cli():
        english = []
        cli = []
        for pleasantry in pleasantries:
            for action in ["list", "show", "display", "ls", "present"]:
                for name in ["", "dir", "directory", "files under", "everything under", "files inside", "contents of"]:
                    for extra in ["", "named", "by the name", "with name"]:
                            for dirname in dirnames:
                                eng = f"{pleasantry} {action} {name} {extra} {dirname}"
                                cmd = f"ls -l {dirname}"
                                english.append(eng.strip())
                                cli.append(cmd.strip())
        return english, cli

    def search_cli():
        english = []
        cli = []
        for pleasantry in pleasantries:
            for action in ["find", "search", "look for", "search for", "grep", "check for"]:
                for extra1 in ["", "", "word", "the word", "pattern", "existence of"]:
                    for needle in words:
                        for extra2 in ["in", "inside", "in file"]:
                            for haystack in filenames:
                                eng = f"{pleasantry} {action} {extra1} {needle} {extra2} {haystack}"
                                cmd = f"grep -i {needle} {haystack}"
                                english.append(eng.strip())
                                cli.append(cmd.strip())
        return english, cli


    english = []
    cli = []

    e, c = create_cli()
    english.extend(e)
    cli.extend(c)

    e, c = delete_cli()
    english.extend(e)
    cli.extend(c)

    e, c = rename_cli()
    english.extend(e)
    cli.extend(c)

    e, c = copy_cli()
    english.extend(e)
    cli.extend(c)

    e, c = list_cli()
    english.extend(e)
    cli.extend(c)

    e, c = search_cli()
    english.extend(e)
    cli.extend(c)

    indices = list(range(len(english)))
    random.shuffle(indices)

    english = [english[i] for i in indices]
    machine = [cli[i] for i in indices]

    placeholders = set(words) | set(filenames) | set(dirnames) | set(oldnames) | set(newnames) | set(srcs) | set(dests)

    return english, machine, placeholders

# **Using Seq2Seq to build "Jarvis" our Conversational Language Interface**

In [3]:
class JarvisTranslator:
    def __init__(self, max_english_len=20, max_cli_len=10, embedding_dim=300, latent_dim=1024):
        self.max_english_len = max_english_len
        self.max_cli_len = max_cli_len
        self.latent_dim = latent_dim
        self.embedding_dim = embedding_dim
        self.cli_vocab_size = 0
        self.placeholders = None

        self.english_tokenizer = None
        self.cli_tokenizer = None

        self.encoder = None
        self.decoder = None
        self.decoder_logits = None
        self.training_model = None
        self.inference_model = None

    def prepare_training_data(self, english_sentences, cli_commands, placeholders):
        self.placeholders = placeholders

        # Tokenize the English/english sentences
        self.english_tokenizer = Tokenizer(filters="", lower=True)  # english Language (lower case everything)
        self.english_tokenizer.fit_on_texts(english_sentences)
        self.english_vocab_size = len(self.english_tokenizer.word_index) + 1

        # Tokenize the CLI command sentences
        self.cli_tokenizer   = Tokenizer(filters="", lower=False)   # CLI Language (preserve case)
        self.cli_tokenizer.fit_on_texts(["[START]", "[END]"] + cli_commands)
        self.cli_vocab_size = len(self.cli_tokenizer.word_index) + 1

        # Convert texts → integer sequences
        en_seqs = self.english_tokenizer.texts_to_sequences(english_sentences)

        cmd_in  = ["[START]" + " " + seq for seq in cli_commands]
        cmd_out = [seq + " " + "[END]"   for seq in cli_commands]
        cmd_seqs_in  = self.cli_tokenizer.texts_to_sequences(cmd_in)
        cmd_seqs_out = self.cli_tokenizer.texts_to_sequences(cmd_out)

        # Pad to fixed lengths
        encoder_inputs = pad_sequences(en_seqs, maxlen=self.max_english_len, padding="post")
        decoder_inputs = pad_sequences(cmd_seqs_in,  maxlen=self.max_cli_len, padding="post")
        decoder_targets= pad_sequences(cmd_seqs_out, maxlen=self.max_cli_len, padding="post")

        # Expand dims for sparse_categorical_crossentropy
        decoder_targets = np.expand_dims(decoder_targets, -1)

        return encoder_inputs, decoder_inputs, decoder_targets

    def build_training_model(self):
        assert self.training_model is None, "Training model is already built!"

        # ENCODER
        encoder_inputs = Input(shape=(self.max_english_len,))
        encoder_embedding = Embedding(self.english_vocab_size, self.embedding_dim, name="encoder_embedding")(encoder_inputs)
        encoder_outputs, encoder_h, encoder_c = LSTM(self.latent_dim, return_sequences=True, return_state=True, name="encoder_lstm")(encoder_embedding)

        self.encoder = Model(inputs=encoder_inputs, outputs=[encoder_outputs, encoder_h, encoder_c], name="encoder")

        # DECODER
        decoder_inputs = Input(shape=(self.max_cli_len,), name="decoder")
        decoder_embedding = Embedding(self.cli_vocab_size, self.embedding_dim, name="decoder_embedding")(decoder_inputs)
        decoder_outputs, _, _ = LSTM(self.latent_dim, return_sequences=True, return_state=True, name="decoder_lstm")(decoder_embedding, initial_state=[encoder_h, encoder_c])

        # ATTENTION LAYER (Luong Attention)
        attention_output = Attention(use_scale=True, name="luong_attention")([decoder_outputs, encoder_outputs])

        # Concatenate decoder outputs with attention context (Luong-style)
        decoder_concat = Concatenate(axis=-1, name="context_concat")([decoder_outputs, attention_output])

        # Final output layer (project to vocab size to pick a token from across all tokens in our vocab)
        decoder_logits = Dense(self.cli_vocab_size, activation="softmax", name="output_logits")(decoder_concat)

        # Create training model
        self.training_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_logits)

        self.training_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

        return self.training_model

    def build_inference_model(self):
        assert self.inference_model is None, "Inference model is already built!"

        # REUSE ENCODER
        encoder_inputs = Input(shape=(self.max_english_len,))
        encoder_embedding = self.training_model.get_layer("encoder_embedding")(encoder_inputs)  # REUSE Encoder's Embedding Layer
        encoder_outputs, encoder_h, encoder_c = self.training_model.get_layer("encoder_lstm")(encoder_embedding) # REUSE Encoder's LSTM Layer

        # REUSE *PARTS OF* DECODER
        decoder_input = Input(shape=(1,))             # Inferencing Decoder takes one-token at a time to predict next token
        decoder_h = Input(shape=(self.latent_dim,))   # Socket to plugin Encoder's LAST encoder_h (hidden-state) to Decoder
        decoder_c = Input(shape=(self.latent_dim,))   # Socket to plugin Encoder's LAST encoder_c (cell-state) to Decoder

        decoder_embedding = self.training_model.get_layer("decoder_embedding")(decoder_input) # REUSE Decoder's Embedding Layer
        decoder_lstm = self.training_model.get_layer("decoder_lstm") # REUSE Decoder's LSTM Layer
        decoder_output, decoder_h_out, decoder_c_out = decoder_lstm(decoder_embedding, initial_state=[decoder_h, decoder_c])


        # REUSE ATTENTION
        encoder_outputs_input = Input(shape=(self.max_english_len, self.latent_dim)) # Socket to plugin in all hidden-sates of Encoder to Attention Module
        luong_attention = self.training_model.get_layer("luong_attention")   # RUSE
        attention_output = luong_attention([decoder_output, encoder_outputs_input])

        context_concat = Concatenate(axis=-1)([decoder_output, attention_output])

        # Final prediction (reuse trained weights)
        output_logits = self.training_model.get_layer("output_logits")(context_concat)

        # Create inference model
        self.inference_model = Model(
            inputs=[decoder_input, encoder_outputs_input, decoder_h, decoder_c],
            outputs=[output_logits, decoder_h_out, decoder_c_out],
        )

        return self.inference_model

    def build_model(self):
        if self.training_model is None:
            self.build_training_model()

    def show_model(self):
        if self.training_model is not None:
            print("TRAINING MODEL:")
            self.training_model.summary()

        if self.inference_model is not None:
            print("INFERENCE MODEL:")
            self.inference_model.summary()

    def train(self, english_seqs, cli_seqs_in, cli_seqs_out, batch_size=64, epochs=20, verbose=2):
        if self.training_model is None:
            self.build_training_model()

        trainer = self.training_model

        num_samples   = len(english_seqs)
        split         = int(0.8 * num_samples)  # Split 80% for Training and 20% for Validation
        train_enc_in  = english_seqs[:split]
        val_enc_in    = english_seqs[split:]
        train_dec_in  = cli_seqs_in[:split]
        val_dec_in    = cli_seqs_in[split:]
        train_dec_out = cli_seqs_out[:split]
        val_dec_out   = cli_seqs_out[split:]

        history = trainer.fit([train_enc_in, train_dec_in], train_dec_out, batch_size=batch_size, epochs=epochs,
                               validation_data=([val_enc_in, val_dec_in], val_dec_out), verbose=verbose)
        return history

    def english_to_cli(self, english_sentence):

        if self.inference_model is None:
            self.build_inference_model()

        # Tokenize enlgish sentence as a sequence and pad it
        eng_seq = self.english_tokenizer.texts_to_sequences([english_sentence.lower()])
        eng_seq = pad_sequences(eng_seq, maxlen=self.max_english_len, padding="post")

        # Run it through encoder to get the encoder's output
        encoder_outputs, encoder_hstates, encoder_cstates = self.encoder.predict(eng_seq, verbose=0)

        model = self.inference_model

        # Initialize the Decoder by passing it the [START] token so that it can start generating
        tokenid = self.cli_tokenizer.word_index["[START]"]
        cli_seq = np.array([[tokenid]])
        states  = [encoder_hstates, encoder_cstates]

        decoded_tokens = []
        for _ in range(self.max_cli_len):
            # predict next token
            logits, decoder_hstates, decoder_cstates = model.predict([cli_seq, encoder_outputs] + states, verbose = 0)

            tokenid = np.argmax(logits[0, -1, :])
            token = self.cli_tokenizer.index_word.get(tokenid, "")

            if token == "[END]":
                break
            if token and token != "[START]":
                decoded_tokens.append(token)

            cli_seq = np.array([[tokenid]])
            states  = [decoder_hstates, decoder_cstates]


        # extract parameter (argument) values from the english sentence
        raw_tokens = english_sentence.strip().split()
        input_oov = [tok for tok in raw_tokens if tok.lower() not in self.english_tokenizer.word_index]

        final_tokens = []
        oov_queue = input_oov.copy()
        for tok in decoded_tokens:
            if tok in self.placeholders and oov_queue:
                final_tokens.append(oov_queue.pop(0))
            else:
                final_tokens.append(tok)

        return " ".join(final_tokens)


In [4]:
np.random.seed(42)
tf.random.set_seed(42)

english_sentences, cli_commands, placeholders = generate_english_to_cli_sentences()

print("Jarvis will be trained on", len(english_sentences), "English sentences")

Jarvis will be trained on 56500 English sentences


In [5]:
for i in range(100):
    print("{:<6} | {:55s} | {}".format(i+1, english_sentences[i], cli_commands[i]))

1      | kindly move  HQLY and name it LFY                       | mv HQLY LFY
2      | hey rename the folder DFADM and name it TWV             | mv DFADM TWV
3      | kindly check for pattern XTY in BSPD                    | grep -i XTY BSPD
4      | could you purge the file named MYOGG                    | rm  MYOGG
5      | duplicate  RAOA to WRV                                  | cp RAOA WRV
6      | create me a directory called IZJQRK                     | mkdir IZJQRK
7      | kindly change  DFADM to new name WRWN                   | mv DFADM WRWN
8      | hey trash  directory by the name JEOMEV                 | rm -r JEOMEV
9      | change the folder EIN as TWV                            | mv EIN TWV
10     | please move  KBJI and call it WRWN                      | mv KBJI WRWN
11     | kindly nuke  dir by the name JEOMEV                     | rm -r JEOMEV
12     | hey change the dirfolder KBJI and call it LFY           | mv KBJI LFY
13     | hey search for pattern IZNO in MYO

In [None]:
jarvis = JarvisTranslator()

english_seqs, cli_seqs_in, cli_seqs_out = jarvis.prepare_training_data(english_sentences, cli_commands, placeholders)

jarvis.build_model()
jarvis.show_model()

TRAINING MODEL:


# **Train Jarvis**

In [None]:
jarvis.train(english_seqs, cli_seqs_in, cli_seqs_out, batch_size=64, epochs=10, verbose=2)

Epoch 1/10
707/707 - 11s - 16ms/step - accuracy: 0.9323 - loss: 0.1904 - val_accuracy: 0.9672 - val_loss: 0.1246
Epoch 2/10
707/707 - 7s - 9ms/step - accuracy: 0.9947 - loss: 0.0106 - val_accuracy: 1.0000 - val_loss: 1.2178e-04
Epoch 3/10
707/707 - 7s - 9ms/step - accuracy: 1.0000 - loss: 6.8610e-05 - val_accuracy: 1.0000 - val_loss: 3.8937e-05
Epoch 4/10
707/707 - 7s - 9ms/step - accuracy: 1.0000 - loss: 2.7087e-05 - val_accuracy: 1.0000 - val_loss: 1.8616e-05
Epoch 5/10
707/707 - 6s - 9ms/step - accuracy: 1.0000 - loss: 1.4032e-05 - val_accuracy: 1.0000 - val_loss: 1.0389e-05
Epoch 6/10
707/707 - 6s - 9ms/step - accuracy: 1.0000 - loss: 8.1078e-06 - val_accuracy: 1.0000 - val_loss: 6.1985e-06
Epoch 7/10
707/707 - 6s - 9ms/step - accuracy: 1.0000 - loss: 4.9467e-06 - val_accuracy: 1.0000 - val_loss: 3.9020e-06
Epoch 8/10
707/707 - 6s - 9ms/step - accuracy: 1.0000 - loss: 3.1922e-06 - val_accuracy: 1.0000 - val_loss: 2.5673e-06
Epoch 9/10
707/707 - 6s - 9ms/step - accuracy: 1.0000 - lo

<keras.src.callbacks.history.History at 0x7cbd66702750>

# **Use Jarvis to turn English sentences into Linux commands**

In [None]:
test_sentences = [
    "hey create me a folder name ai",
    "please rename the directory ai to ai-presentations",
    "copy file seq2seq.pptx to folder ai-presentations",
    "kindly rename it seq2seq.pptx to encoder-decoder.pptx",
    "display files under ai-presentations",
    "search for the word attention in the file myreport.pdf",
    "could you rename myreport.pdf to hisreport.pdf",
    "replace hisreport.pdf to myreport.pdf"
]

for sentence in test_sentences:
    cli = jarvis.english_to_cli(sentence)
    print("{:60s} \u2192   {}".format(sentence, cli))
    print("-" * 100)


hey create me a folder name ai                               →   mkdir ai
----------------------------------------------------------------------------------------------------
please rename the directory ai to ai-presentations           →   mv ai ai-presentations
----------------------------------------------------------------------------------------------------
copy file seq2seq.pptx to folder ai-presentations            →   mkdir seq2seq.pptx
----------------------------------------------------------------------------------------------------
kindly rename it seq2seq.pptx to encoder-decoder.pptx        →   mv seq2seq.pptx encoder-decoder.pptx
----------------------------------------------------------------------------------------------------
display files under ai-presentations                         →   ls -l ai-presentations
----------------------------------------------------------------------------------------------------
search for the word attention in the file myreport.pdf     