<a href="https://colab.research.google.com/github/shrutiramdurg/Systems-Software/blob/main/LEX_and_YACC_Compiler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LEX and YACC Compiler in Colab

Drawbacks:
* Regular interrupts (Ctrl+D, Ctrl+C) for shell won't work in Colab while inputting for program.
<br>Workaround: Store your inputs in a txt file and pass it to the program.

In [None]:
#@title Install *prerqeuisites* (run this cell first to work on LEX/YACC)
!sudo apt install flex bison

## Lex only

In [None]:
#@title Writing Lex program
%%writefile program.l

%{
    #include <stdio.h>
    int ctChar=0;
    int ctSpace=0;
    int ctWord=0;
    int ctLine=0;
%}
WORD [^ \t\n,\.:]+
EOL [\n]
BLANK [ ]
%%

{WORD} {ctWord++; ctChar+=yyleng;}
{BLANK} {ctSpace++;}
{EOL} {ctLine++;}
. {ctChar++;}
%%

void main(int argc, char *argv[]){
    if(argc!=2){
        printf("Usage:\n\t./a.out <FILENAME>\n");
        exit(0);
    }

    yyin=fopen(argv[1],"r");
    yylex();

    printf("Word Count: %d\n",ctWord);
    printf("Character Count: %d\n",ctChar);
    printf("Space Count: %d\n",ctSpace);
    printf("Line Count: %d\n",ctLine);
    fclose(yyin);

}

int yywrap(){
    return 1;
}

Overwriting program.l


if you want to use at txt as an input

In [None]:
%%writefile program.txt

This is a sample file.

Writing program.txt


In [None]:
#@title Shell Execution (you can rewrite the commands as per your need, eg. if you want to include a file as an input)
%%shell

lex -l program.l
gcc lex.yy.c
./a.out program.txt

Word Count: 5
Character Count: 18
Space Count: 4
Line Count: 2




## Lex and Yacc combined

In [None]:
#@title Writing YACC program
%%writefile program.y

%{
    #include<stdio.h>
    #include<stdlib.h>
%}
%token DIGIT LETTER UND NL
%%
stmt: variable NL {printf("Valid Identifier\n");exit(0);}
variable: LETTER alphanumeric;
alphanumeric: LETTER alphanumeric
            | DIGIT alphanumeric
            | UND alphanumeric
            | LETTER
            | DIGIT
            | UND;
%%

int yyerror(){
    printf("Invalid Identifier\n");
    exit(0);
}

void main(){
    printf("Enter the variable name: ");
    yyparse();
}

Overwriting program.y


In [None]:
#@title Writing Lex program
%%writefile program.l

%{
    #include "y.tab.h"
%}
%%
[a-zA-Z] {return LETTER;}
[0-9] {return DIGIT;}
[_] {return UND;}
\n {return NL;}
. {return yytext[0];}
%%

Overwriting program.l


if you want to use at txt as an input

In [None]:
%%writefile program.txt

This is a sample file.

In [None]:
#@title Shell Execution (you can rewrite the commands as per your need, eg. if you want to include a file as an input)
%%shell

yacc -d program.y
lex program.l
cc y.tab.c lex.yy.c -ll
./a.out

[01m[Ky.tab.c:[m[K In function ‘[01m[Kyyparse[m[K’:
       yychar = [01;35m[Kyylex[m[K ();
                [01;35m[K^~~~~[m[K
       [01;35m[Kyyerror[m[K (YY_("syntax error"));
       [01;35m[K^~~~~~~[m[K
       [32m[Kyyerrok[m[K
Enter the variable name: variable_name
Valid Identifier




In [1]:
lex_code = """
%{
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

typedef struct {
    char symbol[50];
    char type[20];
} SymbolTableEntry;

SymbolTableEntry symtab[100];
int count = 0;

int isDuplicate(char *sym) {
    for (int i = 0; i < count; i++) {
        if (strcmp(symtab[i].symbol, sym) == 0) return 1;
    }
    return 0;
}
%}

kw main|printf|scanf|if|for|while|do
id [a-zA-Z_][a-zA-Z0-9_]*
dt int|float|char|double
D [0-9]+
ops "+"|"-"|"*"|"/"
cmp ">"|"<"|"=="|"<="|">="
as "="
sc ";"
header "#include"<[a-zA-Z_.]+>

%%

{header}   { printf("%s is a header file\\n",yytext); }
{dt}       { printf("%s is a datatype\\n",yytext); }
{ops}      { printf("%s is an arithmetic operator\\n",yytext); }
{cmp}      { printf("%s is a comparative operator\\n",yytext); }
{as}       { printf("%s is an assignment operator\\n",yytext); }
{D}        { printf("%s is a digit\\n",yytext); }
{sc}       { printf("%s is a semicolon\\n",yytext); }
{kw}"("    { printf("%s is a library function\\n",yytext); }

{id}"["{D}"]" {
    printf("%s is an array\\n", yytext);
    if (!isDuplicate(yytext)) {
        strcpy(symtab[count].symbol, yytext);
        strcpy(symtab[count].type, "array");
        count++;
    }
}

{id} {
    printf("%s is an identifier\\n",yytext);
    if (!isDuplicate(yytext)) {
        strcpy(symtab[count].symbol, yytext);
        strcpy(symtab[count].type, "identifier");
        count++;
    }
}

[a-zA-Z_][a-zA-Z0-9_]*"("   { printf("%s is a user defined function\\n",yytext); }
"{"        { printf("This is an opening bracket\\n"); }
"}"        { printf("This is a closing bracket\\n"); }

%%

int main(int argc, char *argv[]) {
    if (argc < 2) {
        printf("Usage: %s <input file>\\n", argv[0]);
        return 1;
    }

    FILE *fp = fopen(argv[1], "r");
    if (!fp) {
        perror("Cannot open input file");
        return 1;
    }
    yyin = fp;
    yylex();
    fclose(fp);

    FILE *symfile = fopen("symtab_output.txt", "w");
    if (!symfile) {
        perror("Cannot write symtab file");
        return 1;
    }

    fprintf(symfile, "Symbol\\tType\\n");
    fprintf(symfile, "-------------------\\n");
    for (int i = 0; i < count; i++) {
        fprintf(symfile, "%s\\t%s\\n", symtab[i].symbol, symtab[i].type);
    }
    fclose(symfile);

    return 0;
}

int yywrap() {
    return 1;
}
"""

with open("lexer.l", "w") as f:
    f.write(lex_code)


In [4]:
!apt-get update
!apt-get install -y flex bison


0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.0% [Connecting to archive.ubuntu.com (185.125.190.83)] [Connecting to security.                                                                               Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 https://cli.github.com/packages stable InRelease [3,917 B]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 https://cli.github.com/packages stable/main amd64 Packages [346 B]
Get:6 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:8 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:9 https://r2u.stat.illinois.edu/ubuntu jammy/main amd64 Packages [2,807 kB]
Hit:10 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy

In [5]:
!flex lexer.l

In [6]:
!gcc lex.yy.c -lfl -o lexer

In [7]:
!./lexer input.c

#include<stdio.h> is a header file

int is a datatype
 a is an identifier
, b is an identifier
; is a semicolon

float is a datatype
 x is an identifier
; is a semicolon

char is a datatype
 arr[10] is an array
; is a semicolon

a is an identifier
 = is an assignment operator
 5 is a digit
; is a semicolon

