In [1]:
import sys
!{sys.executable} -m pip install --quiet scikit-learn numpy pandas joblib Flask pytest

In [3]:
import pandas as pd
from pathlib import Path
import csv

basics = [
    # Greetings / small talk
    {"question": "hi", "answer": "Hello! ðŸ‘‹ Iâ€™m your Java assistant. How can I help you today?"},
    {"question": "hello", "answer": "Hey there! ðŸ˜Š Ask me any Java question or share your code."},
    {"question": "how are you", "answer": "Iâ€™m doing great! Ready to help you with Java programming questions."},

    # Intro & history
    {"question": "What is Java?", "answer": "Java is a high-level, class-based, object-oriented programming language developed by James Gosling at Sun Microsystems in 1995."},
    {"question": "Who developed Java?", "answer": "Java was developed by James Gosling at Sun Microsystems (now Oracle)."},
    {"question": "What is JVM?", "answer": "JVM (Java Virtual Machine) runs Java bytecode on different platforms."},
    {"question": "What is JDK and JRE?", "answer": "JDK = Java Development Kit (compiler + tools). JRE = Java Runtime Environment (JVM + libraries)."},
    {"question": "How Java code compiles and runs?", "answer": "javac compiles .java to .class (bytecode). java command runs the bytecode on the JVM."},

    # Hello World and WAP basics
    {"question": "Write a Java program to print Hello World", 
     "answer": "public class HelloWorld {\n    public static void main(String[] args) {\n        System.out.println(\"Hello World\");\n    }\n}"},
    {"question": "WAP (write a program) to print Hello World", 
     "answer": "See: public class HelloWorld { public static void main(String[] args) { System.out.println(\"Hello World\"); } }"},
    
    # Variables and types
    {"question": "What are primitive data types in Java?", 
     "answer": "byte, short, int, long, float, double, char, boolean."},
    {"question": "Declare variables example in Java", 
     "answer": "int a = 10;\ndouble pi = 3.14;\nchar c = 'A';\nboolean ok = true;"},
    {"question": "Type casting in Java", 
     "answer": "int x = (int) 3.14; // explicit cast\ndouble y = x; // implicit cast"},

    # Input / Output
    {"question": "How to read input using Scanner", 
     "answer": "import java.util.Scanner;\nScanner sc = new Scanner(System.in);\nint n = sc.nextInt();\nString s = sc.nextLine();\nsc.close();"},
    {"question": "How to read input using BufferedReader", 
     "answer": "import java.io.*;\nBufferedReader br = new BufferedReader(new InputStreamReader(System.in));\nString s = br.readLine();\nint n = Integer.parseInt(br.readLine());"},

    # Operators
    {"question": "What are arithmetic operators in Java?", 
     "answer": "+ - * / % (modulus)"},
    {"question": "What are relational operators in Java?", 
     "answer": "==, !=, >, <, >=, <= (used for comparisons)"},
    {"question": "Ternary operator example", 
     "answer": "int max = (a > b) ? a : b;"},

    # Control statements
    {"question": "If-else example in Java", 
     "answer": "if (x > 0) {\n    System.out.println(\"positive\");\n} else {\n    System.out.println(\"non-positive\");\n}"},
    {"question": "Switch-case example in Java", 
     "answer": "switch (day) {\n  case 1: System.out.println(\"Mon\"); break;\n  case 2: System.out.println(\"Tue\"); break;\n  default: System.out.println(\"Other\");\n}"},

    # Loops
    {"question": "For loop example in Java", 
     "answer": "for (int i = 0; i < n; i++) {\n    System.out.println(i);\n}"},
    {"question": "While loop example in Java", 
     "answer": "int i = 0;\nwhile (i < n) {\n    System.out.println(i);\n    i++;\n}"},
    {"question": "Do-while loop example in Java", 
     "answer": "int i = 0;\ndo {\n   System.out.println(i);\n   i++;\n} while (i < n);"},

    # Basic algorithms: arithmetic
    {"question": "WAP to find factorial of a number in Java (iterative)", 
     "answer": "import java.util.Scanner;\npublic class Fact {\n  public static void main(String[] args) {\n    Scanner sc = new Scanner(System.in);\n    int n = sc.nextInt();\n    long f = 1;\n    for (int i = 2; i <= n; i++) f *= i;\n    System.out.println(f);\n    sc.close();\n  }\n}"},
    {"question": "WAP to find factorial (recursive)", 
     "answer": "public class FactRec {\n  static long fact(int n){ return n<=1?1:n*fact(n-1); }\n  public static void main(String[] args){ System.out.println(fact(5)); }\n}"},

    {"question": "WAP to print Fibonacci series (iterative)", 
     "answer": "int a=0,b=1;\nfor(int i=0;i<n;i++){ System.out.println(a); int t=a+b; a=b; b=t; }"},

    {"question": "WAP to check if a number is prime", 
     "answer": "boolean isPrime(int n){ if(n<=1) return false; for(int i=2;i*i<=n;i++) if(n%i==0) return false; return true; }"},

    {"question": "WAP to reverse a number in Java", 
     "answer": "int rev=0; while(n>0){ rev = rev*10 + n%10; n/=10; } System.out.println(rev);"},

    # Arrays (basic)
    {"question": "Declare and initialize array in Java", 
     "answer": "int[] a = new int[]{1,2,3};\nint[] b = {4,5,6};"},
    {"question": "Find max element in array", 
     "answer": "int max = a[0]; for(int i=1;i<a.length;i++) if(a[i]>max) max=a[i]; System.out.println(max);"},

    {"question": "WAP to reverse an array", 
     "answer": "for(int i=0;i<a.length/2;i++){ int t=a[i]; a[i]=a[a.length-1-i]; a[a.length-1-i]=t; }"},

    # Strings (basic)
    {"question": "String reverse in Java", 
     "answer": "StringBuilder sb = new StringBuilder(s); sb.reverse().toString();"},
    {"question": "Check palindrome string", 
     "answer": "String rev = new StringBuilder(s).reverse().toString(); if(s.equals(rev)) System.out.println(\"palindrome\");"},

    # Sorting & searching (simple)
    {"question": "Bubble sort example in Java", 
     "answer": "for(int i=0;i<n-1;i++) for(int j=0;j<n-1-i;j++) if(a[j]>a[j+1]){ int t=a[j]; a[j]=a[j+1]; a[j+1]=t; }"},
    {"question": "Binary search in sorted array (iterative)", 
     "answer": "int l=0,r=n-1; while(l<=r){ int mid=(l+r)/2; if(a[mid]==key) return mid; else if(a[mid]<key) l=mid+1; else r=mid-1; } return -1;"},

    # Exception handling basics
    {"question": "Try-catch example in Java", 
     "answer": "try{ int x = 5/0; } catch(ArithmeticException e){ System.out.println(e.getMessage()); } finally{ System.out.println(\"clean up\"); }"},
    {"question": "Custom exception example in Java", 
     "answer": "class MyEx extends Exception{ MyEx(String m){ super(m);} }\n// throw new MyEx(\"error\");"},

    # File I/O basics
    {"question": "Write text to file in Java", 
     "answer": "import java.io.*;\ntry(FileWriter fw = new FileWriter(\"out.txt\")) { fw.write(\"Hello\"); } catch(IOException e){ e.printStackTrace(); }"},
    {"question": "Read text file line by line in Java", 
     "answer": "import java.nio.file.*;\nfor(String line: Files.readAllLines(Paths.get(\"in.txt\"))) System.out.println(line);"},

    # Useful utilities
    {"question": "String vs StringBuilder vs StringBuffer", 
     "answer": "String is immutable. StringBuilder is mutable and not thread-safe (faster). StringBuffer is thread-safe (synchronized)."},
    {"question": "How to convert String to int and vice versa", 
     "answer": "int x = Integer.parseInt(s);\nString s = String.valueOf(x);"},

    # Simple OOP starter examples
    {"question": "Create a simple class and object example", 
     "answer": "class Person{ String name; Person(String n){ name=n; } }\nPerson p = new Person(\"Suraj\"); System.out.println(p.name);"},

    # Basic recursion practice
    {"question": "GCD of two numbers (recursive)", 
     "answer": "int gcd(int a,int b){ return b==0?a:gcd(b,a%b); }"},

    # Misc small programs
    {"question": "Swap two numbers without temp variable", 
     "answer": "a = a + b; b = a - b; a = a - b; // for integers (beware overflow)."},
    {"question": "Check even or odd", 
     "answer": "if(n%2==0) System.out.println(\"even\"); else System.out.println(\"odd\");"},

    # Java 8 basics (introduced here)
    {"question": "Lambda expression example Java 8", 
     "answer": "List<Integer> nums = Arrays.asList(1,2,3);\nnums.forEach(n -> System.out.println(n));"},
    {"question": "Stream filter map example Java 8", 
     "answer": "List<String> names = ...; names.stream().filter(s->s.length()>3).map(String::toUpperCase).forEach(System.out::println);"},
    
    # Try-with-resources
    {"question": "Try-with-resources example", 
     "answer": "try (BufferedReader br = new BufferedReader(new FileReader(\"in.txt\"))) { String s = br.readLine(); } catch(IOException e) { e.printStackTrace(); }"},
    
    # Simple Serialization
    {"question": "Serialization example in Java", 
     "answer": "class Person implements Serializable { String name; }\nObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(\"p.obj\")); oos.writeObject(person); oos.close();"},

    # End-marker sample
    {"question": "java basics summary", "answer": "Java basics include syntax, data types, control flow, OOP fundamentals, exceptions, collections, and I/O. Practice small programs."}
]

DATA_DIR = Path.cwd().parent / "data" if Path.cwd().name == "notebook" else Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

basics_path = DATA_DIR / "corpus_java_basics.csv"
df_basics = pd.DataFrame(basics)
df_basics.to_csv(basics_path, index=False, encoding="utf-8")

master_path = DATA_DIR / "corpus_java.csv"
if master_path.exists():
    df_master = pd.read_csv(master_path)
    combined = pd.concat([df_master, df_basics], ignore_index=True)
    combined = combined.drop_duplicates(subset=["question"], keep="first")
    combined.to_csv(master_path, index=False, encoding="utf-8")
else:
    # create master from basics
    df_basics.to_csv(master_path, index=False, encoding="utf-8")

print("Wrote basics to:", basics_path)
print("Master corpus at:", master_path)
print("Total entries in master (approx):", len(pd.read_csv(master_path)))

Wrote basics to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_basics.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master (approx): 50


In [5]:
import pandas as pd
from pathlib import Path

def load_corpus(file_path: Path):
    # Check if the file exists
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")
    # Load the CSV file into a DataFrame and return it
    return pd.read_csv(file_path)

In [7]:
corpus_path = Path("data/corpus_java.csv")
corpus = load_corpus(corpus_path)

In [9]:
import pandas as pd
from pathlib import Path

oop = [
    # Classes & Objects
    {"question": "What is a class in Java?", 
     "answer": "A class is a blueprint for creating objects, containing fields (variables) and methods (functions)."},
    {"question": "What is an object in Java?", 
     "answer": "An object is an instance of a class with its own state and behavior."},
    {"question": "Write a simple class and create object", 
     "answer": "class Car{ String model; int year; Car(String m, int y){ model=m; year=y; } }\nCar c = new Car(\"Tesla\",2025); System.out.println(c.model + \" \" + c.year);"},

    # Constructors
    {"question": "What is a constructor in Java?", 
     "answer": "A constructor is a special method used to initialize objects. It has no return type and same name as class."},
    {"question": "Constructor example in Java", 
     "answer": "class Person{ String name; Person(String n){ name=n; } } Person p = new Person(\"Suraj\");"},

    # Inheritance
    {"question": "What is inheritance in Java?", 
     "answer": "Inheritance allows a class to acquire properties and methods of another class using 'extends' keyword."},
    {"question": "Example of inheritance", 
     "answer": "class Animal{ void eat(){System.out.println(\"eat\");} } class Dog extends Animal{ void bark(){System.out.println(\"bark\");} } Dog d = new Dog(); d.eat(); d.bark();"},

    # Polymorphism
    {"question": "What is polymorphism in Java?", 
     "answer": "Polymorphism allows objects to take many forms. It includes compile-time (method overloading) and runtime (method overriding)."},
    {"question": "Method overloading example", 
     "answer": "class Calc{ int add(int a,int b){return a+b;} double add(double a,double b){return a+b;} }"},
    {"question": "Method overriding example", 
     "answer": "class Animal{ void sound(){System.out.println(\"Animal\");} } class Dog extends Animal{ void sound(){System.out.println(\"Bark\");} }"},

    # Encapsulation
    {"question": "What is encapsulation in Java?", 
     "answer": "Encapsulation is wrapping data (variables) and code (methods) together. Use private fields + public getters/setters."},
    {"question": "Encapsulation example", 
     "answer": "class Person{ private String name; public String getName(){return name;} public void setName(String n){name=n;} }"},

    # Abstraction
    {"question": "What is abstraction in Java?", 
     "answer": "Abstraction hides implementation details using abstract classes or interfaces."},
    {"question": "Abstract class example", 
     "answer": "abstract class Shape{ abstract void draw(); } class Circle extends Shape{ void draw(){System.out.println(\"Circle\");} }"},

    # Interface
    {"question": "What is interface in Java?", 
     "answer": "An interface is a contract that defines abstract methods. Classes implement interfaces using 'implements' keyword."},
    {"question": "Interface example", 
     "answer": "interface Animal{ void sound(); } class Dog implements Animal{ public void sound(){System.out.println(\"Bark\");} }"},

    # Inner / Nested classes
    {"question": "What is inner class in Java?", 
     "answer": "A class defined inside another class. Can access outer class members."},
    {"question": "Inner class example", 
     "answer": "class Outer{ int x=10; class Inner{ void show(){System.out.println(x);} } } Outer o = new Outer(); Outer.Inner i = o.new Inner(); i.show();"},

    # Collections basics
    {"question": "What is ArrayList in Java?", 
     "answer": "ArrayList is a resizable array implementation of List interface."},
    {"question": "ArrayList example", 
     "answer": "import java.util.*;\nArrayList<String> list = new ArrayList<>(); list.add(\"A\"); list.add(\"B\"); for(String s:list) System.out.println(s);"},

    {"question": "What is HashMap in Java?", 
     "answer": "HashMap stores key-value pairs. Keys are unique. O(1) access on average."},
    {"question": "HashMap example", 
     "answer": "import java.util.*;\nHashMap<String,Integer> map = new HashMap<>(); map.put(\"A\",1); map.put(\"B\",2); for(String k:map.keySet()) System.out.println(k+\"=\"+map.get(k));"},

    {"question": "What is LinkedList in Java?", 
     "answer": "LinkedList is a doubly-linked list implementation of List and Deque interfaces."},
    {"question": "LinkedList example", 
     "answer": "import java.util.*;\nLinkedList<Integer> ll = new LinkedList<>(); ll.add(1); ll.add(2); ll.removeFirst(); System.out.println(ll);"}, 

    # Iterators
    {"question": "Iterator example for ArrayList", 
     "answer": "Iterator<String> it = list.iterator(); while(it.hasNext()){ System.out.println(it.next()); }"},

    # Generics
    {"question": "What are generics in Java?", 
     "answer": "Generics allow type-safe operations on collections, e.g., ArrayList<String> ensures only Strings."},
    {"question": "Generic class example", 
     "answer": "class Box<T>{ T item; void set(T i){item=i;} T get(){return item;} } Box<Integer> b = new Box<>(); b.set(10); System.out.println(b.get());"},

    # Java 8 features with OOP
    {"question": "Default method in interface (Java 8)", 
     "answer": "interface A{ default void show(){System.out.println(\"default\");} } class B implements A{} new B().show();"},
    {"question": "Functional interface example", 
     "answer": "@FunctionalInterface interface Func{ void apply(); } Func f = ()->System.out.println(\"Hi\"); f.apply();"},
    
    # End-marker
    {"question": "java oop collections summary", "answer": "OOP concepts: class, object, inheritance, polymorphism, encapsulation, abstraction, interface. Collections: ArrayList, LinkedList, HashMap, Generics, Iterators."}
]

DATA_DIR = Path.cwd().parent / "data" if Path.cwd().name == "notebook" else Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

oop_path = DATA_DIR / "corpus_java_oop.csv"
df_oop = pd.DataFrame(oop)
df_oop.to_csv(oop_path, index=False, encoding="utf-8")

master_path = DATA_DIR / "corpus_java.csv"
if master_path.exists():
    df_master = pd.read_csv(master_path)
    combined = pd.concat([df_master, df_oop], ignore_index=True)
    combined = combined.drop_duplicates(subset=["question"], keep="first")
    combined.to_csv(master_path, index=False, encoding="utf-8")
else:
    df_oop.to_csv(master_path, index=False, encoding="utf-8")

print("Wrote OOP & Collections to:", oop_path)
print("Master corpus at:", master_path)
print("Total entries in master (approx):", len(pd.read_csv(master_path)))

Wrote OOP & Collections to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_oop.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master (approx): 80


In [11]:
import pandas as pd
from pathlib import Path

arrays_strings = [
    # Arrays basics
    {"question": "Declare an integer array in Java", 
     "answer": "int[] a = new int[5]; // size 5\nint[] b = {1,2,3,4,5}; // initialization"},
    {"question": "Access elements of an array", 
     "answer": "int x = a[0]; // first element\nfor(int i=0;i<a.length;i++) System.out.println(a[i]);"},
    {"question": "Find max element in array", 
     "answer": "int max = a[0]; for(int i=1;i<a.length;i++) if(a[i]>max) max=a[i]; System.out.println(max);"},
    {"question": "Find min element in array", 
     "answer": "int min = a[0]; for(int i=1;i<a.length;i++) if(a[i]<min) min=a[i]; System.out.println(min);"},
    {"question": "Sum of all elements in array", 
     "answer": "int sum=0; for(int i:a) sum+=i; System.out.println(sum);"},
    {"question": "Reverse an array in Java", 
     "answer": "for(int i=0;i<a.length/2;i++){ int t=a[i]; a[i]=a[a.length-1-i]; a[a.length-1-i]=t; }"},
    {"question": "Check if array contains a number", 
     "answer": "boolean found=false; for(int i:a) if(i==key) {found=true; break;}"},
    {"question": "Linear search in array", 
     "answer": "for(int i=0;i<a.length;i++) if(a[i]==key) System.out.println(\"Found at \"+i);"},
    {"question": "Binary search in sorted array", 
     "answer": "int l=0,r=a.length-1; while(l<=r){ int mid=(l+r)/2; if(a[mid]==key){System.out.println(mid); break;} else if(a[mid]<key) l=mid+1; else r=mid-1; }"},
    
    # Array sorting
    {"question": "Bubble sort in Java", 
     "answer": "for(int i=0;i<n-1;i++) for(int j=0;j<n-1-i;j++) if(a[j]>a[j+1]){ int t=a[j]; a[j]=a[j+1]; a[j+1]=t; }"},
    {"question": "Selection sort in Java", 
     "answer": "for(int i=0;i<n-1;i++){ int min=i; for(int j=i+1;j<n;j++) if(a[j]<a[min]) min=j; int t=a[i]; a[i]=a[min]; a[min]=t; }"},
    {"question": "Insertion sort in Java", 
     "answer": "for(int i=1;i<n;i++){ int key=a[i]; int j=i-1; while(j>=0 && a[j]>key){ a[j+1]=a[j]; j--; } a[j+1]=key; }"},

    # Arrays advanced
    {"question": "Find second largest element in array", 
     "answer": "int first=Integer.MIN_VALUE, second=Integer.MIN_VALUE; for(int i:a){ if(i>first){ second=first; first=i;} else if(i>second && i!=first) second=i; }"},
    {"question": "Remove duplicates from array", 
     "answer": "Set<Integer> s = new HashSet<>(); for(int i:a) s.add(i); System.out.println(s);"},
    {"question": "Find subarray with given sum", 
     "answer": "int sum=0,start=0; for(int i=0;i<a.length;i++){ sum+=a[i]; while(sum>target){ sum-=a[start]; start++; } if(sum==target) System.out.println(start+\" to \"+i); }"},
    {"question": "Rotate array by k positions", 
     "answer": "int n=a.length; int[] temp = new int[n]; for(int i=0;i<n;i++) temp[(i+k)%n]=a[i]; a=temp;"},
    {"question": "Find majority element in array", 
     "answer": "int count=0, candidate=0; for(int i:a){ if(count==0) candidate=i; if(i==candidate) count++; else count--; } System.out.println(candidate);"},

    # Strings basics
    {"question": "Declare a string in Java", 
     "answer": "String s = \"Hello\";"},
    {"question": "String length in Java", 
     "answer": "int len = s.length();"},
    {"question": "Concatenate strings", 
     "answer": "String s2 = s + \" World\";"},
    {"question": "Compare strings", 
     "answer": "s.equals(\"Hello\"); // content\ns==\"Hello\"; // reference"},
    {"question": "String substring example", 
     "answer": "String sub = s.substring(1,4); // index 1 to 3"},
    {"question": "Convert string to char array", 
     "answer": "char[] c = s.toCharArray();"},
    {"question": "Reverse a string", 
     "answer": "String rev = new StringBuilder(s).reverse().toString();"},
    {"question": "Check palindrome string", 
     "answer": "String rev = new StringBuilder(s).reverse().toString(); if(s.equals(rev)) System.out.println(\"Palindrome\");"},
    {"question": "Check anagram of two strings", 
     "answer": "char[] c1=s1.toCharArray(); char[] c2=s2.toCharArray(); Arrays.sort(c1); Arrays.sort(c2); if(Arrays.equals(c1,c2)) System.out.println(\"Anagram\");"},
    {"question": "Count vowels in string", 
     "answer": "int count=0; for(char ch:s.toCharArray()) if(\"aeiouAEIOU\".indexOf(ch)>=0) count++; System.out.println(count);"},
    {"question": "Find first non-repeating character", 
     "answer": "Map<Character,Integer> m=new HashMap<>(); for(char ch:s.toCharArray()) m.put(ch,m.getOrDefault(ch,0)+1); for(char ch:s.toCharArray()) if(m.get(ch)==1){ System.out.println(ch); break; }"},

    # StringBuilder / StringBuffer
    {"question": "StringBuilder append example", 
     "answer": "StringBuilder sb = new StringBuilder(\"Hi\"); sb.append(\" World\"); System.out.println(sb);"},
    {"question": "StringBuilder insert example", 
     "answer": "StringBuilder sb = new StringBuilder(\"Hi\"); sb.insert(2,\" Java\"); System.out.println(sb);"},
    {"question": "StringBuffer example", 
     "answer": "StringBuffer sb = new StringBuffer(\"Hi\"); sb.append(\" World\"); System.out.println(sb); // thread-safe"},

    # Arrays + Strings algorithmic
    {"question": "Find longest substring without repeating characters", 
     "answer": "Set<Character> set = new HashSet<>(); int l=0,maxLen=0; for(int r=0;r<s.length();r++){ while(set.contains(s.charAt(r))){ set.remove(s.charAt(l)); l++; } set.add(s.charAt(r)); maxLen=Math.max(maxLen,r-l+1); } System.out.println(maxLen);"},
    {"question": "Find all permutations of a string", 
     "answer": "void permute(String str,String ans){ if(str.length()==0) System.out.println(ans); for(int i=0;i<str.length();i++){ char ch=str.charAt(i); String ros=str.substring(0,i)+str.substring(i+1); permute(ros,ans+ch); } }"},
    {"question": "Count occurrence of each character in string", 
     "answer": "Map<Character,Integer> m=new HashMap<>(); for(char c:s.toCharArray()) m.put(c,m.getOrDefault(c,0)+1); System.out.println(m);"},

    # Arrays 2D
    {"question": "Declare 2D array in Java", 
     "answer": "int[][] a = new int[3][3]; int[][] b = {{1,2,3},{4,5,6},{7,8,9}};"},
    {"question": "Traverse 2D array", 
     "answer": "for(int i=0;i<a.length;i++) for(int j=0;j<a[i].length;j++) System.out.println(a[i][j]);"},
    {"question": "Sum of 2D array elements", 
     "answer": "int sum=0; for(int i=0;i<a.length;i++) for(int j=0;j<a[i].length;j++) sum+=a[i][j]; System.out.println(sum);"},
    {"question": "Find max in 2D array", 
     "answer": "int max=a[0][0]; for(int i=0;i<a.length;i++) for(int j=0;j<a[i].length;j++) if(a[i][j]>max) max=a[i][j]; System.out.println(max);"},

    # Array + String small programs
    {"question": "Check if two arrays are equal", 
     "answer": "Arrays.equals(a,b);"},
    {"question": "Merge two arrays", 
     "answer": "int[] c = new int[a.length+b.length]; System.arraycopy(a,0,c,0,a.length); System.arraycopy(b,0,c,a.length,b.length);"},
    {"question": "Find duplicate elements in array", 
     "answer": "Set<Integer> s = new HashSet<>(); for(int i:a){ if(!s.add(i)) System.out.println(i); }"},
    {"question": "Find missing number in 1 to n array", 
     "answer": "int total=n*(n+1)/2, sum=0; for(int i:a) sum+=i; System.out.println(total-sum);"},
    
    # End-marker
    {"question": "java arrays strings summary", 
     "answer": "Arrays: declaration, traversal, searching, sorting, reversal, 2D arrays, duplicates, sum, merge. Strings: length, compare, reverse, substring, palindrome, anagram, StringBuilder/Buffer, regex, permutations."}
]

DATA_DIR = Path.cwd().parent / "data" if Path.cwd().name == "notebook" else Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

arrays_strings_path = DATA_DIR / "corpus_java_arrays_strings.csv"
df_as = pd.DataFrame(arrays_strings)
df_as.to_csv(arrays_strings_path, index=False, encoding="utf-8")

master_path = DATA_DIR / "corpus_java.csv"
if master_path.exists():
    df_master = pd.read_csv(master_path)
    combined = pd.concat([df_master, df_as], ignore_index=True)
    combined = combined.drop_duplicates(subset=["question"], keep="first")
    combined.to_csv(master_path, index=False, encoding="utf-8")
else:
    df_as.to_csv(master_path, index=False, encoding="utf-8")

print("Wrote Arrays & Strings to:", arrays_strings_path)
print("Master corpus at:", master_path)
print("Total entries in master (approx):", len(pd.read_csv(master_path)))

Wrote Arrays & Strings to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_arrays_strings.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master (approx): 121


In [13]:
import pandas as pd
from pathlib import Path

exceptions_io = [
    # Exception handling basics
    {"question": "What is exception in Java?", 
     "answer": "An exception is an event that disrupts normal flow of the program, e.g., ArithmeticException, NullPointerException."},
    {"question": "Difference between checked and unchecked exceptions", 
     "answer": "Checked: compile-time (IOException, SQLException). Unchecked: runtime (ArithmeticException, NullPointerException)."},
    {"question": "Try-catch example in Java", 
     "answer": "try{ int x=5/0; } catch(ArithmeticException e){ System.out.println(e); } finally{ System.out.println(\"cleanup\"); }"},
    {"question": "Multiple catch blocks example", 
     "answer": "try{ ... } catch(ArithmeticException e){ ... } catch(ArrayIndexOutOfBoundsException e){ ... }"},
    {"question": "Throw keyword example", 
     "answer": "void check(int n){ if(n<0) throw new IllegalArgumentException(\"Negative\"); }"},
    {"question": "Throws keyword example", 
     "answer": "void readFile() throws IOException { FileReader fr = new FileReader(\"in.txt\"); }"},
    {"question": "Custom exception example", 
     "answer": "class MyEx extends Exception{ MyEx(String msg){ super(msg); } } throw new MyEx(\"error\");"},

    # File I/O basics
    {"question": "Read file using FileReader", 
     "answer": "FileReader fr = new FileReader(\"in.txt\"); int ch; while((ch=fr.read())!=-1) System.out.print((char)ch); fr.close();"},
    {"question": "Write file using FileWriter", 
     "answer": "FileWriter fw = new FileWriter(\"out.txt\"); fw.write(\"Hello\"); fw.close();"},
    {"question": "Read file using BufferedReader", 
     "answer": "BufferedReader br = new BufferedReader(new FileReader(\"in.txt\")); String line; while((line=br.readLine())!=null) System.out.println(line); br.close();"},
    {"question": "Write file using BufferedWriter", 
     "answer": "BufferedWriter bw = new BufferedWriter(new FileWriter(\"out.txt\")); bw.write(\"Hello\"); bw.newLine(); bw.close();"},
    {"question": "Try-with-resources for file", 
     "answer": "try(BufferedReader br=new BufferedReader(new FileReader(\"in.txt\"))) { String s; while((s=br.readLine())!=null) System.out.println(s); } catch(IOException e){ e.printStackTrace(); }"},

    # Files API
    {"question": "Read all lines using Files API", 
     "answer": "List<String> lines = Files.readAllLines(Paths.get(\"in.txt\")); for(String l:lines) System.out.println(l);"},
    {"question": "Write all lines using Files API", 
     "answer": "List<String> lines = Arrays.asList(\"A\",\"B\"); Files.write(Paths.get(\"out.txt\"), lines);"},
    {"question": "Check if file exists", 
     "answer": "if(Files.exists(Paths.get(\"file.txt\"))) System.out.println(\"Exists\");"},
    {"question": "Copy file using Files API", 
     "answer": "Files.copy(Paths.get(\"src.txt\"), Paths.get(\"dest.txt\"), StandardCopyOption.REPLACE_EXISTING);"},

    # Serialization / Deserialization
    {"question": "What is serialization in Java?", 
     "answer": "Serialization converts an object into a byte stream for storage or transmission."},
    {"question": "What is deserialization in Java?", 
     "answer": "Deserialization converts a byte stream back into a Java object."},
    {"question": "Serialization example", 
     "answer": "class Person implements Serializable{ String name; }\nPerson p = new Person(); ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(\"p.obj\")); oos.writeObject(p); oos.close();"},
    {"question": "Deserialization example", 
     "answer": "ObjectInputStream ois = new ObjectInputStream(new FileInputStream(\"p.obj\")); Person p = (Person) ois.readObject(); ois.close();"},
    
    # NIO basics
    {"question": "Path example in Java NIO", 
     "answer": "Path path = Paths.get(\"file.txt\"); System.out.println(path.toAbsolutePath());"},
    {"question": "Read file using NIO Files.lines()", 
     "answer": "Files.lines(Paths.get(\"in.txt\")).forEach(System.out::println);"},
    {"question": "Write file using NIO Files.write()", 
     "answer": "List<String> lines = Arrays.asList(\"A\",\"B\"); Files.write(Paths.get(\"out.txt\"), lines);"},
    {"question": "Check if path exists using NIO", 
     "answer": "if(Files.exists(Paths.get(\"file.txt\"))) System.out.println(\"Exists\");"},
    {"question": "Copy file using NIO Files.copy()", 
     "answer": "Files.copy(Paths.get(\"src.txt\"), Paths.get(\"dest.txt\"), StandardCopyOption.REPLACE_EXISTING);"},

    # Exception handling + I/O small programs
    {"question": "Read integers from file and sum", 
     "answer": "BufferedReader br = new BufferedReader(new FileReader(\"nums.txt\")); String line; int sum=0; while((line=br.readLine())!=null) sum+=Integer.parseInt(line); System.out.println(sum); br.close();"},
    {"question": "Handle multiple exceptions in one catch (Java 7+)", 
     "answer": "try{ ... } catch(IOException | NumberFormatException e){ e.printStackTrace(); }"},
    {"question": "Throw exception from method", 
     "answer": "void check(int n) throws IOException { if(n<0) throw new IOException(\"Negative\"); }"},
    {"question": "Finally block example", 
     "answer": "try{ ... } catch(Exception e){ ... } finally{ System.out.println(\"Always executed\"); }"},
    {"question": "Suppress exceptions using try-with-resources", 
     "answer": "try(FileReader fr=new FileReader(\"in.txt\")) { ... } catch(IOException e){ e.printStackTrace(); } // Auto closes resource"},

    # NIO ByteBuffer & Channels
    {"question": "Create ByteBuffer in Java NIO", 
     "answer": "ByteBuffer buffer = ByteBuffer.allocate(1024);"},
    {"question": "Write data to ByteBuffer", 
     "answer": "buffer.put((byte)65); buffer.flip();"},
    {"question": "Read data from ByteBuffer", 
     "answer": "while(buffer.hasRemaining()) System.out.print((char)buffer.get());"},
    {"question": "FileChannel example", 
     "answer": "RandomAccessFile file = new RandomAccessFile(\"file.txt\",\"rw\"); FileChannel channel = file.getChannel(); ByteBuffer buffer = ByteBuffer.allocate(1024); channel.read(buffer); buffer.flip(); while(buffer.hasRemaining()) System.out.print((char)buffer.get()); channel.close(); file.close();"},

    # Miscellaneous
    {"question": "Difference between FileInputStream and FileReader", 
     "answer": "FileInputStream reads bytes, FileReader reads characters."},
    {"question": "Difference between Serializable and Externalizable", 
     "answer": "Serializable: default serialization. Externalizable: custom read/write implementation."},
    {"question": "What is IOException?", 
     "answer": "IOException is a checked exception for failed or interrupted I/O operations."},
    {"question": "What is FileNotFoundException?", 
     "answer": "FileNotFoundException is thrown when a file is not found or inaccessible."},

    # End-marker
    {"question": "java exceptions file io summary", 
     "answer": "Exception handling: try, catch, finally, throw, throws, custom exceptions. File I/O: FileReader/Writer, BufferedReader/Writer, Files API. Serialization & Deserialization. NIO: Path, Files, Channels, ByteBuffer."}
]

DATA_DIR = Path.cwd().parent / "data" if Path.cwd().name == "notebook" else Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

exceptions_io_path = DATA_DIR / "corpus_java_exceptions_io.csv"
df_eio = pd.DataFrame(exceptions_io)
df_eio.to_csv(exceptions_io_path, index=False, encoding="utf-8")

master_path = DATA_DIR / "corpus_java.csv"
if master_path.exists():
    df_master = pd.read_csv(master_path)
    combined = pd.concat([df_master, df_eio], ignore_index=True)
    combined = combined.drop_duplicates(subset=["question"], keep="first")
    combined.to_csv(master_path, index=False, encoding="utf-8")
else:
    df_eio.to_csv(master_path, index=False, encoding="utf-8")

print("Wrote Exceptions & File I/O to:", exceptions_io_path)
print("Master corpus at:", master_path)
print("Total entries in master (approx):", len(pd.read_csv(master_path)))

Wrote Exceptions & File I/O to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_exceptions_io.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master (approx): 159


In [15]:
import pandas as pd
from pathlib import Path

collections = [
    # List and ArrayList
    {"question": "What is ArrayList in Java?", 
     "answer": "ArrayList is a resizable array implementation of List interface."},
    {"question": "Create ArrayList example", 
     "answer": "ArrayList<String> list = new ArrayList<>(); list.add(\"A\"); list.add(\"B\");"},
    {"question": "Access elements in ArrayList", 
     "answer": "String s = list.get(0);"},
    {"question": "Remove element from ArrayList", 
     "answer": "list.remove(0);"},
    {"question": "Iterate ArrayList using for-each", 
     "answer": "for(String s : list) System.out.println(s);"},
    
    # LinkedList
    {"question": "What is LinkedList in Java?", 
     "answer": "LinkedList is a doubly-linked list implementation of List and Deque interfaces."},
    {"question": "Add elements to LinkedList", 
     "answer": "LinkedList<Integer> ll = new LinkedList<>(); ll.add(10); ll.addFirst(5); ll.addLast(20);"},
    {"question": "Remove element from LinkedList", 
     "answer": "ll.removeFirst(); ll.removeLast();"},
    
    # Set
    {"question": "What is Set in Java?", 
     "answer": "Set is a collection that contains no duplicate elements."},
    {"question": "HashSet example", 
     "answer": "HashSet<String> hs = new HashSet<>(); hs.add(\"A\"); hs.add(\"B\");"},
    {"question": "TreeSet example", 
     "answer": "TreeSet<Integer> ts = new TreeSet<>(); ts.add(3); ts.add(1); ts.add(2);"},
    
    # Map
    {"question": "What is Map in Java?", 
     "answer": "Map stores key-value pairs; keys are unique."},
    {"question": "HashMap example", 
     "answer": "HashMap<String,Integer> map = new HashMap<>(); map.put(\"A\",1); map.put(\"B\",2);"},
    {"question": "Access value from HashMap", 
     "answer": "int val = map.get(\"A\");"},
    {"question": "TreeMap example", 
     "answer": "TreeMap<String,Integer> tm = new TreeMap<>(); tm.put(\"C\",3); tm.put(\"A\",1);"},
    
    # Queue
    {"question": "Queue interface in Java", 
     "answer": "Queue stores elements in FIFO order; implemented by LinkedList, PriorityQueue."},
    {"question": "PriorityQueue example", 
     "answer": "PriorityQueue<Integer> pq = new PriorityQueue<>(); pq.add(10); pq.add(5);"},
    
    # Deque
    {"question": "Deque in Java", 
     "answer": "Deque is double-ended queue; implemented by LinkedList or ArrayDeque."},
    {"question": "ArrayDeque example", 
     "answer": "ArrayDeque<String> dq = new ArrayDeque<>(); dq.addFirst(\"A\"); dq.addLast(\"B\");"},
    
    # Generics
    {"question": "Generic class example", 
     "answer": "class Box<T>{ T value; void set(T v){ value=v; } T get(){ return value; } }"},
    {"question": "Generic method example", 
     "answer": "static <T> void print(T val){ System.out.println(val); }"},
    {"question": "Bounded generic example", 
     "answer": "class NumberBox<T extends Number>{ T value; }"},
    
    # Iterator
    {"question": "Iterator example for ArrayList", 
     "answer": "Iterator<String> it = list.iterator(); while(it.hasNext()){ System.out.println(it.next()); }"},
    {"question": "Remove elements using Iterator", 
     "answer": "Iterator<String> it = list.iterator(); while(it.hasNext()){ if(it.next().equals(\"A\")) it.remove(); }"},
    
    # Comparator and Comparable
    {"question": "Comparable example", 
     "answer": "class Person implements Comparable<Person>{ int age; public int compareTo(Person p){ return this.age - p.age; } }"},
    {"question": "Comparator example", 
     "answer": "Comparator<Person> cmp = (p1,p2) -> p1.age - p2.age; Collections.sort(list, cmp);"},
    
    # Practical programs
    {"question": "Remove duplicates from ArrayList", 
     "answer": "List<Integer> l = Arrays.asList(1,2,2,3); Set<Integer> s = new HashSet<>(l); l = new ArrayList<>(s);"},
    {"question": "Sort ArrayList of integers", 
     "answer": "Collections.sort(list);"},
    {"question": "Reverse ArrayList", 
     "answer": "Collections.reverse(list);"},
    {"question": "Find max in ArrayList", 
     "answer": "int max = Collections.max(list);"},
    {"question": "Find min in ArrayList", 
     "answer": "int min = Collections.min(list);"},
    {"question": "Convert array to ArrayList", 
     "answer": "Integer[] arr = {1,2,3}; List<Integer> list = new ArrayList<>(Arrays.asList(arr));"},
    {"question": "Convert ArrayList to array", 
     "answer": "Integer[] arr = list.toArray(new Integer[0]);"},
    
    # Map small programs
    {"question": "Count frequency of elements using HashMap", 
     "answer": "Map<Integer,Integer> freq = new HashMap<>(); for(int x:arr) freq.put(x,freq.getOrDefault(x,0)+1);"},
    {"question": "Merge two HashMaps", 
     "answer": "map1.putAll(map2);"},
    {"question": "Check if key exists in Map", 
     "answer": "if(map.containsKey(\"A\")) System.out.println(\"Exists\");"},
    
    # Set small programs
    {"question": "Union of two sets", 
     "answer": "Set<Integer> set1 = new HashSet<>(Arrays.asList(1,2)); Set<Integer> set2 = new HashSet<>(Arrays.asList(2,3)); set1.addAll(set2);"},
    {"question": "Intersection of two sets", 
     "answer": "set1.retainAll(set2);"},
    {"question": "Difference of two sets", 
     "answer": "set1.removeAll(set2);"},
    
    # Queue/Deque small programs
    {"question": "Peek and Poll from Queue", 
     "answer": "int head = queue.peek(); int removed = queue.poll();"},
    {"question": "Add and remove from Deque", 
     "answer": "deque.addFirst(1); deque.addLast(2); deque.removeFirst(); deque.removeLast();"},
    
    # Misc
    {"question": "Collections.shuffle example", 
     "answer": "Collections.shuffle(list);"},
    {"question": "Collections.sort with Comparator", 
     "answer": "Collections.sort(list, (a,b)->b-a);"},
    {"question": "Convert List to Set", 
     "answer": "Set<Integer> s = new HashSet<>(list);"},
    {"question": "Convert Set to List", 
     "answer": "List<Integer> l = new ArrayList<>(set);"},
    
    # End-marker
    {"question": "java collections summary", 
     "answer": "Java Collections: List (ArrayList, LinkedList), Set (HashSet, TreeSet), Map (HashMap, TreeMap), Queue, Deque. Generics, Iterators, Comparable, Comparator, Collections utility methods."}
]

DATA_DIR = Path.cwd().parent / "data" if Path.cwd().name == "notebook" else Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

collections_path = DATA_DIR / "corpus_java_collections.csv"
df_col = pd.DataFrame(collections)
df_col.to_csv(collections_path, index=False, encoding="utf-8")

master_path = DATA_DIR / "corpus_java.csv"
if master_path.exists():
    df_master = pd.read_csv(master_path)
    combined = pd.concat([df_master, df_col], ignore_index=True)
    combined = combined.drop_duplicates(subset=["question"], keep="first")
    combined.to_csv(master_path, index=False, encoding="utf-8")
else:
    df_col.to_csv(master_path, index=False, encoding="utf-8")

print("Wrote Collections & Generics to:", collections_path)
print("Master corpus at:", master_path)
print("Total entries in master (approx):", len(pd.read_csv(master_path)))

Wrote Collections & Generics to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_collections.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master (approx): 200


In [17]:
import pandas as pd
from pathlib import Path

multithreading = [
    # Basics
    {"question": "What is multithreading in Java?", 
     "answer": "Multithreading is a process of executing multiple threads simultaneously to improve performance."},
    {"question": "What is a thread?", 
     "answer": "A thread is a lightweight sub-process that shares memory but executes independently."},
    {"question": "How to create a thread in Java?", 
     "answer": "By extending Thread class or implementing Runnable interface."},
    {"question": "Example using Thread class", 
     "answer": "class MyThread extends Thread { public void run(){ System.out.println(\"Running\"); } } new MyThread().start();"},
    {"question": "Example using Runnable interface", 
     "answer": "class MyRunnable implements Runnable { public void run(){ System.out.println(\"Running\"); } } new Thread(new MyRunnable()).start();"},
    
    # Thread methods
    {"question": "Difference between start() and run()", 
     "answer": "start() creates a new thread and calls run(); run() executes in the current thread."},
    {"question": "Thread sleep example", 
     "answer": "Thread.sleep(1000); // pauses for 1 second"},
    {"question": "Thread join example", 
     "answer": "t1.start(); t1.join(); // waits for t1 to finish"},
    {"question": "Thread yield example", 
     "answer": "Thread.yield(); // gives chance to other threads"},
    {"question": "Thread priority in Java", 
     "answer": "Threads have priority 1 (MIN) to 10 (MAX), default 5 (NORM)."},
    
    # Synchronization
    {"question": "What is synchronization in Java?", 
     "answer": "Synchronization ensures that only one thread accesses a shared resource at a time."},
    {"question": "Synchronized method example", 
     "answer": "synchronized void print(){ System.out.println(\"Hello\"); }"},
    {"question": "Synchronized block example", 
     "answer": "synchronized(this){ System.out.println(\"Safe block\"); }"},
    {"question": "Static synchronized method", 
     "answer": "static synchronized void demo(){ System.out.println(\"Static lock\"); }"},
    
    # Inter-thread communication
    {"question": "What is inter-thread communication?", 
     "answer": "It allows threads to communicate using wait(), notify(), and notifyAll()."},
    {"question": "wait(), notify(), notifyAll() example", 
     "answer": "synchronized(obj){ obj.wait(); obj.notify(); obj.notifyAll(); }"},
    {"question": "Why use wait() in synchronized block?", 
     "answer": "Because it releases the lock and allows other threads to acquire it."},
    
    # Thread lifecycle
    {"question": "Thread lifecycle states", 
     "answer": "New, Runnable, Running, Blocked/Waiting, Terminated."},
    {"question": "Check if thread is alive", 
     "answer": "t1.isAlive();"},
    {"question": "Stop a thread", 
     "answer": "Use a flag instead of deprecated stop(). Example: while(running){ ... }"},
    
    # Daemon threads
    {"question": "What is daemon thread?", 
     "answer": "Daemon threads run in the background and terminate when all user threads finish."},
    {"question": "Example of daemon thread", 
     "answer": "Thread t = new Thread(task); t.setDaemon(true); t.start();"},
    
    # Thread groups
    {"question": "What is ThreadGroup?", 
     "answer": "A ThreadGroup represents a group of related threads."},
    {"question": "Create ThreadGroup example", 
     "answer": "ThreadGroup tg = new ThreadGroup(\"MyGroup\"); Thread t1 = new Thread(tg, task);"},
    
    # ReentrantLock
    {"question": "What is ReentrantLock in Java?", 
     "answer": "A lock that allows threads to re-enter the same lock multiple times."},
    {"question": "ReentrantLock example", 
     "answer": "Lock lock = new ReentrantLock(); lock.lock(); try{...} finally{lock.unlock();}"},
    
    # Executor Framework
    {"question": "What is Executor framework?", 
     "answer": "It provides a high-level API for managing threads using ThreadPool."},
    {"question": "ExecutorService example", 
     "answer": "ExecutorService ex = Executors.newFixedThreadPool(2); ex.execute(new Task()); ex.shutdown();"},
    {"question": "Callable and Future example", 
     "answer": "Future<Integer> f = ex.submit(() -> 10); int res = f.get();"},
    {"question": "SingleThreadExecutor example", 
     "answer": "ExecutorService ex = Executors.newSingleThreadExecutor(); ex.execute(task); ex.shutdown();"},
    
    # Concurrency Utilities
    {"question": "What is CountDownLatch?", 
     "answer": "A synchronization aid that allows threads to wait until a set of operations complete."},
    {"question": "CountDownLatch example", 
     "answer": "CountDownLatch latch = new CountDownLatch(3); latch.countDown(); latch.await();"},
    {"question": "What is Semaphore?", 
     "answer": "Semaphore controls number of threads accessing a resource."},
    {"question": "Semaphore example", 
     "answer": "Semaphore sem = new Semaphore(2); sem.acquire(); sem.release();"},
    {"question": "What is CyclicBarrier?", 
     "answer": "A barrier that allows threads to wait for each other to reach a common point."},
    {"question": "CyclicBarrier example", 
     "answer": "CyclicBarrier cb = new CyclicBarrier(3); cb.await();"},
    
    # ThreadPool examples
    {"question": "Fixed Thread Pool example", 
     "answer": "ExecutorService pool = Executors.newFixedThreadPool(4); for(int i=0;i<5;i++) pool.execute(task);"},
    {"question": "Cached Thread Pool example", 
     "answer": "ExecutorService pool = Executors.newCachedThreadPool(); pool.execute(task);"},
    {"question": "Scheduled Thread Pool example", 
     "answer": "ScheduledExecutorService ses = Executors.newScheduledThreadPool(1); ses.schedule(task, 5, TimeUnit.SECONDS);"},
    
    # Advanced concurrency
    {"question": "What is ConcurrentHashMap?", 
     "answer": "ConcurrentHashMap allows concurrent access by multiple threads without synchronization on whole map."},
    {"question": "ConcurrentHashMap example", 
     "answer": "ConcurrentHashMap<String,Integer> map = new ConcurrentHashMap<>(); map.put(\"A\",1);"},
    {"question": "What is AtomicInteger?", 
     "answer": "AtomicInteger provides atomic operations on int values."},
    {"question": "AtomicInteger example", 
     "answer": "AtomicInteger count = new AtomicInteger(0); count.incrementAndGet();"},
    {"question": "What is ThreadLocal?", 
     "answer": "ThreadLocal provides thread-local variables unique to each thread."},
    {"question": "ThreadLocal example", 
     "answer": "ThreadLocal<Integer> tl = ThreadLocal.withInitial(() -> 0); tl.set(10);"},
    
    # Practical programs
    {"question": "Print numbers from 1 to 5 using two threads alternately", 
     "answer": "Use wait() and notify() to alternate between even and odd threads."},
    {"question": "Demonstrate thread synchronization with shared counter", 
     "answer": "synchronized void increment(){ count++; }"},
    {"question": "Demonstrate race condition", 
     "answer": "If two threads modify same variable without sync, result may be inconsistent."},
    
    # Summary
    {"question": "Java concurrency summary", 
     "answer": "Java multithreading involves Thread, Runnable, Executor framework, synchronization, locks, wait/notify, and concurrent collections."}
]

DATA_DIR = Path.cwd().parent / "data" if Path.cwd().name == "notebook" else Path("data")
DATA_DIR.mkdir(parents=True, exist_ok=True)

multi_path = DATA_DIR / "corpus_java_multithreading.csv"
df_multi = pd.DataFrame(multithreading)
df_multi.to_csv(multi_path, index=False, encoding="utf-8")

master_path = DATA_DIR / "corpus_java.csv"
if master_path.exists():
    df_master = pd.read_csv(master_path)
    combined = pd.concat([df_master, df_multi], ignore_index=True)
    combined = combined.drop_duplicates(subset=["question"], keep="first")
    combined.to_csv(master_path, index=False, encoding="utf-8")
else:
    df_multi.to_csv(master_path, index=False, encoding="utf-8")

print("Wrote Multithreading & Concurrency to:", multi_path)
print("Master corpus at:", master_path)
print("Total entries in master (approx):", len(pd.read_csv(master_path)))

Wrote Multithreading & Concurrency to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_multithreading.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master (approx): 249


In [23]:
import pandas as pd
from pathlib import Path

BASE_DIR = Path(r"C:\Users\suraj\Desktop\java_chatbot_project")
DATA_DIR = BASE_DIR / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)

MASTER_CORPUS = DATA_DIR / "corpus_java.csv"

file_jdbc_network = [
    {"question": "What is file handling in Java?",
     "answer": "File handling allows Java programs to read and write data to files using java.io and java.nio packages."},

    {"question": "Which package is used for file handling?",
     "answer": "java.io and java.nio packages are used for file handling."},

    {"question": "How to create a file in Java?",
     "answer": "File f = new File(\"demo.txt\"); f.createNewFile();"},

    {"question": "How to check if file exists?",
     "answer": "File f = new File(\"data.txt\"); if(f.exists()) System.out.println(\"Exists\");"},

    {"question": "How to delete a file?",
     "answer": "File f = new File(\"old.txt\"); f.delete();"},

    {"question": "How to write to a file using FileWriter?",
     "answer": "FileWriter fw = new FileWriter(\"out.txt\"); fw.write(\"Hello\"); fw.close();"},

    {"question": "How to read from file using FileReader?",
     "answer": "FileReader fr = new FileReader(\"data.txt\"); int ch; while((ch=fr.read())!=-1) System.out.print((char)ch); fr.close();"},

    {"question": "BufferedReader example",
     "answer": "BufferedReader br = new BufferedReader(new FileReader(\"data.txt\")); String line; while((line=br.readLine())!=null) System.out.println(line); br.close();"},

    {"question": "BufferedWriter example",
     "answer": "BufferedWriter bw = new BufferedWriter(new FileWriter(\"out.txt\")); bw.write(\"Hi\"); bw.newLine(); bw.close();"},

    {"question": "Read file using Scanner",
     "answer": "Scanner sc = new Scanner(new File(\"data.txt\")); while(sc.hasNextLine()) System.out.println(sc.nextLine()); sc.close();"},

    {"question": "What is JDBC?",
     "answer": "JDBC (Java Database Connectivity) is an API that allows Java programs to connect and execute queries on databases."},

    {"question": "Steps to connect JDBC to database",
     "answer": "Load driver, create connection, create statement, execute query, close connection."},

    {"question": "JDBC connection example",
     "answer": "Connection con = DriverManager.getConnection(\"jdbc:mysql://localhost:3306/db\",\"user\",\"pass\");"},

    {"question": "PreparedStatement example",
     "answer": "PreparedStatement ps = con.prepareStatement(\"INSERT INTO emp VALUES (?,?)\"); ps.setInt(1,1); ps.setString(2,\"John\"); ps.executeUpdate();"},

    {"question": "What is networking in Java?",
     "answer": "Java networking allows communication between computers using the java.net package."},

    {"question": "Socket example",
     "answer": "Socket s = new Socket(\"localhost\", 6666);"},

    {"question": "ServerSocket example",
     "answer": "ServerSocket ss = new ServerSocket(6666); Socket s = ss.accept();"},

    {"question": "What is HttpURLConnection?",
     "answer": "HttpURLConnection is used to send HTTP requests and receive responses."},

    {"question": "TCP vs UDP in Java",
     "answer": "TCP is connection-oriented using Socket/ServerSocket, UDP is connectionless using DatagramSocket."}
]


category_path = DATA_DIR / "corpus_java_file_jdbc_network.csv"
df_new = pd.DataFrame(file_jdbc_network)
df_new.to_csv(category_path, index=False, encoding="utf-8")

if MASTER_CORPUS.exists():
    df_master = pd.read_csv(MASTER_CORPUS)
    df_final = pd.concat([df_master, df_new], ignore_index=True)
    df_final = df_final.drop_duplicates(subset=["question"], keep="first")
else:
    df_final = df_new

df_final.to_csv(MASTER_CORPUS, index=False, encoding="utf-8")

print("Wrote File/JDBC/Networking to:", category_path)
print("Master corpus at:", MASTER_CORPUS)
print("Total entries in master:", len(pd.read_csv(MASTER_CORPUS)))

Wrote File/JDBC/Networking to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_file_jdbc_network.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master: 268


In [29]:
import pandas as pd
from pathlib import Path

BASE_DIR = Path(r"C:\Users\suraj\Desktop\java_chatbot_project")
DATA_DIR = BASE_DIR / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)

MASTER_CORPUS = DATA_DIR / "corpus_java.csv"

data_spring_hibernate = [
    ("What is Spring Framework?", "Spring is a lightweight Java framework used to build enterprise applications through dependency injection and aspect-oriented programming."),
    ("What is Dependency Injection in Spring?", "Dependency Injection (DI) allows Spring to automatically provide required objects instead of manually creating them."),
    ("What is Inversion of Control (IoC)?", "IoC is a design principle where object creation and management are delegated to the Spring container."),
    ("What are Spring Beans?", "Beans are objects that form the backbone of a Spring application and are managed by the Spring IoC container."),
    ("What is @Autowired used for?", "@Autowired automatically injects dependencies by type without explicit configuration."),
    ("Explain @Component, @Service, and @Repository.", "@Component is generic, @Service is for business logic, and @Repository is for database operations."),
    ("What is ApplicationContext in Spring?", "ApplicationContext is the central interface for providing configuration information to the Spring container."),
    ("Explain @SpringBootApplication.", "@SpringBootApplication combines @Configuration, @EnableAutoConfiguration, and @ComponentScan annotations."),
    ("What is Spring Boot?", "Spring Boot simplifies application setup by providing defaults and auto-configuration."),
    ("What is DispatcherServlet?", "DispatcherServlet is the front controller in Spring MVC that handles all incoming HTTP requests."),
    ("What is @RestController?", "@RestController combines @Controller and @ResponseBody to create RESTful web services."),
    ("What is @RequestMapping?", "@RequestMapping maps web requests to specific controller methods."),
    ("Explain @PathVariable annotation.", "@PathVariable extracts values from URI paths and binds them to method parameters."),
    ("Explain @RequestParam annotation.", "@RequestParam extracts query parameters from request URLs."),
    ("What is @RequestBody annotation?", "@RequestBody maps the HTTP request body to a Java object."),
    ("What is @ResponseBody annotation?", "@ResponseBody binds the return value of a method to the HTTP response body."),
    ("How to handle exceptions in Spring Boot?", "Use @ControllerAdvice and @ExceptionHandler annotations for global exception handling."),
    ("What is @Transactional?", "@Transactional manages transaction boundaries automatically for methods."),
    ("Explain Spring Data JPA.", "Spring Data JPA simplifies data access by providing repository interfaces for CRUD operations."),
    ("What is JpaRepository?", "JpaRepository extends CrudRepository and adds JPA-specific methods like pagination and flushing."),
    ("What is @Entity in Hibernate?", "@Entity marks a class as a persistent entity representing a database table."),
    ("Explain @Id annotation.", "@Id marks a field as the primary key in a JPA entity."),
    ("Explain @GeneratedValue.", "@GeneratedValue defines the strategy for primary key generation (AUTO, IDENTITY, SEQUENCE)."),
    ("What is Hibernate?", "Hibernate is an ORM framework that maps Java objects to database tables."),
    ("What is SessionFactory?", "SessionFactory creates Session objects and manages the lifecycle of database connections."),
    ("Difference between get() and load() in Hibernate?", "get() returns null if object not found; load() throws ObjectNotFoundException."),
    ("What is lazy loading?", "Lazy loading defers fetching of related data until it is actually accessed."),
    ("What is eager loading?", "Eager loading loads all related data immediately."),
    ("Explain first-level and second-level caching in Hibernate.", "First-level cache is per session; second-level cache is shared across sessions."),
    ("What is @Table annotation?", "@Table defines the database table name for the entity."),
    ("What is @JoinColumn used for?", "@JoinColumn specifies the foreign key column in an association mapping."),
    ("Explain CascadeType.", "CascadeType defines how operations propagate from parent to child entities."),
    ("What is HQL?", "HQL (Hibernate Query Language) is object-oriented and works with entities instead of tables."),
    ("What is JPQL?", "JPQL (Java Persistence Query Language) is similar to SQL but operates on entity objects."),
    ("What is EntityManager?", "EntityManager manages persistence operations like persist, find, merge, and remove."),
    ("Explain OneToMany mapping.", "@OneToMany defines a one-to-many relationship between entities."),
    ("Explain ManyToOne mapping.", "@ManyToOne defines a many-to-one relationship from child to parent entity."),
    ("What is optimistic locking?", "Optimistic locking prevents concurrent update conflicts using version fields."),
    ("What is @EnableJpaRepositories?", "@EnableJpaRepositories enables scanning for JPA repositories."),
    ("What is application.properties used for?", "It is used to define configuration values like database URL, username, and password."),
    ("What is the purpose of data.sql in Spring Boot?", "data.sql is automatically executed at startup to initialize the database with sample data."),
    ("What is Liquibase or Flyway?", "They are tools used for managing database schema versioning and migrations."),
    ("How to test REST APIs in Spring Boot?", "Use @SpringBootTest with MockMvc or RestTemplate for integration testing."),
    ("What is the role of Tomcat in Spring Boot?", "Spring Boot includes an embedded Tomcat server to run web applications directly."),
    ("What is the default port for Spring Boot?", "Spring Boot runs on port 8080 by default."),
    ("How do you change the Spring Boot server port?", "Use 'server.port=9090' in application.properties."),
    ("Explain difference between CrudRepository and JpaRepository.", "JpaRepository extends CrudRepository and adds more JPA-related methods."),
    ("What is @ControllerAdvice used for?", "@ControllerAdvice defines global exception handling and data binding."),
    ("What is the purpose of application.yml?", "application.yml is an alternative to properties file, allowing structured configuration."),
    ("How to secure Spring Boot API?", "Use Spring Security with annotations like @EnableWebSecurity and configure authentication.")
]

df_new = pd.DataFrame(data_spring_hibernate, columns=["question", "answer"])

category_path = DATA_DIR / "corpus_java_spring_hibernate.csv"
df_new.to_csv(category_path, index=False, encoding="utf-8")

if MASTER_CORPUS.exists():
    df_master = pd.read_csv(MASTER_CORPUS)
    df_final = pd.concat([df_master, df_new], ignore_index=True)
    df_final.drop_duplicates(subset=["question"], inplace=True)
else:
    df_final = df_new

temp_path = MASTER_CORPUS.with_suffix(".tmp")
df_final.to_csv(temp_path, index=False, encoding="utf-8")
temp_path.replace(MASTER_CORPUS)

print("Wrote Spring & Hibernate to:", category_path)
print("Master corpus at:", MASTER_CORPUS)
print("Total entries in master:", len(pd.read_csv(MASTER_CORPUS)))


Wrote Spring & Hibernate to: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java_spring_hibernate.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master: 318


In [31]:
import pandas as pd
from pathlib import Path

BASE_DIR = Path(r"C:\Users\suraj\Desktop\java_chatbot_project")
DATA_DIR = BASE_DIR / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)

MASTER_CORPUS = DATA_DIR / "corpus_java.csv"
data_advanced_java = [
    ("What is a Design Pattern?", "A design pattern is a general reusable solution to a commonly occurring problem in software design."),
    ("Explain Singleton Pattern.", "Singleton ensures only one instance of a class exists throughout the application."),
    ("Explain Factory Pattern.", "Factory pattern creates objects without exposing the creation logic to the client."),
    ("What is Builder Pattern?", "Builder pattern constructs complex objects step by step."),
    ("Explain Prototype Pattern.", "Prototype pattern creates new objects by cloning existing ones."),
    ("What is Adapter Pattern?", "Adapter converts the interface of a class into another interface clients expect."),
    ("What is Observer Pattern?", "Observer allows objects to subscribe and get notified when another object changes state."),
    ("What is Strategy Pattern?", "Strategy allows selecting an algorithmâ€™s behavior at runtime."),
    ("What is Decorator Pattern?", "Decorator adds behavior to objects dynamically without altering their structure."),
    ("What is Command Pattern?", "Command encapsulates a request as an object, allowing parameterization of clients."),
    ("What is Proxy Pattern?", "Proxy provides a surrogate object that controls access to another object."),
    ("Explain MVC Pattern.", "MVC separates an application into Model, View, and Controller for better organization."),
    ("What is DAO Pattern?", "DAO abstracts database interactions, allowing data access logic to be separate from business logic."),
    ("Explain DTO Pattern.", "DTO is used to transfer data between layers without exposing entities directly."),
    ("What is Template Method Pattern?", "Template Method defines a skeleton of an algorithm and lets subclasses fill in specific steps."),
    ("What are Lambda Expressions?", "Lambdas allow defining anonymous methods with concise syntax."),
    ("What is Functional Interface?", "A functional interface contains exactly one abstract method."),
    ("What is Stream API?", "Stream API provides a functional way to process collections using map, filter, and reduce."),
    ("Explain Optional class.", "Optional helps avoid null pointer exceptions by providing a container object."),
    ("What are default methods in interfaces?", "Default methods allow adding new methods to interfaces without breaking implementations."),
    ("Explain method references.", "Method references provide shorthand for calling methods using '::' syntax."),
    ("What is map() in streams?", "map() transforms each element of a stream and returns a new stream."),
    ("What is flatMap() in streams?", "flatMap() flattens nested collections in a stream."),
    ("What are terminal operations in streams?", "Terminal operations include collect(), count(), forEach(), reduce(), etc."),
    ("Explain CompletableFuture.", "CompletableFuture allows asynchronous programming with non-blocking calls."),
    ("Explain Optional.orElse().", "orElse() returns a default value if Optional is empty."),
    ("What is JUnit?", "JUnit is a Java testing framework for unit testing."),
    ("What is @Test annotation?", "@Test marks a method as a test case in JUnit."),
    ("What is Mockito?", "Mockito is a mocking framework used to simulate dependencies in tests."),
    ("Explain assertEquals() in JUnit.", "assertEquals() checks whether two values are equal in a test."),
    ("What is TestNG?", "TestNG is a testing framework that provides advanced features like dependency testing."),
    ("What is Maven?", "Maven is a build automation tool that manages project dependencies and lifecycle."),
    ("What is Gradle?", "Gradle is a build tool that uses Groovy/Kotlin DSL for flexible build scripts."),
    ("What is Jenkins?", "Jenkins automates builds, testing, and deployment in CI/CD pipelines."),
    ("What is Git?", "Git is a distributed version control system for tracking code changes."),
    ("What is GitHub?", "GitHub is a cloud-based platform for hosting and collaborating on Git repositories."),
    ("Explain version control.", "Version control tracks changes and allows multiple developers to work on code simultaneously."),
    ("What is reflection in Java?", "Reflection allows inspection and modification of classes and methods at runtime."),
    ("What is annotation processing?", "Annotation processing generates code or performs validation at compile time."),
    ("What is garbage collection?", "Garbage collection automatically removes unused objects from memory."),
    ("What is volatile keyword?", "volatile ensures variable visibility across threads."),
    ("What is synchronization?", "Synchronization ensures mutual exclusion in multithreaded environments."),
    ("What is ExecutorService?", "ExecutorService manages and executes threads asynchronously."),
    ("What is ForkJoinPool?", "ForkJoinPool executes tasks in parallel using work-stealing algorithm."),
    ("What is parallelStream()?", "parallelStream() processes stream elements in parallel for faster performance."),
    ("What is collect() in streams?", "collect() gathers stream results into a collection or value."),
    ("What is CompletableFuture.thenApply()?", "thenApply() transforms the result of a CompletableFuture when it completes."),
    ("What is the difference between Collection and Stream?", "Collection stores data; Stream processes data in a functional way."),
    ("Explain Predicate interface.", "Predicate is a functional interface used to test a condition and return boolean."),
    ("Explain Supplier interface.", "Supplier is a functional interface that returns a value without taking any input.")
]
df_new = pd.DataFrame(data_advanced_java, columns=["question", "answer"])

category_path = DATA_DIR / "data_advanced_java.csv"
df_new.to_csv(category_path, index=False, encoding="utf-8")

if MASTER_CORPUS.exists():
    df_master = pd.read_csv(MASTER_CORPUS)
    df_final = pd.concat([df_master, df_new], ignore_index=True)
    df_final.drop_duplicates(subset=["question"], inplace=True)
else:
    df_final = df_new

temp_path = MASTER_CORPUS.with_suffix(".tmp")
df_final.to_csv(temp_path, index=False, encoding="utf-8")
temp_path.replace(MASTER_CORPUS)

print("Wrote Spring & Hibernate to:", category_path)
print("Master corpus at:", MASTER_CORPUS)
print("Total entries in master:", len(pd.read_csv(MASTER_CORPUS)))

Wrote Spring & Hibernate to: C:\Users\suraj\Desktop\java_chatbot_project\data\data_advanced_java.csv
Master corpus at: C:\Users\suraj\Desktop\java_chatbot_project\data\corpus_java.csv
Total entries in master: 368


In [33]:
import json
from pathlib import Path

BASE_DIR = Path(r"C:\Users\suraj\Desktop\java_chatbot_project")
DATA_DIR = BASE_DIR / "data"
DATA_DIR.mkdir(parents=True, exist_ok=True)

errors = {
    # 1. SYNTAX ERRORS
    "missing_semicolon": {
        "pattern": r"';' expected",
        "title": "Missing Semicolon",
        "explanation": "Every Java statement must end with a semicolon. The compiler throws this when it encounters a missing one.",
        "fix_example": "Add a semicolon: System.out.println(\"Hello\");"
    },
    "unmatched_parentheses": {
        "pattern": r"')' expected",
        "title": "Unmatched Parentheses",
        "explanation": "A closing parenthesis is missing or misplaced.",
        "fix_example": "Ensure every '(' has a matching ')'."
    },
    "unmatched_curly_braces": {
        "pattern": r"reached end of file while parsing",
        "title": "Unmatched Curly Braces",
        "explanation": "A closing curly brace '}' is missing for a class, method, or block.",
        "fix_example": "Match all '{' with corresponding '}'."
    },
    "unmatched_square_brackets": {
        "pattern": r"'\\]' expected",
        "title": "Unmatched Square Brackets",
        "explanation": "A closing square bracket ']' is missing in array or index syntax.",
        "fix_example": "Ensure every '[' has a corresponding ']'."
    },
    "invalid_identifier": {
        "pattern": r"not a statement",
        "title": "Invalid Identifier",
        "explanation": "An identifier is invalid or used incorrectly, causing a syntax issue.",
        "fix_example": "Avoid using keywords or illegal characters as identifiers."
    },
    "keyword_misuse": {
        "pattern": r"illegal start of expression",
        "title": "Keyword Misuse",
        "explanation": "A keyword is used where an expression or identifier is expected.",
        "fix_example": "Check if-else, for, class, and other keywords are used in proper contexts."
    },
    "illegal_start_of_expression": {
        "pattern": r"illegal start of expression",
        "title": "Illegal Start of Expression",
        "explanation": "Occurs when a statement is misplaced or a keyword is used incorrectly.",
        "fix_example": "Ensure statements are within methods or blocks and follow Java syntax."
    },
    "illegal_start_of_type": {
        "pattern": r"illegal start of type",
        "title": "Illegal Start of Type",
        "explanation": "Class or interface definitions contain invalid modifiers or misplaced elements.",
        "fix_example": "Check class-level declarations and remove misplaced tokens."
    },
    "expected_token": {
        "pattern": r"expected",
        "title": "Expected Token Error",
        "explanation": "The compiler expected a specific symbol (like ')', '}', ';') but did not find it.",
        "fix_example": "Read the error message to see which symbol is expected and insert it."
    },
    "reached_end_of_file_while_parsing": {
        "pattern": r"reached end of file while parsing",
        "title": "Reached End of File While Parsing",
        "explanation": "The compiler hit the end of the file before completing parsing.",
        "fix_example": "Check for missing closing braces, parentheses, or unfinished statements."
    },
    "class_interface_enum_expected": {
        "pattern": r"class, interface, enum expected",
        "title": "Class, Interface, or Enum Expected",
        "explanation": "Top-level code must be a class, interface, or enum declaration.",
        "fix_example": "Move executable statements inside a class or method."
    },
    "method_declaration_syntax_error": {
        "pattern": r"invalid method declaration",
        "title": "Method Declaration Syntax Error",
        "explanation": "Method declaration syntax is incorrect.",
        "fix_example": "Use: [modifiers] returnType methodName(params) { ... }"
    },
    "variable_declaration_syntax_error": {
        "pattern": r"not a statement",
        "title": "Variable Declaration Syntax Error",
        "explanation": "Variable declarations are malformed or placed incorrectly.",
        "fix_example": "Use: type name = value; inside a method or class."
    },
    "invalid_method_signature": {
        "pattern": r"invalid method declaration; return type required",
        "title": "Invalid Method Signature",
        "explanation": "Method declaration is missing a valid return type or name.",
        "fix_example": "Specify a return type and a legal method name."
    },
    "incorrect_access_modifier_usage": {
        "pattern": r"modifier (public|private|protected) not allowed here",
        "title": "Incorrect Access Modifier Usage",
        "explanation": "An access modifier is used in a context where it is not allowed.",
        "fix_example": "Use access modifiers only in class, method, or field declarations."
    },
    "duplicate_modifier": {
        "pattern": r"repeated modifier",
        "title": "Duplicate Modifier",
        "explanation": "A modifier such as public, static, or final is repeated unnecessarily.",
        "fix_example": "Remove duplicate modifiers, e.g., 'public public class Test' is invalid."
    },
    "invalid_modifier": {
        "pattern": r"modifier .* not allowed here",
        "title": "Invalid Modifier",
        "explanation": "A modifier is being used where it is not permitted.",
        "fix_example": "Check Java rules for where modifiers like static, abstract, final are allowed."
    },
    "static_context_syntax_error": {
        "pattern": r"non-static .* cannot be referenced from a static context",
        "title": "Static Context Syntax Error",
        "explanation": "Instance members are used directly inside a static context.",
        "fix_example": "Create an instance or make members static if appropriate."
    },
    "return_type_missing": {
        "pattern": r"invalid method declaration; return type required",
        "title": "Return Type Missing",
        "explanation": "A return type is missing in method declaration.",
        "fix_example": "Add a return type, e.g., void or an appropriate type."
    },
    "invalid_return_statement_syntax": {
        "pattern": r"'return' outside method",
        "title": "Invalid Return Statement",
        "explanation": "A return statement appears outside a method or constructor.",
        "fix_example": "Ensure return is only inside methods or constructors."
    },
    "constructor_syntax_error": {
        "pattern": r"invalid method declaration; return type required",
        "title": "Constructor Syntax Error",
        "explanation": "Constructor is incorrectly declared with a return type or wrong name.",
        "fix_example": "Constructor must have same name as class and no return type."
    },
    "package_declaration_syntax_error": {
        "pattern": r"unexpected type",
        "title": "Package Declaration Syntax Error",
        "explanation": "The package declaration may be malformed or not at the top of the file.",
        "fix_example": "Use: package com.example; as the first non-comment line."
    },
    "import_statement_syntax_error": {
        "pattern": r"cannot find symbol\s*symbol:\s*class",
        "title": "Import Statement Syntax Error",
        "explanation": "The import statement refers to a non-existing or incorrect package/class.",
        "fix_example": "Verify package and class names in import statements."
    },
    "invalid_statement_placement": {
        "pattern": r"class, interface, enum expected",
        "title": "Invalid Statement Placement",
        "explanation": "Statements are placed where only declarations are allowed.",
        "fix_example": "Move executable code into methods or blocks."
    },
    "unterminated_string_literal": {
        "pattern": r"unclosed string literal",
        "title": "Unterminated String Literal",
        "explanation": "A string literal is missing the closing quote.",
        "fix_example": "Ensure all string literals start and end with double quotes."
    },
    "invalid_escape_sequence": {
        "pattern": r"illegal escape character",
        "title": "Invalid Escape Sequence",
        "explanation": "An invalid escape sequence is used in a string or char literal.",
        "fix_example": "Use valid escapes like \\n, \\t, \\\\\" or \\\\."
    },
    "invalid_character_literal": {
        "pattern": r"unclosed character literal",
        "title": "Invalid Character Literal",
        "explanation": "A character literal is not properly formed.",
        "fix_example": "Use single quotes around a single character, e.g., 'a'."
    },
    "numeric_literal_format_error": {
        "pattern": r"integer number too large|malformed floating point literal",
        "title": "Numeric Literal Format Error",
        "explanation": "A numeric literal is out of range or incorrectly formatted.",
        "fix_example": "Check for correct use of digits, underscores, and suffixes like L or F."
    },
    "array_declaration_syntax_error": {
        "pattern": r"illegal start of expression",
        "title": "Array Declaration Syntax Error",
        "explanation": "Array syntax is incorrect or placed improperly.",
        "fix_example": "Use: int[] arr = new int[10];"
    },
    "generic_syntax_error": {
        "pattern": r"illegal start of type|cannot infer type arguments for",
        "title": "Generic Syntax Error",
        "explanation": "Generic type parameters are misused or incorrectly declared.",
        "fix_example": "Check generic type usage like List<String> and ensure proper syntax."
    },
    "annotation_syntax_error": {
        "pattern": r"annotation type not applicable to this kind of declaration",
        "title": "Annotation Syntax Error",
        "explanation": "An annotation is used in an unsupported context.",
        "fix_example": "Use annotations only on supported elements (class, method, field, etc.)."
    },
    "switch_statement_syntax_error": {
        "pattern": r"orphaned case",
        "title": "Switch Statement Syntax Error",
        "explanation": "A case or default label appears outside a switch statement.",
        "fix_example": "Place case/default labels only inside a valid switch block."
    },
    "try_catch_syntax_error": {
        "pattern": r"'catch' without 'try'",
        "title": "Tryâ€“Catch Syntax Error",
        "explanation": "A catch block appears without a corresponding try block.",
        "fix_example": "Ensure every catch follows a try block."
    },
    "catch_block_ordering_error": {
        "pattern": r"exception .* has already been caught",
        "title": "Catch Block Ordering Error",
        "explanation": "A more specific exception is caught after a more general one.",
        "fix_example": "Place specific exception catch blocks before general ones."
    },
    "finally_block_syntax_error": {
        "pattern": r"'finally' without 'try'",
        "title": "Finally Block Syntax Error",
        "explanation": "A finally block appears without a try block.",
        "fix_example": "Use finally only after a valid try (and optional catch)."
    },
    "if_else_syntax_error": {
        "pattern": r"'else' without 'if'",
        "title": "Ifâ€“Else Syntax Error",
        "explanation": "An else statement does not have a matching if.",
        "fix_example": "Ensure every else has a corresponding if with proper braces."
    },
    "loop_syntax_error": {
        "pattern": r"'while' expected|'for' expected",
        "title": "Loop Syntax Error",
        "explanation": "Loop syntax for for/while/do-while is incorrect.",
        "fix_example": "Check parentheses, semicolons, and block braces in loop statements."
    },
    "lambda_syntax_error": {
        "pattern": r"lambda expressions are not supported in -source",
        "title": "Lambda Expression Syntax/Error",
        "explanation": "Lambda expression used with an unsupported source level or incorrect syntax.",
        "fix_example": "Use correct lambda syntax and Java 8+ compilation options."
    },
    "enum_declaration_syntax_error": {
        "pattern": r"enum constant expected",
        "title": "Enum Declaration Syntax Error",
        "explanation": "Enum constants or body are incorrectly defined.",
        "fix_example": "Separate enum constants with commas and define them before methods."
    },
    "interface_method_syntax_error": {
        "pattern": r"; expected",
        "title": "Interface Method Syntax Error",
        "explanation": "Interface methods in older Java versions must be abstract-like declarations without bodies.",
        "fix_example": "Use method signatures without bodies, or use default/static methods properly in newer Java."
    },
    "abstract_method_syntax_error": {
        "pattern": r"missing method body, or declare abstract",
        "title": "Abstract Method Syntax Error",
        "explanation": "An abstract method lacks a body or is not marked abstract.",
        "fix_example": "Either provide a body or declare the method abstract in an abstract class."
    },

    # 2. COMPILE-TIME ERRORS
    "type_mismatch": {
        "pattern": r"incompatible types",
        "title": "Type Mismatch",
        "explanation": "The assigned value type does not match the variable type.",
        "fix_example": "Ensure both sides of the assignment use compatible types or cast safely."
    },
    "incompatible_data_type_assignment": {
        "pattern": r"incompatible types",
        "title": "Incompatible Data Type Assignment",
        "explanation": "A value is assigned to a variable of an incompatible type.",
        "fix_example": "Use proper type conversion or correct the variable type."
    },
    "possible_lossy_conversion": {
        "pattern": r"possible lossy conversion",
        "title": "Possible Lossy Conversion",
        "explanation": "A narrowing primitive conversion may result in data loss.",
        "fix_example": "Use an explicit cast only if safe, or use a wider type."
    },
    "incompatible_return_type": {
        "pattern": r"incompatible types: .* cannot be converted to",
        "title": "Incompatible Return Type",
        "explanation": "Method return type doesn't match the declared type.",
        "fix_example": "Change return type or return value to make them compatible."
    },
    "cannot_find_symbol": {
        "pattern": r"cannot find symbol",
        "title": "Cannot Find Symbol",
        "explanation": "A variable, method, or class is referenced but not defined or imported.",
        "fix_example": "Check spelling, imports, and declarations."
    },
    "variable_not_initialized": {
        "pattern": r"variable .* might not have been initialized",
        "title": "Variable Not Initialized",
        "explanation": "A local variable is used before being given a value.",
        "fix_example": "Assign a value to the variable before first use."
    },
    "unreachable_statement": {
        "pattern": r"unreachable statement",
        "title": "Unreachable Statement",
        "explanation": "The compiler detects code that can never execute.",
        "fix_example": "Remove or refactor code so that all statements are reachable."
    },
    "missing_return_statement": {
        "pattern": r"missing return statement",
        "title": "Missing Return Statement",
        "explanation": "A method with non-void return type does not always return a value.",
        "fix_example": "Return a value on all code paths in the method."
    },
    "incompatible_operand_types": {
        "pattern": r"bad operand types for binary operator",
        "title": "Incompatible Operand Types",
        "explanation": "An operator is applied to operands of incompatible types.",
        "fix_example": "Ensure correct operand types or cast appropriately."
    },
    "operator_cannot_be_applied": {
        "pattern": r"operator .* cannot be applied to",
        "title": "Operator Cannot Be Applied",
        "explanation": "An operator is used with unsupported operand types.",
        "fix_example": "Check operator and operand types for compatibility."
    },
    "array_index_type_mismatch": {
        "pattern": r"array required, but .* found",
        "title": "Array Index Type Mismatch",
        "explanation": "Array indexing used on a non-array type or with wrong syntax.",
        "fix_example": "Ensure the variable is an array and index is an int."
    },
    "invalid_cast": {
        "pattern": r"inconvertible types",
        "title": "Invalid Cast",
        "explanation": "A cast between incompatible reference types was attempted.",
        "fix_example": "Use instanceof before casting and ensure the cast is legal."
    },
    "compile_time_class_not_found": {
        "pattern": r"cannot find symbol\s*symbol:\s*class",
        "title": "Class Not Found (Compile Time)",
        "explanation": "The referenced class is not found by the compiler.",
        "fix_example": "Add the correct import or ensure the class is in the classpath."
    },
    "package_does_not_exist": {
        "pattern": r"package .* does not exist",
        "title": "Package Does Not Exist",
        "explanation": "The imported package cannot be found.",
        "fix_example": "Check package spelling and add required libraries."
    },
    "duplicate_class": {
        "pattern": r"duplicate class",
        "title": "Duplicate Class",
        "explanation": "Two classes with the same fully qualified name exist.",
        "fix_example": "Remove or rename duplicate class files."
    },
    "duplicate_method_signature": {
        "pattern": r"method .* is already defined in class",
        "title": "Duplicate Method Signature",
        "explanation": "A method with the same signature exists already in the class.",
        "fix_example": "Rename the method or change its parameters."
    },
    "duplicate_variable": {
        "pattern": r"is already defined in",
        "title": "Duplicate Variable",
        "explanation": "A variable with the same name already exists in the scope.",
        "fix_example": "Rename the variable or remove the duplicate declaration."
    },
    "method_not_found": {
        "pattern": r"cannot find symbol\s*symbol:\s*method",
        "title": "Method Not Found",
        "explanation": "The method is not declared or visible in the class.",
        "fix_example": "Check method name, parameters, and class imports."
    },
    "method_argument_mismatch": {
        "pattern": r"no suitable method found",
        "title": "Method Argument Mismatch",
        "explanation": "No method matches the provided argument types.",
        "fix_example": "Ensure arguments match the declared parameter types."
    },
    "ambiguous_method_call": {
        "pattern": r"reference to .* is ambiguous",
        "title": "Ambiguous Method Call",
        "explanation": "The compiler cannot decide between multiple overloaded methods.",
        "fix_example": "Cast arguments explicitly or change method names."
    },
    "overriding_signature_mismatch": {
        "pattern": r"method does not override or implement a method from a supertype",
        "title": "Overriding Method Signature Mismatch",
        "explanation": "A method is marked @Override but does not match any superclass/interface method.",
        "fix_example": "Adjust method signature to match the method being overridden."
    },
    "override_weaker_access": {
        "pattern": r"attempting to assign weaker access privileges",
        "title": "Overriding with Weaker Access Privilege",
        "explanation": "An overriding method cannot have more restrictive access than the overridden method.",
        "fix_example": "Use same or less restrictive access (e.g., public instead of private)."
    },
    "override_final_method": {
        "pattern": r"cannot override final method",
        "title": "Overriding Final Method",
        "explanation": "Final methods cannot be overridden.",
        "fix_example": "Remove override attempt or do not declare the superclass method as final."
    },
    "override_static_method": {
        "pattern": r"static method .* in .* cannot override",
        "title": "Overriding Static Method",
        "explanation": "Static methods are hidden, not overridden.",
        "fix_example": "Remove @Override annotation or design differently."
    },
    "cannot_override_return_type": {
        "pattern": r"return type is incompatible with",
        "title": "Cannot Override Return Type",
        "explanation": "Overriding method returns a type incompatible with the superclass method.",
        "fix_example": "Use a covariant return type or match the superclass return type."
    },
    "abstract_method_not_implemented": {
        "pattern": r"is not abstract and does not override abstract method",
        "title": "Abstract Method Not Implemented",
        "explanation": "A concrete class does not implement all abstract methods.",
        "fix_example": "Implement all abstract methods or make the class abstract."
    },
    "instantiation_of_abstract_class": {
        "pattern": r"cannot instantiate the type",
        "title": "Instantiation of Abstract Class",
        "explanation": "Abstract classes cannot be instantiated directly.",
        "fix_example": "Instantiate a concrete subclass instead."
    },
    "instantiation_of_interface": {
        "pattern": r"cannot instantiate the type",
        "title": "Instantiation of Interface",
        "explanation": "Interfaces cannot be instantiated directly.",
        "fix_example": "Use a class implementing the interface or an anonymous implementation."
    },
    "final_variable_reassignment": {
        "pattern": r"cannot assign a value to final variable",
        "title": "Final Variable Reassignment",
        "explanation": "A final variable is being assigned a new value.",
        "fix_example": "Remove the reassignment or do not declare the variable final."
    },
    "final_method_override_attempt": {
        "pattern": r"cannot override final method",
        "title": "Final Method Override Attempt",
        "explanation": "Attempt to override a method that is declared final.",
        "fix_example": "Do not override the method or remove final from the parent method."
    },
    "final_class_inheritance": {
        "pattern": r"cannot inherit from final",
        "title": "Final Class Inheritance Error",
        "explanation": "A final class cannot be extended.",
        "fix_example": "Do not extend final classes; use composition instead."
    },
    "static_reference_to_nonstatic": {
        "pattern": r"non-static .* cannot be referenced from a static context",
        "title": "Static Reference to Non-Static Member",
        "explanation": "Static methods cannot directly access instance members.",
        "fix_example": "Create an instance or make members static if appropriate."
    },
    "invalid_throws_clause": {
        "pattern": r"exception .* is never thrown in body of corresponding try statement",
        "title": "Invalid Throws or Catch Clause",
        "explanation": "A catch or throws clause references an exception that cannot be thrown.",
        "fix_example": "Remove the unused exception or adjust the code to actually throw it."
    },
    "unhandled_checked_exception": {
        "pattern": r"unreported exception .*; must be caught or declared to be thrown",
        "title": "Unhandled Checked Exception",
        "explanation": "A checked exception is not handled or declared.",
        "fix_example": "Wrap it in try-catch or add it to the method's throws clause."
    },
    "cyclic_inheritance": {
        "pattern": r"cyclic inheritance involving",
        "title": "Cyclic Inheritance Error",
        "explanation": "Class hierarchies must not form cycles.",
        "fix_example": "Remove circular inheritance relationships."
    },
    "invalid_super_constructor_call": {
        "pattern": r"constructor .* in class .* cannot be applied to given types",
        "title": "Invalid Superclass Constructor Call",
        "explanation": "The super() call does not match any superclass constructor.",
        "fix_example": "Adjust super() arguments or change superclass constructors."
    },
    "no_default_constructor": {
        "pattern": r"cannot find symbol\s*symbol:\s*constructor",
        "title": "No Default Constructor Available",
        "explanation": "A class without a no-arg constructor is being instantiated with no arguments.",
        "fix_example": "Add a default constructor or call an existing one with parameters."
    },
    "accessing_private_member": {
        "pattern": r"has private access",
        "title": "Accessing Private Member",
        "explanation": "Code is trying to access a private member from outside its class.",
        "fix_example": "Use getters/setters or adjust access modifier."
    },
    "protected_access_violation": {
        "pattern": r"has protected access",
        "title": "Protected Access Violation",
        "explanation": "Protected member accessed from a non-subclass in a different package.",
        "fix_example": "Access via subclass or adjust access modifiers."
    },
    "package_private_access_violation": {
        "pattern": r"is not public in .*; cannot be accessed from outside package",
        "title": "Package-Private Access Violation",
        "explanation": "A class with default (package-private) access is used from another package.",
        "fix_example": "Make the class public or move code into the same package."
    },
    "generic_type_bounds_violation": {
        "pattern": r"bound mismatch",
        "title": "Generic Type Bounds Violation",
        "explanation": "Type argument does not satisfy the generic bounds.",
        "fix_example": "Use a type parameter that meets all declared bounds."
    },
    "raw_type_usage": {
        "pattern": r"uses unchecked or unsafe operations",
        "title": "Raw Type Usage",
        "explanation": "A generic type is used without specifying type parameters.",
        "fix_example": "Use parameterized types like List<String> instead of raw List."
    },
    "type_erasure_conflict": {
        "pattern": r"name clash: .* have the same erasure",
        "title": "Type Erasure Conflict",
        "explanation": "Overloaded methods conflict after generic type erasure.",
        "fix_example": "Use distinct method names or change parameter lists."
    },
    "incompatible_generic_assignment": {
        "pattern": r"incompatible types: .* cannot be converted to",
        "title": "Incompatible Generic Assignment",
        "explanation": "Assignment between generic types that are not compatible.",
        "fix_example": "Use compatible generic types or wildcards."
    },
    "annotation_type_mismatch": {
        "pattern": r"incompatible types: .* cannot be converted to",
        "title": "Annotation Type Mismatch",
        "explanation": "Annotation value type does not match the element type.",
        "fix_example": "Provide a value matching the expected type."
    },
    "invalid_annotation_target": {
        "pattern": r"annotation type not applicable to this kind of declaration",
        "title": "Invalid Annotation Target",
        "explanation": "Annotation applied to an unsupported program element.",
        "fix_example": "Use annotation only on supported targets."
    },
    "invalid_functional_interface": {
        "pattern": r"is not a functional interface",
        "title": "Invalid Functional Interface",
        "explanation": "Functional interface must have exactly one abstract method.",
        "fix_example": "Ensure only one abstract method or use a non-functional interface."
    },
    "lambda_parameter_type_mismatch": {
        "pattern": r"incompatible parameter types in lambda expression",
        "title": "Lambda Parameter Type Mismatch",
        "explanation": "Lambda parameter types do not match the target functional interface.",
        "fix_example": "Ensure lambda parameters match the abstract method's parameters."
    },

    # 3. RUNTIME ERRORS
    "ArithmeticException": {
        "pattern": r"ArithmeticException",
        "title": "ArithmeticException",
        "explanation": "An invalid arithmetic operation occurred, often division by zero.",
        "fix_example": "Check operands before division or modulo operations."
    },
    "NullPointerException": {
        "pattern": r"NullPointerException",
        "title": "NullPointerException",
        "explanation": "Code attempted to dereference a null reference.",
        "fix_example": "Add null checks or ensure objects are properly initialized."
    },
    "ArrayIndexOutOfBoundsException": {
        "pattern": r"ArrayIndexOutOfBoundsException",
        "title": "ArrayIndexOutOfBoundsException",
        "explanation": "An array index is outside the valid range.",
        "fix_example": "Ensure index is between 0 and array.length - 1."
    },
    "StringIndexOutOfBoundsException": {
        "pattern": r"StringIndexOutOfBoundsException",
        "title": "StringIndexOutOfBoundsException",
        "explanation": "A string index is outside the valid range.",
        "fix_example": "Check string length before using charAt or substring."
    },
    "IndexOutOfBoundsException": {
        "pattern": r"IndexOutOfBoundsException",
        "title": "IndexOutOfBoundsException",
        "explanation": "An index in a list or collection is out of range.",
        "fix_example": "Check index against collection size before access."
    },
    "NumberFormatException": {
        "pattern": r"NumberFormatException",
        "title": "NumberFormatException",
        "explanation": "A string that is not a valid number was parsed as a number.",
        "fix_example": "Validate or sanitize input before parsing."
    },
    "ClassCastException": {
        "pattern": r"ClassCastException",
        "title": "ClassCastException",
        "explanation": "An object was cast to an incompatible type.",
        "fix_example": "Use instanceof before casting and ensure correct type usage."
    },
    "IllegalArgumentException": {
        "pattern": r"IllegalArgumentException",
        "title": "IllegalArgumentException",
        "explanation": "A method received an argument that is illegal or inappropriate.",
        "fix_example": "Validate method arguments and check bounds or nullability."
    },
    "IllegalStateException": {
        "pattern": r"IllegalStateException",
        "title": "IllegalStateException",
        "explanation": "A method has been invoked at an illegal or inappropriate time.",
        "fix_example": "Check object state and method preconditions before invocation."
    },
    "NegativeArraySizeException": {
        "pattern": r"NegativeArraySizeException",
        "title": "NegativeArraySizeException",
        "explanation": "An attempt was made to create an array with negative size.",
        "fix_example": "Verify that array sizes are non-negative."
    },
    "ArrayStoreException": {
        "pattern": r"ArrayStoreException",
        "title": "ArrayStoreException",
        "explanation": "An array element of an incompatible type was stored.",
        "fix_example": "Ensure that only compatible types are stored in the array."
    },
    "ConcurrentModificationException": {
        "pattern": r"ConcurrentModificationException",
        "title": "ConcurrentModificationException",
        "explanation": "A collection was modified while iterating over it.",
        "fix_example": "Use Iterator.remove() or concurrent collections."
    },
    "UnsupportedOperationException": {
        "pattern": r"UnsupportedOperationException",
        "title": "UnsupportedOperationException",
        "explanation": "An unsupported operation was attempted, often on an unmodifiable collection.",
        "fix_example": "Check if the collection is modifiable before attempting modifications."
    },
    "NoSuchElementException": {
        "pattern": r"NoSuchElementException",
        "title": "NoSuchElementException",
        "explanation": "An attempt was made to access an element that does not exist.",
        "fix_example": "Check hasNext()/hasMoreElements() before calling next()."
    },
    "InputMismatchException": {
        "pattern": r"InputMismatchException",
        "title": "InputMismatchException",
        "explanation": "Scanner input type does not match the expected type.",
        "fix_example": "Validate input or use nextLine and parse manually."
    },
    "SecurityException": {
        "pattern": r"SecurityException",
        "title": "SecurityException",
        "explanation": "A security violation is detected by the security manager.",
        "fix_example": "Check security policy restrictions and requested operation."
    },
    "UnsupportedClassVersionError": {
        "pattern": r"UnsupportedClassVersionError",
        "title": "UnsupportedClassVersionError",
        "explanation": "Class was compiled with a higher Java version than the runtime.",
        "fix_example": "Recompile with a compatible target version or update the JVM."
    },
    "StackOverflowError": {
        "pattern": r"StackOverflowError",
        "title": "StackOverflowError",
        "explanation": "Excessive recursion or deep call stack exhausted the stack memory.",
        "fix_example": "Fix recursion or reduce call depth."
    },
    "OutOfMemoryError": {
        "pattern": r"OutOfMemoryError",
        "title": "OutOfMemoryError",
        "explanation": "The JVM heap is exhausted.",
        "fix_example": "Optimize memory usage or increase heap size."
    },
    "AssertionError": {
        "pattern": r"AssertionError",
        "title": "AssertionError",
        "explanation": "An assert statement failed at runtime.",
        "fix_example": "Verify assertion conditions or disable assertions in production."
    },
    "ExceptionInInitializerError": {
        "pattern": r"ExceptionInInitializerError",
        "title": "ExceptionInInitializerError",
        "explanation": "An exception occurred during class initialization (static block).",
        "fix_example": "Check static initializers and handle exceptions properly."
    },
    "NoClassDefFoundError": {
        "pattern": r"NoClassDefFoundError",
        "title": "NoClassDefFoundError",
        "explanation": "The JVM could not find a class definition at runtime that was present at compile time.",
        "fix_example": "Ensure the required class is in the runtime classpath."
    },
    "AbstractMethodError": {
        "pattern": r"AbstractMethodError",
        "title": "AbstractMethodError",
        "explanation": "An application tries to call an abstract method that has not been implemented.",
        "fix_example": "Ensure that all abstract methods are implemented in subclasses."
    },
    "UnsatisfiedLinkError": {
        "pattern": r"UnsatisfiedLinkError",
        "title": "UnsatisfiedLinkError",
        "explanation": "A native method could not be found.",
        "fix_example": "Verify that native libraries are loaded and in the correct path."
    },
    "VerifyError": {
        "pattern": r"VerifyError",
        "title": "VerifyError",
        "explanation": "Bytecode verification failed.",
        "fix_example": "Ensure classes are compiled correctly and not modified improperly."
    },
    "IncompatibleClassChangeError": {
        "pattern": r"IncompatibleClassChangeError",
        "title": "IncompatibleClassChangeError",
        "explanation": "A class has changed incompatibly since being compiled.",
        "fix_example": "Recompile dependent classes and keep versions consistent."
    },
    "InstantiationError": {
        "pattern": r"InstantiationError",
        "title": "InstantiationError",
        "explanation": "An abstract class or interface was incorrectly instantiated at runtime.",
        "fix_example": "Instantiate only concrete classes."
    },
    "LinkageError": {
        "pattern": r"LinkageError",
        "title": "LinkageError",
        "explanation": "A serious problem with class loading or linking has occurred.",
        "fix_example": "Check for conflicting classes/JARs and version mismatches."
    },
    "ThreadDeath": {
        "pattern": r"ThreadDeath",
        "title": "ThreadDeath",
        "explanation": "The stop method was called on a thread, causing it to terminate.",
        "fix_example": "Avoid using Thread.stop(); use interruption and flags instead."
    },
    "IllegalMonitorStateException": {
        "pattern": r"IllegalMonitorStateException",
        "title": "IllegalMonitorStateException",
        "explanation": "wait/notify was called on an object without owning its monitor.",
        "fix_example": "Call wait/notify inside synchronized blocks on the same object."
    },
    "InterruptedException": {
        "pattern": r"InterruptedException",
        "title": "InterruptedException",
        "explanation": "A blocking operation was interrupted.",
        "fix_example": "Handle interrupts properly and restore thread interrupt status if needed."
    },
    "ExecutionException": {
        "pattern": r"ExecutionException",
        "title": "ExecutionException",
        "explanation": "An exception was thrown during the execution of a task in a Future.",
        "fix_example": "Check the cause of the ExecutionException and handle it accordingly."
    },
    "TimeoutException": {
        "pattern": r"TimeoutException",
        "title": "TimeoutException",
        "explanation": "An operation timed out.",
        "fix_example": "Increase timeout or ensure operations complete in time."
    },
    "EOFException": {
        "pattern": r"EOFException",
        "title": "EOFException",
        "explanation": "Unexpected end of file or stream encountered.",
        "fix_example": "Ensure correct protocol and complete data transmission."
    },
    "FileNotFoundException": {
        "pattern": r"FileNotFoundException",
        "title": "FileNotFoundException",
        "explanation": "A file with the specified path does not exist.",
        "fix_example": "Check file path and ensure the file exists."
    },
    "IOException": {
        "pattern": r"IOException",
        "title": "IOException",
        "explanation": "A general I/O error occurred.",
        "fix_example": "Handle I/O exceptions with try-catch and provide fallbacks."
    },
    "SocketException": {
        "pattern": r"SocketException",
        "title": "SocketException",
        "explanation": "A network socket error occurred.",
        "fix_example": "Check network connectivity and socket configuration."
    },
    "BindException": {
        "pattern": r"BindException",
        "title": "BindException",
        "explanation": "A port is already in use or cannot be bound.",
        "fix_example": "Use a different port or close the process using the port."
    },
    "ConnectException": {
        "pattern": r"ConnectException",
        "title": "ConnectException",
        "explanation": "Connection refused or host unreachable.",
        "fix_example": "Check server availability and network configuration."
    },
    "UnknownHostException": {
        "pattern": r"UnknownHostException",
        "title": "UnknownHostException",
        "explanation": "The IP address of a host could not be determined.",
        "fix_example": "Verify host name and DNS configuration."
    },
    "MalformedURLException": {
        "pattern": r"MalformedURLException",
        "title": "MalformedURLException",
        "explanation": "A malformed URL was used.",
        "fix_example": "Check URL syntax, protocol, and special characters."
    },
    "SSLException": {
        "pattern": r"SSLException",
        "title": "SSLException",
        "explanation": "An SSL-related error occurred.",
        "fix_example": "Check certificates, protocols, and SSL configuration."
    },
    "ClassNotFoundException": {
        "pattern": r"ClassNotFoundException",
        "title": "ClassNotFoundException",
        "explanation": "A class could not be located at runtime.",
        "fix_example": "Ensure the class is present in the classpath."
    },
    "ReflectiveOperationException": {
        "pattern": r"ReflectiveOperationException",
        "title": "ReflectiveOperationException",
        "explanation": "An error occurred during reflection operations.",
        "fix_example": "Verify that fields/methods exist and are accessible."
    },
    "InvocationTargetException": {
        "pattern": r"InvocationTargetException",
        "title": "InvocationTargetException",
        "explanation": "An exception was thrown by an invoked method via reflection.",
        "fix_example": "Inspect the underlying cause (getCause()) and fix it."
    },
    "MissingResourceException": {
        "pattern": r"MissingResourceException",
        "title": "MissingResourceException",
        "explanation": "A resource required by ResourceBundle could not be found.",
        "fix_example": "Ensure resource bundles and property files are present and correctly named."
    },
    
    # 4. COMMON PROGRAMMING MISTAKES
    "off_by_one_error": {
        "pattern": "",
        "title": "Off-by-One Error",
        "explanation": "Loop bounds or index calculations are off by one.",
        "fix_example": "Check whether loops should use < or <= and index ranges carefully."
    },
    "infinite_loop_mistake": {
        "pattern": "",
        "title": "Infinite Loop Mistake",
        "explanation": "Loop condition never becomes false, causing an infinite loop.",
        "fix_example": "Ensure the loop variable is updated and that exit conditions are reachable."
    },
    "incorrect_loop_condition": {
        "pattern": "",
        "title": "Incorrect Loop Condition",
        "explanation": "Loop condition logic does not match intended behavior.",
        "fix_example": "Review condition and test with edge cases."
    },
    "wrong_operator_usage": {
        "pattern": "",
        "title": "Wrong Operator Usage",
        "explanation": "Incorrect logical or arithmetic operator is used.",
        "fix_example": "Check whether you meant ==, !=, &&, ||, +, -, *, /, or %."
    },
    "using_double_equals_instead_of_equals": {
        "pattern": "",
        "title": "Using == Instead of .equals()",
        "explanation": "== compares references for objects instead of their contents.",
        "fix_example": "Use .equals() for content comparison of strings and objects."
    },
    "integer_division_mistake": {
        "pattern": "",
        "title": "Integer Division Mistake",
        "explanation": "Integer division truncates decimal part unexpectedly.",
        "fix_example": "Use double types or cast operands to double when fractional results are needed."
    },
    "floating_point_precision_mistake": {
        "pattern": "",
        "title": "Floating-Point Precision Mistake",
        "explanation": "Floating-point arithmetic can result in precision loss.",
        "fix_example": "Use BigDecimal for precise decimal calculations like money."
    },
    "ignoring_operator_precedence": {
        "pattern": "",
        "title": "Ignoring Operator Precedence",
        "explanation": "Expressions may be evaluated in an unintended order.",
        "fix_example": "Use parentheses to make evaluation order explicit."
    },
    "incorrect_variable_scope_usage": {
        "pattern": "",
        "title": "Incorrect Variable Scope Usage",
        "explanation": "Variables are declared in the wrong scope, causing visibility issues.",
        "fix_example": "Declare variables in the narrowest necessary scope."
    },
    "shadowing_variables_unintentionally": {
        "pattern": "",
        "title": "Unintentional Variable Shadowing",
        "explanation": "Inner-scope variables hide outer-scope variables.",
        "fix_example": "Use different names or refer to this.field when needed."
    },
    "using_uninitialized_logic_values": {
        "pattern": "",
        "title": "Using Uninitialized Logic Values",
        "explanation": "Logical decisions are based on default or uninitialized values.",
        "fix_example": "Initialize variables clearly before use in conditions."
    },
    "hard_coded_values_magic_numbers": {
        "pattern": "",
        "title": "Hard-Coded Values (Magic Numbers)",
        "explanation": "Magic numbers reduce readability and maintainability.",
        "fix_example": "Replace with named constants or configuration values."
    },
    "not_handling_null_cases": {
        "pattern": "",
        "title": "Not Handling Null Cases",
        "explanation": "Ignoring potential nulls can lead to NullPointerExceptions.",
        "fix_example": "Add null checks or use Optional to represent optional values."
    },
    "ignoring_exception_handling": {
        "pattern": "",
        "title": "Ignoring Exception Handling",
        "explanation": "Exceptions are not handled or considered.",
        "fix_example": "Use try-catch or throws declarations thoughtfully."
    },
    "catching_generic_exception": {
        "pattern": "",
        "title": "Catching Generic Exception",
        "explanation": "Catching Exception makes it hard to distinguish specific failure causes.",
        "fix_example": "Catch more specific exceptions to handle different cases appropriately."
    },
    "swallowing_exceptions_silently": {
        "pattern": "",
        "title": "Swallowing Exceptions Silently",
        "explanation": "Empty catch blocks hide problems and make debugging difficult.",
        "fix_example": "Log exceptions or rethrow them with meaningful messages."
    },
    "resource_leak_not_closing": {
        "pattern": "",
        "title": "Resource Leak (Not Closing Resources)",
        "explanation": "Failing to close streams, readers, or connections can exhaust resources.",
        "fix_example": "Use try-with-resources or finally blocks to close resources."
    },
    "memory_leak_via_references": {
        "pattern": "",
        "title": "Memory Leak via Object References",
        "explanation": "Unneeded references prevent garbage collection.",
        "fix_example": "Set references to null or reduce object lifetimes appropriately."
    },
    "inefficient_data_structure_choice": {
        "pattern": "",
        "title": "Inefficient Data Structure Choice",
        "explanation": "Using suboptimal collections impacts performance.",
        "fix_example": "Choose data structures based on access, search, and update patterns."
    },
    "wrong_algorithm_selection": {
        "pattern": "",
        "title": "Wrong Algorithm Selection",
        "explanation": "An inefficient algorithm chosen for a large input size.",
        "fix_example": "Analyze complexity and use more optimal algorithms."
    },
    "premature_optimization_mistake": {
        "pattern": "",
        "title": "Premature Optimization",
        "explanation": "Optimizing too early can complicate code without real benefit.",
        "fix_example": "Optimize only after profiling and identifying real bottlenecks."
    },
    "overcomplicated_logic": {
        "pattern": "",
        "title": "Overcomplicated Logic",
        "explanation": "Very complex logic is hard to understand and maintain.",
        "fix_example": "Refactor into simpler, smaller methods."
    },
    "code_duplication": {
        "pattern": "",
        "title": "Code Duplication",
        "explanation": "Copy-pasted code increases maintenance effort.",
        "fix_example": "Extract common logic into reusable methods."
    },
    "poor_method_naming": {
        "pattern": "",
        "title": "Poor Method Naming",
        "explanation": "Method names do not reflect their behavior clearly.",
        "fix_example": "Choose descriptive names that indicate the method's purpose."
    },
    "poor_class_design": {
        "pattern": "",
        "title": "Poor Class Design",
        "explanation": "Classes do not have clear responsibilities or structure.",
        "fix_example": "Apply object-oriented design principles and refactor responsibilities."
    },
    "violating_single_responsibility": {
        "pattern": "",
        "title": "Violating Single Responsibility Principle",
        "explanation": "A class or method does too many unrelated things.",
        "fix_example": "Split responsibilities into separate classes or methods."
    },
    "tight_coupling_between_classes": {
        "pattern": "",
        "title": "Tight Coupling Between Classes",
        "explanation": "Classes depend heavily on each other's implementations.",
        "fix_example": "Introduce interfaces and dependency injection."
    },
    "excessive_use_of_static": {
        "pattern": "",
        "title": "Excessive Use of Static",
        "explanation": "Too many static methods/fields can lead to poor design.",
        "fix_example": "Prefer instance methods and proper object design."
    },
    "misusing_inheritance_instead_of_composition": {
        "pattern": "",
        "title": "Misusing Inheritance Instead of Composition",
        "explanation": "Inheritance is used where composition would be more appropriate.",
        "fix_example": "Use composition to reuse functionality rather than deep inheritance hierarchies."
    },
    "incorrect_method_overriding_logic": {
        "pattern": "",
        "title": "Incorrect Method Overriding Logic",
        "explanation": "Override behavior does not respect superclass contract.",
        "fix_example": "Follow Liskov Substitution Principle and base class expectations."
    },
    "forgetting_break_in_switch": {
        "pattern": "",
        "title": "Forgetting Break in Switch",
        "explanation": "Missing break causes unintended fall-through between cases.",
        "fix_example": "Add break statements or intentional fall-through comments."
    },
    "modifying_collection_while_iterating": {
        "pattern": "",
        "title": "Modifying Collection While Iterating",
        "explanation": "Changing collection during iteration can cause runtime errors.",
        "fix_example": "Use Iterator.remove() or collect changes and apply after iteration."
    },
    "using_mutable_objects_as_map_keys": {
        "pattern": "",
        "title": "Using Mutable Objects as Map Keys",
        "explanation": "Mutating a key used in maps can break lookups.",
        "fix_example": "Use immutable keys or avoid mutating key fields."
    },
    "incorrect_equals_hashcode": {
        "pattern": "",
        "title": "Incorrect equalsâ€“hashCode Implementation",
        "explanation": "Inconsistent equals and hashCode cause map or set behavior issues.",
        "fix_example": "Ensure both methods follow the contract and use same fields."
    },
    "ignoring_thread_safety": {
        "pattern": "",
        "title": "Ignoring Thread Safety",
        "explanation": "Concurrent access to shared state without synchronization.",
        "fix_example": "Use synchronization, locks, or concurrent data structures."
    },
    "race_condition": {
        "pattern": "",
        "title": "Race Condition",
        "explanation": "Program behavior depends on unpredictable thread scheduling.",
        "fix_example": "Protect shared state with synchronization or lock-free algorithms."
    },
    "deadlock_risk": {
        "pattern": "",
        "title": "Deadlock Risk",
        "explanation": "Multiple locks obtained in inconsistent order can deadlock.",
        "fix_example": "Always acquire locks in the same global order."
    },
    "improper_synchronization": {
        "pattern": "",
        "title": "Improper Synchronization",
        "explanation": "Incorrect use of synchronized or locks leads to concurrency bugs.",
        "fix_example": "Synchronize on the correct objects and minimize lock scope."
    },
    "blocking_in_main_thread": {
        "pattern": "",
        "title": "Blocking in Main/UI Thread",
        "explanation": "Long-running tasks block the main or UI thread.",
        "fix_example": "Run heavy tasks in background threads."
    },
    "misusing_concurrency_utilities": {
        "pattern": "",
        "title": "Misusing Concurrency Utilities",
        "explanation": "Incorrect use of executors, futures, or semaphores.",
        "fix_example": "Follow java.util.concurrent best practices and examples."
    },
    "assuming_execution_order_in_threads": {
        "pattern": "",
        "title": "Assuming Execution Order in Threads",
        "explanation": "Thread scheduling assumptions lead to fragile code.",
        "fix_example": "Use proper coordination mechanisms like join(), locks, or conditions."
    },
    "not_validating_user_input": {
        "pattern": "",
        "title": "Not Validating User Input",
        "explanation": "Lack of validation can cause crashes or vulnerabilities.",
        "fix_example": "Validate all external inputs for type, range, and format."
    },
    "trusting_external_data_blindly": {
        "pattern": "",
        "title": "Trusting External Data Blindly",
        "explanation": "External data sources are used directly without checks.",
        "fix_example": "Sanitize and validate data from files, networks, or users."
    },
    "incorrect_file_path_handling": {
        "pattern": "",
        "title": "Incorrect File Path Handling",
        "explanation": "Hard-coded or platform-specific paths break portability.",
        "fix_example": "Use java.nio.file.Path and platform-independent separators."
    },
    "platform_dependent_assumptions": {
        "pattern": "",
        "title": "Platform-Dependent Assumptions",
        "explanation": "Assuming OS-specific behavior (e.g., line endings, file separators).",
        "fix_example": "Use platform-independent APIs and constants."
    },
    "ignoring_edge_cases": {
        "pattern": "",
        "title": "Ignoring Edge Cases",
        "explanation": "Code fails for boundary conditions or special inputs.",
        "fix_example": "Test with empty, null, min/max values, and unusual cases."
    },
    "lack_of_input_boundary_checks": {
        "pattern": "",
        "title": "Lack of Input Boundary Checks",
        "explanation": "Inputs are not checked for valid ranges.",
        "fix_example": "Add checks before processing inputs."
    },
    "poor_error_messages": {
        "pattern": "",
        "title": "Poor Error Messages",
        "explanation": "Unclear or missing error messages hinder debugging.",
        "fix_example": "Provide descriptive messages with context and possible causes."
    },
    "logging_sensitive_data": {
        "pattern": "",
        "title": "Logging Sensitive Data",
        "explanation": "Sensitive information is printed in logs.",
        "fix_example": "Avoid logging passwords, tokens, or personal data."
    },
    "ignoring_performance_bottlenecks": {
        "pattern": "",
        "title": "Ignoring Performance Bottlenecks",
        "explanation": "Performance issues go unaddressed.",
        "fix_example": "Profile the application and optimize hot paths."
    },
    "not_writing_unit_tests": {
        "pattern": "",
        "title": "Not Writing Unit Tests",
        "explanation": "Lack of tests reduces confidence in changes.",
        "fix_example": "Add unit tests for important methods and behavior."
    },
    "overfitting_logic_to_sample_data": {
        "pattern": "",
        "title": "Overfitting Logic to Sample Data",
        "explanation": "Code only works for a small set of example inputs.",
        "fix_example": "Generalize logic and test with varied data."
    },

    # 5. BEST PRACTICES IN PROGRAMMING
    "follow_naming_conventions": {
        "pattern": "",
        "title": "Follow Consistent Naming Conventions",
        "explanation": "Consistent naming makes code easier to read and maintain.",
        "fix_example": "Use camelCase for variables/methods, PascalCase for classes, UPPER_CASE for constants."
    },
    "write_clean_readable_code": {
        "pattern": "",
        "title": "Write Clean and Readable Code",
        "explanation": "Readable code reduces bugs and improves team productivity.",
        "fix_example": "Use proper indentation, whitespace, and meaningful names."
    },
    "use_meaningful_names": {
        "pattern": "",
        "title": "Use Meaningful Names",
        "explanation": "Descriptive names communicate intent clearly.",
        "fix_example": "Prefer 'totalMarks' over 'tm' or 'x'."
    },
    "keep_methods_small": {
        "pattern": "",
        "title": "Keep Methods Small and Focused",
        "explanation": "Small methods are easier to understand and test.",
        "fix_example": "Extract complex logic into helper methods."
    },
    "single_responsibility_principle": {
        "pattern": "",
        "title": "Follow Single Responsibility Principle",
        "explanation": "Each class or method should have one responsibility.",
        "fix_example": "Split classes/methods that handle multiple concerns."
    },
    "avoid_code_duplication": {
        "pattern": "",
        "title": "Avoid Code Duplication (DRY)",
        "explanation": "Duplicate code is harder to maintain.",
        "fix_example": "Extract common logic into reusable functions."
    },
    "prefer_composition_over_inheritance": {
        "pattern": "",
        "title": "Prefer Composition Over Inheritance",
        "explanation": "Composition is more flexible and less tightly coupled than deep inheritance.",
        "fix_example": "Use fields that reference other objects instead of extending them unnecessarily."
    },
    "program_to_interfaces": {
        "pattern": "",
        "title": "Program to Interfaces, Not Implementations",
        "explanation": "Depending on interfaces makes code more flexible.",
        "fix_example": "Use List instead of ArrayList in method parameters."
    },
    "use_access_modifiers_properly": {
        "pattern": "",
        "title": "Use Access Modifiers Properly",
        "explanation": "Proper encapsulation improves safety and flexibility.",
        "fix_example": "Hide internals with private and expose only necessary APIs as public/protected."
    },
    "minimize_mutable_state": {
        "pattern": "",
        "title": "Minimize Mutable State",
        "explanation": "Less mutability reduces bugs and makes reasoning about code easier.",
        "fix_example": "Use final fields and immutable objects when possible."
    },
    "use_constants_instead_of_magic_numbers": {
        "pattern": "",
        "title": "Use Constants Instead of Magic Numbers",
        "explanation": "Named constants make code self-describing.",
        "fix_example": "Define static final int MAX_RETRIES = 3; and use it."
    },
    "handle_exceptions_properly": {
        "pattern": "",
        "title": "Handle Exceptions Properly",
        "explanation": "Good exception handling improves reliability.",
        "fix_example": "Catch only what you can handle and log or propagate others."
    },
    "use_try_with_resources": {
        "pattern": "",
        "title": "Use try-with-resources",
        "explanation": "Auto-closing resources reduces leaks and boilerplate.",
        "fix_example": "Use try(Resource r = ...) { ... } for AutoCloseable resources."
    },
    "validate_external_inputs": {
        "pattern": "",
        "title": "Validate All External Inputs",
        "explanation": "Input validation prevents bugs and security issues.",
        "fix_example": "Check input size, range, format, and syntax before processing."
    },
    "write_unit_tests": {
        "pattern": "",
        "title": "Write Unit Tests",
        "explanation": "Tests give confidence when refactoring and deploying.",
        "fix_example": "Use JUnit to test methods with various inputs and edge cases."
    },
    "test_edge_cases": {
        "pattern": "",
        "title": "Test Edge Cases Explicitly",
        "explanation": "Boundary conditions can reveal hidden bugs.",
        "fix_example": "Include tests for empty collections, nulls, minima, and maxima."
    },
    "use_version_control": {
        "pattern": "",
        "title": "Use Version Control Effectively",
        "explanation": "Version control helps track changes and collaborate.",
        "fix_example": "Commit frequently with meaningful messages using Git."
    },
    "write_self_documenting_code": {
        "pattern": "",
        "title": "Write Self-Documenting Code",
        "explanation": "Clear code reduces the need for comments.",
        "fix_example": "Use clear names and simple logic so code explains itself."
    },
    "follow_code_formatting": {
        "pattern": "",
        "title": "Follow Standard Code Formatting",
        "explanation": "Consistent formatting improves readability.",
        "fix_example": "Use an auto-formatter or follow a style guide (like Google Java Style)."
    },
    "use_logging_instead_of_print": {
        "pattern": "",
        "title": "Use Logging Instead of print Statements",
        "explanation": "Logging is configurable and suitable for production.",
        "fix_example": "Use a logging framework with levels (INFO, DEBUG, ERROR)."
    },
    "avoid_premature_optimization": {
        "pattern": "",
        "title": "Avoid Premature Optimization",
        "explanation": "Optimizing too early complicates code without clear benefits.",
        "fix_example": "Focus on clarity first, then optimize after measuring performance."
    },
    "choose_appropriate_data_structures": {
        "pattern": "",
        "title": "Choose Appropriate Data Structures",
        "explanation": "Choosing the right structure improves performance and clarity.",
        "fix_example": "Use List for ordered collections, Map for keyâ€“value pairs, Set for uniqueness."
    },
    "use_immutable_objects": {
        "pattern": "",
        "title": "Use Immutable Objects When Possible",
        "explanation": "Immutability helps thread safety and predictability.",
        "fix_example": "Avoid setters and use final fields for value types."
    },
    "avoid_deep_nesting": {
        "pattern": "",
        "title": "Avoid Deep Nesting",
        "explanation": "Deeply nested code is hard to read and maintain.",
        "fix_example": "Use early returns and helper methods to flatten control flow."
    },
    "prefer_early_returns": {
        "pattern": "",
        "title": "Prefer Early Returns",
        "explanation": "Early returns reduce nested ifs and improve clarity.",
        "fix_example": "Return early when conditions are not met instead of adding nested blocks."
    },
    "encapsulate_object_state": {
        "pattern": "",
        "title": "Encapsulate Object State",
        "explanation": "Proper encapsulation protects invariants and internal logic.",
        "fix_example": "Make fields private and provide controlled accessors."
    },
    "make_dependencies_explicit": {
        "pattern": "",
        "title": "Make Dependencies Explicit",
        "explanation": "Hidden dependencies make code harder to understand and test.",
        "fix_example": "Inject dependencies via constructors or setters."
    },
    "use_dependency_injection": {
        "pattern": "",
        "title": "Use Dependency Injection",
        "explanation": "DI frameworks simplify management of object dependencies.",
        "fix_example": "Use constructor injection or DI containers like Spring where appropriate."
    },
    "avoid_static_abuse": {
        "pattern": "",
        "title": "Avoid Static Abuse",
        "explanation": "Overusing static methods and fields reduces flexibility.",
        "fix_example": "Prefer instance-based design and proper object lifecycles."
    },
    "design_for_testability": {
        "pattern": "",
        "title": "Design for Testability",
        "explanation": "Testable design makes it easier to maintain quality.",
        "fix_example": "Separate logic from I/O and use interfaces to mock dependencies."
    },
    "write_modular_code": {
        "pattern": "",
        "title": "Write Modular Code",
        "explanation": "Modularity enables reuse and easier understanding.",
        "fix_example": "Divide the system into cohesive modules or packages."
    },
    "follow_layered_architecture": {
        "pattern": "",
        "title": "Follow Layered Architecture",
        "explanation": "Separating layers (UI, business, data) improves structure.",
        "fix_example": "Keep controllers, services, and repositories separate."
    },
    "separate_business_logic_from_ui": {
        "pattern": "",
        "title": "Separate Business Logic from UI",
        "explanation": "Mixing UI and logic makes code hard to modify.",
        "fix_example": "Put business rules into service classes or domain models."
    },
    "handle_concurrency_explicitly": {
        "pattern": "",
        "title": "Handle Concurrency Explicitly",
        "explanation": "Explicit concurrency handling avoids subtle race conditions.",
        "fix_example": "Use synchronization, locks, or java.util.concurrent tools intentionally."
    },
    "use_thread_safe_constructs": {
        "pattern": "",
        "title": "Use Thread-Safe Constructs",
        "explanation": "Thread-safe constructs help avoid concurrency bugs.",
        "fix_example": "Use ConcurrentHashMap, AtomicInteger, and thread-safe patterns where needed."
    },
    "document_public_apis": {
        "pattern": "",
        "title": "Document Public APIs",
        "explanation": "API documentation helps users understand contracts and usage.",
        "fix_example": "Use JavaDoc to describe parameters, return values, and behavior."
    },
    "keep_methods_side_effect_minimal": {
        "pattern": "",
        "title": "Keep Methods Side-Effect Minimal",
        "explanation": "Functions with fewer side-effects are easier to reason about.",
        "fix_example": "Limit state changes and avoid hidden mutations."
    },
    "clean_up_unused_code": {
        "pattern": "",
        "title": "Clean Up Unused Code",
        "explanation": "Dead code increases noise and confusion.",
        "fix_example": "Delete unused methods, fields, and classes."
    },
    "review_code_regularly": {
        "pattern": "",
        "title": "Review Code Regularly",
        "explanation": "Code reviews catch issues early and share knowledge.",
        "fix_example": "Use peer reviews or pull requests as part of the workflow."
    },
    "refactor_continuously": {
        "pattern": "",
        "title": "Refactor Continuously",
        "explanation": "Continuous refactoring keeps the codebase healthy.",
        "fix_example": "Regularly improve design without changing behavior."
    }
}

error_path = DATA_DIR / "common_java_errors.json"

with open(error_path, "w", encoding="utf-8") as f:
    json.dump(errors, f, indent=2)

print("Wrote extended error & query patterns to:", error_path)
print("Total patterns stored:", len(errors))


Wrote extended error & query patterns to: C:\Users\suraj\Desktop\java_chatbot_project\data\common_java_errors.json
Total patterns stored: 227


In [41]:
from pathlib import Path
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

BASE_DIR = Path.cwd()
MODELS_DIR = BASE_DIR / "models"
DATA_DIR = BASE_DIR / "data"

MODELS_DIR.mkdir(parents=True, exist_ok=True)
DATA_DIR.mkdir(parents=True, exist_ok=True)

def train_tfidf(corpus):
    vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(corpus["question"].fillna(""))
    return vectorizer, tfidf_matrix

corpus = pd.read_csv(DATA_DIR / "corpus_java.csv")
print(f"Loaded {len(corpus)} questions from corpus")

vectorizer, tfidf_matrix = train_tfidf(corpus)
joblib.dump(vectorizer, MODELS_DIR / "tfidf_vectorizer.pkl")
print("Vectorizer saved to:", MODELS_DIR / "tfidf_vectorizer.pkl")
print("Training complete!")

Loaded 148 questions from corpus
Vectorizer saved to: C:\Users\suraj\Desktop\java_chatbot_project\notebook\models\tfidf_vectorizer.pkl
Training complete!


In [43]:
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib
import pandas as pd
from pathlib import Path

def load_corpus(path: Path) -> pd.DataFrame:
    return pd.read_csv(path)

def train_tfidf(corpus_df: pd.DataFrame, text_col: str = "question"):
    texts = corpus_df[text_col].fillna("").tolist()
    vectorizer = TfidfVectorizer(ngram_range=(1,2), stop_words='english', max_df=0.9)
    X = vectorizer.fit_transform(texts)
    return vectorizer, X

# Load and train
corpus_path = Path("data/corpus_java.csv")  # ensure correct path
corpus = load_corpus(corpus_path)
vectorizer, tfidf_matrix = train_tfidf(corpus)

# save model
MODELS_DIR = Path("models")
MODELS_DIR.mkdir(exist_ok=True)
joblib.dump(vectorizer, MODELS_DIR / "tfidf_vectorizer.pkl")

print("Vectorizer saved to:", MODELS_DIR / "tfidf_vectorizer.pkl")
print("Corpus size:", corpus.shape)

Vectorizer saved to: models\tfidf_vectorizer.pkl
Corpus size: (148, 2)


In [47]:
import json
import re
import tempfile
import subprocess
import shutil
from pathlib import Path
from typing import Dict, Any, List, Tuple
import joblib

with open(error_path, "r") as f:
    ERROR_PATTERNS = json.load(f)

def write_java_file(code: str, class_name: str, work_dir: Path) -> Path:
    java_path = work_dir / f"{class_name}.java"
    java_path.write_text(code)
    return java_path

def find_public_class_name(code: str) -> str:
    m = re.search(r'public\s+class\s+([A-Za-z_][A-Za-z0-9_]*)', code)
    if m:
        return m.group(1)
    # fallback: first class name
    m2 = re.search(r'class\s+([A-Za-z_][A-Za-z0-9_]*)', code)
    return m2.group(1) if m2 else "Main"

def compile_java(java_file: Path) -> Tuple[bool, str]:
    """
    Tries to compile the given Java file using javac.
    Returns (success, stderr+stdout text)
    """
    proc = subprocess.run(["javac", str(java_file)], capture_output=True, text=True)
    ok = proc.returncode == 0
    out = (proc.stdout or "") + (proc.stderr or "")
    return ok, out

def run_java(class_name: str, work_dir: Path, timeout_seconds: int = 5) -> Tuple[bool, str]:
    """
    Runs 'java ClassName' in work_dir. Assumes .class exists.
    Returns (success, stdout+stderr)
    """
    proc = subprocess.run(["java", "-cp", str(work_dir), class_name], capture_output=True, text=True, timeout=timeout_seconds)
    ok = proc.returncode == 0
    out = (proc.stdout or "") + (proc.stderr or "")
    return ok, out

def parse_javac_output(output: str) -> List[Dict[str,str]]:
    """
    Parse javac/jvm output and match against ERROR_PATTERNS to produce friendly messages.
    """
    matches = []
    for key, info in ERROR_PATTERNS.items():
        pat = info["pattern"]
        try:
            regex = re.compile(pat)
        except re.error:
            regex = re.compile(re.escape(pat))
        m = regex.search(output)
        if m:
            detail = m.group(0)
            matches.append({
                "id": key,
                "title": info["title"],
                "explanation": info["explanation"],
                "fix_example": info.get("fix_example", ""),
                "detail": detail
            })
    # If nothing matched, include raw output
    if not matches and output.strip():
        matches.append({
            "id": "unknown",
            "title": "Unrecognized error",
            "explanation": "The compiler/runtime returned an error message that didn't match the common patterns.",
            "fix_example": "Check the compiler output (below) and search for keywords.",
            "detail": output.strip()
        })
    return matches

def analyze_java_code(code: str, attempt_fix: bool = True) -> Dict[str, Any]:
    """
    Full pipeline: write file, compile, parse errors, optionally try simple fixes.
    Returns dict with fields: success, compile_output, suggestions, corrected_code (if any)
    """
    tempdir = Path(tempfile.mkdtemp(prefix="java_chatbot_"))
    try:
        class_name = find_public_class_name(code)
        java_file = write_java_file(code, class_name, tempdir)
        success, output = compile_java(java_file)
        response = {
            "success": success,
            "compile_output": output,
            "suggestions": [],
            "corrected_code": None,
            "work_dir": str(tempdir)
        }
        parsed = parse_javac_output(output)
        response["suggestions"] = parsed

        if not success and attempt_fix:
            # simple heuristics:
            corrected = code

            # 1) missing semicolon heuristic: if javac says "';' expected", try adding semicolon at end of line before error line
            if any(p["id"] == "semicolon_missing" for p in parsed):
                # naive fix: add semicolon to the last non-empty line
                lines = corrected.rstrip().splitlines()
                for i in range(len(lines)-1, -1, -1):
                    if lines[i].strip() and not lines[i].strip().endswith(";") and not lines[i].strip().endswith("{") and not lines[i].strip().endswith("}"):
                        lines[i] = lines[i] + ";"
                        break
                corrected = "\n".join(lines)

            # 2) public class vs filename mismatch: if javac error mentions filename, recommend renaming or change class to non-public
            for p in parsed:
                if p["id"] == "class_name_mismatch":
                    # try removing 'public' from class declaration (safer auto-fix)
                    corrected = re.sub(r'public\s+class', 'class', corrected, count=1)

            # 3) missing main (can't autofix reliably, but show template)
            if any("main" in s["id"] or "main" in s["title"].lower() for s in parsed):
                # append a main template if no main exists
                if "public static void main" not in corrected:
                    corrected += "\n\n// Added template main method\npublic static void main(String[] args) {\n    System.out.println(\"Hello from auto-added main\");\n}\n"

            # write and try compile again
            java_file2 = write_java_file(corrected, find_public_class_name(corrected), tempdir)
            success2, output2 = compile_java(java_file2)
            response["attempted_fix"] = {
                "corrected_code": corrected,
                "compile_success": success2,
                "compile_output": output2
            }
            if success2:
                response["corrected_code"] = corrected
        return response
    finally:
        # do not delete tempdir automatically during debugging; user may want to inspect
        # If you want to auto-clean, uncomment next line:
        # shutil.rmtree(tempdir)
        pass

In [49]:
# Example 1: missing semicolon
broken_code1 = """
public class Test {
    public static void main(String[] args) {
        int x = 10
        System.out.println(x);
    }
}
"""
res1 = analyze_java_code(broken_code1)
print("Compile success:", res1["success"])
print("Compile output:\n", res1["compile_output"])
print("Parsed suggestions:", res1["suggestions"])
if "attempted_fix" in res1:
    print("Tried fix - success:", res1["attempted_fix"]["compile_success"])
    print("Corrected code:\n", res1["attempted_fix"]["corrected_code"])

# Example 2: public class name mismatch
broken_code2 = """
public class HelloWorldWrongName {
    public static void main(String[] args) {
        System.out.println("Hi");
    }
}
"""
res2 = analyze_java_code(broken_code2)
print("Parsed suggestions:", res2["suggestions"])


Compile success: False
Compile output:
 C:\Users\suraj\AppData\Local\Temp\java_chatbot_m8cevfgj\Test.java:4: error: ';' expected
        int x = 10
                  ^
1 error

Parsed suggestions: [{'id': 'missing_semicolon', 'title': 'Missing Semicolon', 'explanation': 'Every Java statement must end with a semicolon. The compiler throws this when it encounters a missing one.', 'fix_example': 'Add a semicolon: System.out.println("Hello");', 'detail': "';' expected"}, {'id': 'expected_token', 'title': 'Expected Token Error', 'explanation': "The compiler expected a specific symbol (like ')', '}', ';') but did not find it.", 'fix_example': 'Read the error message to see which symbol is expected and insert it.', 'detail': 'expected'}, {'id': 'off_by_one_error', 'title': 'Off-by-One Error', 'explanation': 'Loop bounds or index calculations are off by one.', 'fix_example': 'Check whether loops should use < or <= and index ranges carefully.', 'detail': ''}, {'id': 'infinite_loop_mistake', 'ti

In [53]:
from pathlib import Path
import joblib

BASE_DIR = Path.cwd()
MODELS_DIR = BASE_DIR / "models"
UTILS_DIR = BASE_DIR / "utils"

MODELS_DIR.mkdir(exist_ok=True)
UTILS_DIR.mkdir(exist_ok=True)

joblib.dump(vectorizer, MODELS_DIR / "tfidf_vectorizer.pkl", compress=3)

utils_code = '''
# retrieval.py
import joblib
import pandas as pd
from pathlib import Path
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def load_corpus(path: Path) -> pd.DataFrame:
    return pd.read_csv(path)

def load_vectorizer(path: Path):
    return joblib.load(path)

def retrieve_answer(vectorizer, tfidf_matrix, corpus_df, query, k=1):
    q_vec = vectorizer.transform([query])
    sims = cosine_similarity(q_vec, tfidf_matrix)[0]
    top_idx = sims.argsort()[::-1][:k]
    return [{"question": corpus_df.iloc[i]["question"],
             "answer": corpus_df.iloc[i]["answer"],
             "score": float(sims[i])} for i in top_idx]
'''

with open(UTILS_DIR / "retrieval.py", "w") as f:
    f.write(utils_code)

print("Utilities written to", UTILS_DIR)


Utilities written to C:\Users\suraj\Desktop\java_chatbot_project\notebook\utils


In [59]:
from pathlib import Path

BASE_DIR = Path.cwd()
SERVER_DIR = BASE_DIR / "server"
SERVER_DIR.mkdir(parents=True, exist_ok=True)

flask_code = r'''
from flask import Flask, request, render_template, jsonify
import joblib, os, json
from pathlib import Path
# Minimal imports; re-use notebook utilities after refactoring for production

app = Flask(__name__, template_folder="templates")

# load model and corpus (adjust BASE_DIR if needed)
BASE = Path(__file__).resolve().parents[1]
models_dir = BASE / "models"
data_dir = BASE / "data"
vectorizer = joblib.load(models_dir / "tfidf_vectorizer.pkl")
corpus = __import__("pandas").read_csv(data_dir / "corpus_java.csv")
# Note: for full logic, import your utils.code_analyzer and utils.retrieval after refactor

@app.route("/")
def index():
    return render_template("index.html")

@app.route("/api/query", methods=["POST"])
def api_query():
    payload = request.json or {}
    text = payload.get("text", "")
    # For brevity, this is a placeholder; in production import functions written in utils/
    return jsonify({"status": "ok", "received": text})

if __name__ == "__main__":
    app.run(debug=True, port=5000)
'''
server_app_path = SERVER_DIR / "app.py"
(server_app_path).write_text(flask_code)
# create a minimal index.html
template_dir = SERVER_DIR / "templates"
template_dir.mkdir(parents=True, exist_ok=True)
(template_dir / "index.html").write_text("""
<!doctype html>
<html>
<head><title>Java Chatbot</title></head>
<body>
  <h1>Java Chatbot</h1>
  <textarea id="q" rows=10 cols=80 placeholder="Paste Java code or ask a Java question"></textarea><br/>
  <button onclick="send()">Send</button>
  <pre id="out"></pre>
  <script>
    async function send(){
      const txt = document.getElementById('q').value;
      const res = await fetch('/api/query', {
        method: 'POST',
        headers: {'Content-Type':'application/json'},
        body: JSON.stringify({text: txt})
      });
      const data = await res.json();
      document.getElementById('out').innerText = JSON.stringify(data, null, 2);
    }
  </script>
</body>
</html>
""")
print("Flask stub written to:", server_app_path)


Flask stub written to: C:\Users\suraj\Desktop\java_chatbot_project\notebook\server\app.py


In [61]:
test_code = '''
def test_parse_error():
    from pathlib import Path
    import json, re
    errors = json.load(open("data/common_java_errors.json"))
    # ensure at least one pattern compiles
    for k, v in errors.items():
        try:
            re.compile(v["pattern"])
        except re.error:
            assert False, f"Bad regex for {k}"
'''
# Save to tests/test_error_parsing.py
tests_dir = BASE_DIR / "tests"
tests_dir.mkdir(exist_ok=True)
(tests_dir / "test_error_parsing.py").write_text(test_code)
print("Wrote unit test to:", tests_dir / "test_error_parsing.py")

Wrote unit test to: C:\Users\suraj\Desktop\java_chatbot_project\notebook\tests\test_error_parsing.py
