In [21]:
import random
import json
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import HashingVectorizer

print("‚úÖ Libraries imported successfully")
print(f"TensorFlow version: {tf.__version__}")


‚úÖ Libraries imported successfully
TensorFlow version: 2.19.0


In [22]:
"""
COMPREHENSIVE STUDYING VOCABULARY FOR GRADES 1-10
Based on NCERT/CBSE/ICSE/State Board Curriculum
Total: 1000+ unique educational terms
"""

STUDYING_VOCAB = {
    
    # ==================== SUBJECTS ====================
    "subjects": [
        "mathematics", "maths", "math", "arithmetic", "algebra", "geometry",
        "trigonometry", "calculus", "statistics", "probability",
        "science", "physics", "chemistry", "biology", "botany", "zoology",
        "english", "language", "literature", "grammar", "composition",
        "hindi", "sanskrit", "regional language", "vernacular",
        "social studies", "social science", "history", "geography", "civics",
        "political science", "economics", "sociology", "psychology",
        "computer science", "informatics", "information technology",
        "physical education", "pe", "sports", "yoga", "health education",
        "art", "craft", "drawing", "painting", "music", "dance"
    ],
    
    # ==================== GRADE 1-3: PRIMARY ====================
    "grade_1_3_english": [
        "alphabet", "a", "b", "c", "letters", "vowels", "consonants",
        "capital letter", "small letter", "uppercase", "lowercase",
        "word", "sentence", "rhyme", "poem", "story", "paragraph",
        "noun", "verb", "pronoun", "adjective", "article",
        "singular", "plural", "this", "that", "these", "those",
        "reading", "writing", "spelling", "dictation", "handwriting",
        "phonics", "sound", "pronunciation", "vocabulary"
    ],
    
    "grade_1_3_math": [
        "number", "digit", "counting", "count", "one", "two", "three",
        "four", "five", "six", "seven", "eight", "nine", "ten",
        "eleven", "twelve", "twenty", "hundred", "thousand",
        "addition", "add", "plus", "sum", "total",
        "subtraction", "subtract", "minus", "difference", "take away",
        "multiplication", "multiply", "times", "product",
        "division", "divide", "quotient", "remainder",
        "equals", "equal to", "greater than", "less than",
        "place value", "ones", "tens", "hundreds",
        "shape", "circle", "square", "triangle", "rectangle",
        "oval", "pentagon", "hexagon", "pattern", "symmetry",
        "measurement", "length", "weight", "capacity", "time",
        "meter", "centimeter", "kilogram", "gram", "liter",
        "clock", "hour", "minute", "morning", "afternoon", "evening"
    ],
    
    "grade_1_3_science": [
        "living", "non-living", "plants", "animals", "human body",
        "tree", "leaf", "flower", "fruit", "seed", "root", "stem",
        "parts of plant", "life cycle", "growth",
        "bird", "fish", "insect", "mammal", "reptile", "amphibian",
        "dog", "cat", "cow", "elephant", "lion", "tiger",
        "eyes", "ears", "nose", "mouth", "hands", "legs",
        "bones", "muscles", "heart", "lungs", "stomach",
        "food", "water", "air", "healthy food", "junk food",
        "seasons", "summer", "winter", "spring", "autumn", "monsoon",
        "weather", "hot", "cold", "rainy", "sunny", "cloudy",
        "day", "night", "sun", "moon", "stars", "sky",
        "earth", "land", "water", "river", "mountain", "ocean"
    ],
    
    "grade_1_3_social": [
        "family", "mother", "father", "brother", "sister", "grandparents",
        "relatives", "friends", "neighbors", "community",
        "school", "teacher", "student", "classroom", "playground",
        "home", "house", "room", "kitchen", "bedroom",
        "festival", "diwali", "holi", "eid", "christmas", "independence day",
        "republic day", "birthday", "celebration",
        "village", "city", "town", "country", "india",
        "road", "street", "market", "hospital", "police station",
        "post office", "bank", "temple", "mosque", "church",
        "transport", "bus", "car", "train", "bicycle", "walk"
    ],
    
    # ==================== GRADE 4-5: UPPER PRIMARY ====================
    "grade_4_5_english": [
        "tense", "present tense", "past tense", "future tense",
        "helping verb", "action verb", "linking verb",
        "adverb", "preposition", "conjunction", "interjection",
        "subject", "predicate", "object", "clause",
        "phrase", "idiom", "proverb", "metaphor", "simile",
        "punctuation", "comma", "full stop", "question mark",
        "exclamation", "apostrophe", "quotation marks",
        "comprehension", "passage", "extract", "essay",
        "letter writing", "formal letter", "informal letter",
        "application", "notice", "message", "diary entry",
        "antonym", "synonym", "homonym", "prefix", "suffix"
    ],
    
    "grade_4_5_math": [
        "fraction", "numerator", "denominator", "proper fraction",
        "improper fraction", "mixed fraction", "equivalent fraction",
        "decimal", "decimal point", "tenths", "hundredths",
        "percentage", "percent", "ratio", "proportion",
        "average", "mean", "mode", "median",
        "factors", "multiples", "prime number", "composite number",
        "hcf", "highest common factor", "lcm", "lowest common multiple",
        "even number", "odd number", "whole number", "natural number",
        "roman numerals", "I", "V", "X", "L", "C", "D", "M",
        "angle", "acute angle", "right angle", "obtuse angle",
        "straight angle", "parallel lines", "perpendicular lines",
        "perimeter", "area", "square units", "formula",
        "profit", "loss", "cost price", "selling price",
        "simple interest", "principal", "rate", "time"
    ],
    
    "grade_4_5_science": [
        "matter", "solid", "liquid", "gas", "states of matter",
        "mixture", "solution", "soluble", "insoluble", "dissolve",
        "evaporation", "condensation", "melting", "freezing",
        "food chain", "producer", "consumer", "decomposer", "herbivore",
        "carnivore", "omnivore", "predator", "prey", "habitat",
        "adaptation", "camouflage", "migration", "hibernation",
        "digestion", "digestive system", "teeth", "saliva", "intestine",
        "respiration", "respiratory system", "breathing", "lungs", "oxygen",
        "circulation", "blood", "heart", "arteries", "veins",
        "skeleton", "skull", "ribs", "spine", "joints",
        "force", "push", "pull", "friction", "gravity",
        "magnet", "magnetic force", "north pole", "south pole", "attract",
        "light", "shadow", "transparent", "translucent", "opaque",
        "sound", "vibration", "pitch", "loud", "soft", "echo",
        "electricity", "circuit", "battery", "bulb", "switch", "conductor"
    ],
    
    "grade_4_5_social": [
        "earth", "globe", "map", "atlas", "direction", "north", "south",
        "east", "west", "compass", "latitude", "longitude",
        "continent", "asia", "africa", "europe", "australia",
        "north america", "south america", "antarctica",
        "ocean", "pacific", "atlantic", "indian", "arctic",
        "country", "state", "capital", "delhi", "mumbai", "kolkata",
        "river", "ganga", "yamuna", "brahmaputra", "narmada",
        "mountain", "himalayas", "western ghats", "eastern ghats",
        "plain", "plateau", "desert", "forest", "island",
        "climate", "tropical", "temperate", "polar", "rainfall",
        "ancient india", "indus valley", "vedic period", "mauryan empire",
        "gupta empire", "medieval india", "medieval period",
        "constitution", "government", "democracy", "president", "prime minister"
    ],
    
    # ==================== GRADE 6-8: MIDDLE SCHOOL ====================
    "grade_6_8_math": [
        "integer", "positive integer", "negative integer", "absolute value",
        "rational number", "irrational number", "real number",
        "exponent", "power", "base", "index", "exponential form",
        "square", "square root", "cube", "cube root", "perfect square",
        "algebraic expression", "term", "coefficient", "constant",
        "like terms", "unlike terms", "degree", "polynomial",
        "linear equation", "variable", "unknown", "solution",
        "inequality", "greater than or equal to", "less than or equal to",
        "coordinate geometry", "x-axis", "y-axis", "origin", "quadrant",
        "ordered pair", "coordinates", "abscissa", "ordinate",
        "unitary method", "direct proportion", "inverse proportion",
        "compound interest", "amount", "time period", "annual",
        "data handling", "bar graph", "pie chart", "histogram",
        "frequency", "class interval", "range", "probability",
        "congruent", "similar", "corresponding angles", "corresponding sides",
        "construction", "compass", "protractor", "bisector",
        "volume", "surface area", "curved surface area", "total surface area",
        "cylinder", "cone", "sphere", "hemisphere", "cuboid", "cube"
    ],
    
    "grade_6_8_science": [
        "cell", "cell membrane", "cytoplasm", "nucleus", "organelle",
        "tissue", "organ", "organ system", "organism",
        "unicellular", "multicellular", "prokaryotic", "eukaryotic",
        "nutrition", "autotrophic", "heterotrophic", "photosynthesis",
        "chlorophyll", "chloroplast", "stomata", "transpiration",
        "respiration", "aerobic", "anaerobic", "cellular respiration",
        "mitochondria", "glucose", "carbon dioxide", "atp",
        "reproduction", "sexual reproduction", "asexual reproduction",
        "pollination", "fertilization", "seed dispersal", "germination",
        "adolescence", "puberty", "hormone", "endocrine gland",
        "atom", "molecule", "element", "compound", "mixture",
        "physical change", "chemical change", "reaction", "reactant", "product",
        "acid", "base", "alkali", "indicator", "litmus paper",
        "ph scale", "neutral", "acidic", "basic", "salt formation",
        "metal", "non-metal", "metalloid", "conductor", "insulator",
        "corrosion", "rust", "oxidation", "reduction", "displacement",
        "motion", "rest", "speed", "velocity", "acceleration",
        "distance", "displacement", "uniform motion", "non-uniform motion",
        "force", "balanced force", "unbalanced force", "net force",
        "pressure", "thrust", "atmospheric pressure", "buoyancy",
        "work", "energy", "kinetic energy", "potential energy",
        "conservation of energy", "power", "joule", "watt",
        "current", "voltage", "resistance", "ohm's law",
        "series circuit", "parallel circuit", "ammeter", "voltmeter",
        "magnetic field", "electromagnet", "electric motor", "generator",
        "reflection", "refraction", "lens", "concave", "convex",
        "mirror", "plane mirror", "spherical mirror", "focal length",
        "image", "real image", "virtual image", "magnification"
    ],
    
    "grade_6_8_social": [
        "history", "archaeology", "excavation", "artifact", "manuscript",
        "inscription", "chronology", "era", "period", "century",
        "stone age", "paleolithic", "mesolithic", "neolithic",
        "chalcolithic", "bronze age", "iron age",
        "harappan civilization", "mohenjo-daro", "harappa", "lothal",
        "vedic age", "rigveda", "samaveda", "yajurveda", "atharvaveda",
        "mahajanapadas", "magadha", "vajji", "kosala", "kuru",
        "buddhism", "gautama buddha", "jainism", "mahavira",
        "mauryan empire", "chandragupta maurya", "ashoka", "kalinga war",
        "gupta empire", "chandragupta", "samudragupta", "golden age",
        "mughal empire", "babur", "humayun", "akbar", "jahangir",
        "shah jahan", "taj mahal", "aurangzeb", "decline",
        "delhi sultanate", "qutub-ud-din aibak", "iltutmish", "razia sultan",
        "bhakti movement", "sufi movement", "kabir", "guru nanak",
        "maratha empire", "shivaji", "peshwa", "swarajya",
        "british east india company", "battle of plassey", "battle of buxar",
        "colonialism", "imperialism", "exploitation", "drain of wealth",
        "revolt of 1857", "sepoy mutiny", "first war of independence",
        "indian national congress", "moderate", "extremist", "swaraj",
        "swadeshi movement", "partition of bengal", "boycott",
        "non-cooperation movement", "civil disobedience", "quit india",
        "freedom struggle", "mahatma gandhi", "jawaharlal nehru",
        "subhas chandra bose", "bhagat singh", "independence",
        "geography", "lithosphere", "atmosphere", "hydrosphere", "biosphere",
        "plate tectonics", "continental drift", "earthquake", "volcano",
        "rock", "igneous", "sedimentary", "metamorphic", "rock cycle",
        "soil", "soil formation", "weathering", "erosion", "conservation",
        "natural resources", "renewable", "non-renewable", "sustainable",
        "water cycle", "condensation", "precipitation", "groundwater",
        "population", "density", "distribution", "migration", "urbanization",
        "settlement", "rural", "urban", "town", "metropolitan",
        "agriculture", "crop", "kharif", "rabi", "irrigation",
        "green revolution", "fertilizer", "pesticide", "organic farming",
        "industries", "cottage industry", "small scale", "large scale",
        "manufacturing", "raw material", "finished product", "pollution",
        "civics", "democracy", "monarchy", "dictatorship", "republic",
        "preamble", "fundamental rights", "fundamental duties", "directive principles",
        "legislature", "executive", "judiciary", "parliament", "lok sabha",
        "rajya sabha", "member of parliament", "election", "vote",
        "secularism", "equality", "justice", "liberty", "fraternity"
    ],
    
    # ==================== GRADE 9-10: SECONDARY ====================
    "grade_9_10_math": [
        "number system", "real numbers", "euclid's division lemma",
        "fundamental theorem of arithmetic", "hcf and lcm",
        "polynomial", "linear", "quadratic", "cubic", "biquadratic",
        "zeros", "coefficient relation", "division algorithm",
        "remainder theorem", "factor theorem",
        "linear equation in two variables", "simultaneous equations",
        "substitution method", "elimination method", "cross multiplication",
        "quadratic equation", "discriminant", "nature of roots",
        "quadratic formula", "sum of roots", "product of roots",
        "arithmetic progression", "ap", "common difference", "nth term",
        "sum of n terms", "geometric progression", "gp", "common ratio",
        "coordinate geometry", "distance formula", "section formula",
        "midpoint formula", "area of triangle", "slope", "collinear",
        "triangle", "congruence", "similarity", "pythagoras theorem",
        "area", "heron's formula", "semi-perimeter",
        "circle", "chord", "diameter", "radius", "arc", "sector",
        "tangent", "secant", "point of contact", "alternate segment",
        "trigonometry", "trigonometric ratios", "sine", "cosine", "tangent",
        "cosecant", "secant", "cotangent", "complementary angles",
        "trigonometric identities", "pythagorean identity",
        "angle of elevation", "angle of depression", "line of sight",
        "mensuration", "lateral surface area", "frustum", "slant height",
        "statistics", "mean", "median", "mode", "cumulative frequency",
        "ogive", "mean deviation", "standard deviation", "variance",
        "probability", "theoretical probability", "experimental probability",
        "equally likely outcomes", "sample space", "event", "trial"
    ],
    
    "grade_9_10_science_physics": [
        "motion", "scalar", "vector", "position", "path length",
        "uniform motion", "non-uniform motion", "average speed", "instantaneous speed",
        "newton's laws", "first law", "inertia", "second law", "third law",
        "momentum", "conservation of momentum", "impulse",
        "gravitation", "universal law", "gravitational constant",
        "acceleration due to gravity", "free fall", "weight", "mass",
        "work", "work done", "joule", "energy transformation",
        "kinetic energy", "potential energy", "mechanical energy",
        "power", "commercial unit", "kilowatt hour",
        "sound", "longitudinal wave", "transverse wave", "amplitude",
        "frequency", "wavelength", "time period", "speed of sound",
        "reflection of sound", "echo", "reverberation", "ultrasound",
        "current electricity", "electric current", "potential difference",
        "circuit diagram", "ammeter", "voltmeter", "rheostat",
        "ohm's law", "resistance", "resistivity", "conductivity",
        "series connection", "parallel connection", "effective resistance",
        "heating effect", "electric power", "fuse", "short circuit",
        "magnetic effects", "magnetic field", "field lines", "right hand rule",
        "electromagnetic induction", "fleming's right hand rule",
        "fleming's left hand rule", "electric generator", "ac", "dc",
        "light", "ray", "beam", "reflection", "laws of reflection",
        "mirror formula", "magnification", "spherical mirror",
        "refraction", "laws of refraction", "refractive index", "snell's law",
        "total internal reflection", "critical angle", "lens formula",
        "power of lens", "dioptre", "dispersion", "spectrum", "prism",
        "human eye", "retina", "cornea", "iris", "pupil", "lens",
        "accommodation", "near point", "far point", "myopia", "hypermetropia",
        "presbyopia", "astigmatism", "defects of vision"
    ],
    
    "grade_9_10_science_chemistry": [
        "matter", "physical properties", "chemical properties",
        "pure substance", "impure substance", "homogeneous", "heterogeneous",
        "solution", "solvent", "solute", "concentration", "saturated",
        "unsaturated", "supersaturated", "solubility", "suspension", "colloid",
        "atom", "dalton's atomic theory", "law of conservation of mass",
        "law of constant proportions", "law of multiple proportions",
        "molecule", "atomicity", "molecular mass", "mole", "molar mass",
        "avogadro's number", "valency", "chemical formula", "ionic compound",
        "atomic structure", "electron", "proton", "neutron", "nucleus",
        "atomic number", "mass number", "isotope", "isobar", "isotone",
        "electronic configuration", "valence electron", "octet rule",
        "noble gas", "periodic table", "period", "group", "mendeleev",
        "modern periodic law", "alkali metals", "alkaline earth metals",
        "halogens", "noble gases", "transition elements",
        "atomic radius", "ionization energy", "electron affinity", "electronegativity",
        "chemical bonding", "ionic bond", "covalent bond", "polar", "non-polar",
        "coordinate bond", "metallic bond", "hydrogen bond",
        "chemical reaction", "reactants", "products", "balanced equation",
        "combination reaction", "decomposition reaction", "displacement reaction",
        "double displacement", "oxidation", "reduction", "redox reaction",
        "exothermic", "endothermic", "precipitation", "neutralization",
        "acids and bases", "indicator", "ph scale", "strong acid", "weak acid",
        "basicity", "acidity", "neutralization", "salt", "hydration",
        "metals and non-metals", "physical properties", "chemical properties",
        "reactivity series", "displacement", "corrosion", "prevention",
        "alloy", "brass", "bronze", "steel", "amalgam",
        "carbon compounds", "organic chemistry", "catenation", "tetravalency",
        "hydrocarbons", "saturated", "unsaturated", "alkane", "alkene", "alkyne",
        "homologous series", "functional group", "alcohol", "aldehyde", "ketone",
        "carboxylic acid", "ester", "ether", "nomenclature", "iupac",
        "isomerism", "chain isomerism", "position isomerism", "functional isomerism",
        "combustion", "oxidation", "addition reaction", "substitution reaction",
        "periodic classification", "dobereiner's triads", "newlands' octaves",
        "metallic character", "non-metallic character", "periodicity"
    ],
    
    "grade_9_10_science_biology": [
        "cell", "cell theory", "prokaryotic cell", "eukaryotic cell",
        "plasma membrane", "cell wall", "cytoplasm", "nucleus", "nucleolus",
        "chromosome", "gene", "dna", "rna", "protein synthesis",
        "endoplasmic reticulum", "golgi apparatus", "lysosome", "ribosome",
        "mitochondria", "plastid", "chloroplast", "vacuole", "centrosome",
        "diffusion", "osmosis", "plasmolysis", "endocytosis", "exocytosis",
        "tissue", "meristematic tissue", "permanent tissue", "simple tissue",
        "complex tissue", "parenchyma", "collenchyma", "sclerenchyma",
        "xylem", "phloem", "epidermis", "cork", "epithelial tissue",
        "connective tissue", "muscular tissue", "nervous tissue",
        "organ", "organ system", "digestive system", "mouth", "esophagus",
        "stomach", "small intestine", "large intestine", "liver", "pancreas",
        "digestion", "enzyme", "amylase", "pepsin", "trypsin", "lipase",
        "absorption", "assimilation", "egestion", "peristalsis",
        "respiration", "breathing", "inhalation", "exhalation",
        "trachea", "bronchi", "bronchioles", "alveoli", "diaphragm",
        "gaseous exchange", "oxygen", "carbon dioxide", "haemoglobin",
        "transportation", "blood", "plasma", "rbc", "wbc", "platelets",
        "blood vessels", "arteries", "veins", "capillaries", "blood pressure",
        "heart", "atrium", "ventricle", "valve", "cardiac cycle",
        "lymph", "lymphatic system", "immunity", "antibody", "antigen",
        "excretion", "kidney", "nephron", "ureter", "urinary bladder", "urethra",
        "urine formation", "glomerular filtration", "reabsorption", "secretion",
        "osmoregulation", "dialysis", "artificial kidney",
        "nervous system", "neuron", "dendrite", "axon", "synapse",
        "central nervous system", "peripheral nervous system",
        "brain", "cerebrum", "cerebellum", "medulla", "spinal cord",
        "reflex action", "reflex arc", "voluntary action", "involuntary action",
        "sense organs", "eye", "ear", "nose", "tongue", "skin",
        "receptor", "stimulus", "response", "coordination",
        "hormones", "endocrine glands", "exocrine glands",
        "pituitary", "thyroid", "parathyroid", "adrenal", "pancreas",
        "gonads", "testosterone", "estrogen", "progesterone",
        "growth hormone", "insulin", "thyroxine", "adrenaline",
        "plant hormones", "auxin", "gibberellin", "cytokinin", "abscisic acid",
        "reproduction", "asexual reproduction", "sexual reproduction",
        "binary fission", "budding", "fragmentation", "regeneration",
        "spore formation", "vegetative propagation", "tissue culture",
        "flower", "stamen", "anther", "filament", "pistil", "ovary",
        "pollination", "self pollination", "cross pollination", "fertilization",
        "fruit", "seed", "embryo", "cotyledon", "germination",
        "reproduction in humans", "male reproductive system", "female reproductive system",
        "testis", "sperm", "ovary", "ovum", "menstruation", "menstrual cycle",
        "fertilization", "zygote", "implantation", "pregnancy", "placenta",
        "heredity", "genetics", "variation", "mendel", "trait",
        "dominant", "recessive", "genotype", "phenotype", "allele",
        "monohybrid cross", "dihybrid cross", "sex determination",
        "evolution", "origin of life", "natural selection", "darwin",
        "acquired characters", "inherited characters", "speciation",
        "homologous organs", "analogous organs", "vestigial organs",
        "fossil", "embryology", "molecular evidence",
        "ecosystem", "biotic", "abiotic", "producers", "consumers",
        "decomposers", "food chain", "food web", "trophic level",
        "energy flow", "10% law", "biodegradable", "non-biodegradable",
        "ozone layer", "ozone depletion", "greenhouse effect", "global warming"
    ],
    
    # ==================== STUDY ACTIONS & METHODS ====================
    "study_verbs": [
        "study", "learn", "read", "write", "practice", "solve", "calculate",
        "understand", "comprehend", "memorize", "remember", "recall",
        "revise", "review", "prepare", "analyze", "examine",
        "observe", "investigate", "research", "explore", "discover",
        "explain", "describe", "define", "identify", "classify",
        "compare", "contrast", "differentiate", "distinguish",
        "evaluate", "assess", "measure", "estimate", "verify",
        "apply", "implement", "demonstrate", "illustrate", "draw",
        "sketch", "label", "diagram", "map", "plot", "graph"
    ],
    
    "study_materials": [
        "textbook", "reference book", "guidebook", "workbook", "notebook",
        "notes", "study notes", "revision notes", "class notes",
        "chapter", "lesson", "topic", "unit", "module", "section",
        "exercise", "question", "answer", "solution", "example",
        "problem", "sum", "equation", "formula", "theorem",
        "definition", "concept", "theory", "principle", "law",
        "diagram", "figure", "chart", "table", "graph",
        "map", "atlas", "globe", "model", "specimen",
        "worksheet", "assignment", "homework", "project", "activity",
        "pdf", "document", "presentation", "slides", "ppt"
    ],
    
    "exam_terms": [
        "exam", "examination", "test", "quiz", "assessment",
        "mid-term", "final", "board exam", "annual exam", "unit test",
        "class test", "surprise test", "viva", "oral", "practical",
        "marks", "grade", "score", "percentage", "rank",
        "pass", "fail", "result", "report card", "marksheet",
        "syllabus", "curriculum", "portion", "topics covered",
        "question paper", "answer sheet", "omr sheet", "answer key",
        "objective", "subjective", "mcq", "multiple choice", "true false",
        "short answer", "long answer", "essay type", "descriptive",
        "time limit", "duration", "negative marking", "full marks"
    ],
    
    "educational_platforms": [
        "ncert", "cbse", "icse", "state board", "nios",
        "google classroom", "zoom", "microsoft teams", "google meet",
        "online class", "virtual class", "e-learning", "digital learning",
        "byju's", "vedantu", "unacademy", "khan academy", "topper",
        "meritnation", "extramarks", "doubtnut", "toppr", "embibe",
        "youtube education", "educational video", "tutorial", "lecture",
        "edutech", "educational technology", "smart class", "digital board"
    ],
    
    "school_activities": [
        "attendance", "present", "absent", "leave", "late",
        "timetable", "period", "recess", "break", "lunch break",
        "assembly", "prayer", "national anthem", "pledge",
        "roll number", "admission number", "id card", "uniform",
        "library", "laboratory", "lab", "computer lab", "science lab",
        "sports", "games", "physical education", "yoga", "drill",
        "annual day", "sports day", "cultural program", "competition",
        "debate", "speech", "essay writing", "drawing", "painting",
        "science exhibition", "project presentation", "model making"
    ]
}

# ==================== SAMPLE SENTENCES ====================
STUDYING_SAMPLES = [
    # Math samples
    "chapter 5 quadratic equations exercise 5.3 solve using formula method",
    "trigonometry class 10 find sine cosine tangent ratios example problems",
    "coordinate geometry distance formula derivation proof mathematics",
    "arithmetic progression sum of n terms formula application grade 10",
    "probability theoretical experimental equally likely outcomes cbse",
    "surface area and volume of cone cylinder sphere mensuration",
    "linear equations in two variables graphical method solution",
    "pythagoras theorem proof right angled triangle application",
    "rational numbers operations addition subtraction multiplication",
    "statistics mean median mode frequency distribution table",
    
    # Science samples
    "photosynthesis process chlorophyll light energy glucose production biology",
    "periodic table mendeleev classification elements groups periods chemistry",
    "newton's laws of motion inertia acceleration force physics class 9",
    "human digestive system organs enzymes digestion absorption process",
    "chemical reactions types combination decomposition displacement oxidation",
    "reproduction in plants pollination fertilization seed formation botany",
    "electricity current voltage resistance ohm's law series parallel circuit",
    "cell structure prokaryotic eukaryotic organelles functions biology",
    "acid base salt ph scale indicator litmus paper chemistry",
    "light reflection refraction mirror lens image formation physics",
    
    # English samples
    "english grammar tenses present past future continuous perfect",
    "comprehension passage reading understanding questions answers extract",
    "letter writing formal informal application leave principal",
    "essay composition paragraph structure introduction body conclusion",
    "parts of speech noun pronoun verb adjective adverb",
    "literature poem prose drama comprehension appreciation critical analysis",
    "vocabulary synonym antonym homonym prefix suffix word formation",
    "sentence structure subject predicate clause phrase complex simple",
    "punctuation comma full stop question mark quotation marks",
    "active passive voice transformation rules examples practice",
    
    # Social Science samples
    "mughal empire akbar administration art culture history class 7",
    "indian geography rivers mountains plains plateaus physical features",
    "democracy government constitution fundamental rights duties civics",
    "freedom struggle gandhi non-cooperation civil disobedience movement",
    "ancient india harappan civilization vedic period mauryan gupta",
    "world war causes effects league of nations history grade 10",
    "natural resources renewable non-renewable conservation sustainable",
    "population distribution density migration rural urban settlement",
    "agriculture crops irrigation green revolution farming india",
    "medieval history delhi sultanate bhakti sufi movement culture"
]

print("‚úÖ COMPREHENSIVE STUDYING VOCABULARY LOADED")
print(f"üìö Total categories: {len(STUDYING_VOCAB)}")
print(f"üìù Total unique terms: {sum(len(v) for v in STUDYING_VOCAB.values())}")
print(f"üí° Sample sentences: {len(STUDYING_SAMPLES)}")


‚úÖ COMPREHENSIVE STUDYING VOCABULARY LOADED
üìö Total categories: 21
üìù Total unique terms: 1664
üí° Sample sentences: 40


In [23]:
"""
COMPREHENSIVE CODING VOCABULARY FOR GRADES 1-10
Age-appropriate programming concepts from block coding to Python
Total: 1000+ programming terms and concepts
"""

CODING_VOCAB = {
    
    # ==================== BLOCK CODING (Grade 1-5) ====================
    "scratch_basics": [
        "scratch", "scratch jr", "scratch 3.0", "scratch cat", "sprite",
        "stage", "backdrop", "costume", "sound", "music",
        "block", "code block", "snap", "connect", "stack",
        "script", "program", "project", "create", "remix",
        "share", "community", "studio", "my stuff", "backpack",
        "play", "stop", "flag", "green flag", "stop sign",
        "pause", "single stepping", "turbo mode", "full screen"
    ],
    
    "scratch_motion": [
        "move", "move 10 steps", "turn", "turn right", "turn left",
        "go to", "go to x y", "go to mouse pointer", "go to random position",
        "glide", "glide to", "point in direction", "point towards",
        "change x by", "change y by", "set x to", "set y to",
        "x position", "y position", "direction", "rotation style",
        "all around", "left-right", "don't rotate", "on edge bounce"
    ],
    
    "scratch_looks": [
        "say", "say for 2 seconds", "think", "think for 2 seconds",
        "show", "hide", "switch costume", "next costume", "costume number",
        "switch backdrop", "next backdrop", "backdrop name", "backdrop number",
        "size", "change size by", "set size to", "effect",
        "change color effect", "change fisheye effect", "change whirl effect",
        "change pixelate effect", "change mosaic effect", "change brightness",
        "clear graphic effects", "go to front", "go to back", "layer"
    ],
    
    "scratch_sound": [
        "play sound", "play sound until done", "stop all sounds",
        "change pitch effect", "change pan left-right effect",
        "clear sound effects", "volume", "change volume by", "set volume to",
        "meow", "pop", "boing", "drum", "instrument", "note", "beat"
    ],
    
    "scratch_events": [
        "when green flag clicked", "when this sprite clicked", "when stage clicked",
        "when backdrop switches to", "when timer greater than",
        "when loudness greater than", "when i receive", "broadcast",
        "broadcast and wait", "message", "event", "trigger", "start"
    ],
    
    "scratch_control": [
        "wait", "wait 1 second", "wait until", "repeat", "repeat 10",
        "forever", "if then", "if else", "condition", "loop",
        "stop all", "stop this script", "stop other scripts in sprite",
        "create clone of myself", "when i start as a clone", "delete this clone"
    ],
    
    "scratch_sensing": [
        "touching", "touching mouse pointer", "touching edge", "touching color",
        "color is touching", "distance to", "distance to mouse pointer",
        "ask and wait", "answer", "key pressed", "space key", "mouse down",
        "mouse x", "mouse y", "loudness", "timer", "reset timer",
        "current year", "current month", "current date", "current day of week",
        "username", "video motion", "video direction", "set video transparency"
    ],
    
    "scratch_operators": [
        "add", "subtract", "multiply", "divide", "pick random",
        "greater than", "less than", "equal", "and", "or", "not",
        "join", "letter of", "length of", "contains", "mod", "round",
        "mathematical function", "absolute", "floor", "ceiling", "square root",
        "sin", "cos", "tan", "asin", "acos", "atan", "ln", "log", "e", "10"
    ],
    
    "scratch_variables": [
        "variable", "make a variable", "set variable to", "change variable by",
        "show variable", "hide variable", "list", "make a list", "add to list",
        "delete from list", "delete all of list", "insert at list",
        "replace item of list", "item of list", "item number in list",
        "length of list", "list contains", "show list", "hide list"
    ],
    
    "scratch_custom_blocks": [
        "my blocks", "make a block", "custom block", "define",
        "run without screen refresh", "add an input", "number or text",
        "boolean", "add a label", "parameter", "argument"
    ],
    
    "blockly_concepts": [
        "blockly", "google blockly", "code.org", "hour of code",
        "visual programming", "drag and drop", "puzzle piece",
        "workspace", "toolbox", "trash", "undo", "redo",
        "maze", "artist", "flappy bird", "angry birds", "frozen",
        "minecraft hour of code", "star wars", "dance party"
    ],
    
    "tynker_platform": [
        "tynker", "tynker junior", "code this", "make art", "make music",
        "puppet", "actor", "physics", "collision", "game design",
        "animation", "story", "comic", "joke machine", "drawing"
    ],
    
    # ==================== PYTHON BASICS (Grade 6-8) ====================
    "python_keywords": [
        "python", "python3", "idle", "interpreter", "shell",
        "and", "as", "assert", "break", "class", "continue",
        "def", "del", "elif", "else", "except", "False",
        "finally", "for", "from", "global", "if", "import",
        "in", "is", "lambda", "None", "nonlocal", "not",
        "or", "pass", "raise", "return", "True", "try",
        "while", "with", "yield", "async", "await"
    ],
    
    "python_data_types": [
        "integer", "int", "float", "floating point", "string", "str",
        "boolean", "bool", "true", "false", "none", "null",
        "list", "tuple", "dictionary", "dict", "set", "frozenset",
        "complex", "bytes", "bytearray", "range", "type", "casting"
    ],
    
    "python_operators": [
        "addition", "subtraction", "multiplication", "division",
        "floor division", "modulus", "exponentiation", "power",
        "equals", "not equals", "greater than", "less than",
        "greater than or equal to", "less than or equal to",
        "and", "or", "not", "is", "is not", "in", "not in",
        "plus equals", "minus equals", "times equals", "divide equals",
        "increment", "decrement", "assignment operator", "comparison operator"
    ],
    
    "python_built_in_functions": [
        "print", "input", "len", "type", "int", "float", "str", "bool",
        "abs", "max", "min", "sum", "round", "pow", "divmod",
        "range", "list", "tuple", "dict", "set", "sorted", "reversed",
        "enumerate", "zip", "map", "filter", "all", "any",
        "chr", "ord", "hex", "oct", "bin", "ascii",
        "format", "eval", "exec", "compile", "open", "help", "dir"
    ],
    
    "python_string_methods": [
        "upper", "lower", "title", "capitalize", "swapcase",
        "strip", "lstrip", "rstrip", "replace", "split", "join",
        "find", "index", "count", "startswith", "endswith",
        "isalpha", "isdigit", "isalnum", "isspace", "islower", "isupper",
        "center", "ljust", "rjust", "zfill", "format", "encode", "decode"
    ],
    
    "python_list_methods": [
        "append", "extend", "insert", "remove", "pop", "clear",
        "index", "count", "sort", "reverse", "copy",
        "slicing", "indexing", "negative index", "step", "concatenation"
    ],
    
    "python_dict_methods": [
        "keys", "values", "items", "get", "update", "pop", "popitem",
        "clear", "copy", "fromkeys", "setdefault", "key value pair"
    ],
    
    "python_control_flow": [
        "if statement", "elif statement", "else statement", "nested if",
        "for loop", "while loop", "nested loop", "break statement",
        "continue statement", "pass statement", "range function",
        "iteration", "iterator", "iterable", "loop variable", "condition"
    ],
    
    "python_functions": [
        "function", "def keyword", "define function", "function name",
        "parameter", "argument", "positional argument", "keyword argument",
        "default argument", "return statement", "return value", "void function",
        "function call", "function definition", "docstring", "local variable",
        "global variable", "scope", "recursion", "recursive function",
        "lambda function", "anonymous function", "arrow function"
    ],
    
    "python_modules": [
        "module", "import", "from import", "as keyword", "alias",
        "math module", "random module", "time module", "datetime module",
        "os module", "sys module", "turtle module", "tkinter module",
        "statistics module", "collections module", "itertools module",
        "built-in module", "third-party module", "custom module",
        "package", "library", "pip", "install", "requirements"
    ],
    
    "python_file_handling": [
        "file", "open", "close", "read", "write", "append",
        "file mode", "r mode", "w mode", "a mode", "x mode",
        "rb mode", "wb mode", "with statement", "context manager",
        "readline", "readlines", "writelines", "seek", "tell",
        "text file", "binary file", "csv file", "json file"
    ],
    
    "python_exception_handling": [
        "exception", "error", "syntax error", "runtime error", "logical error",
        "try block", "except block", "finally block", "else block",
        "raise exception", "custom exception", "exception handling",
        "ValueError", "TypeError", "IndexError", "KeyError", "NameError",
        "ZeroDivisionError", "FileNotFoundError", "ImportError", "AttributeError"
    ],
    
    "python_oop": [
        "class", "object", "instance", "attribute", "method",
        "constructor", "__init__", "self parameter", "instance variable",
        "class variable", "instance method", "class method", "static method",
        "inheritance", "parent class", "child class", "super", "override",
        "encapsulation", "abstraction", "polymorphism", "private", "public"
    ],
    
    # ==================== PROGRAMMING CONCEPTS ====================
    "fundamental_concepts": [
        "algorithm", "flowchart", "pseudocode", "logic", "step by step",
        "sequence", "selection", "iteration", "input", "output", "process",
        "variable", "constant", "expression", "statement", "comment",
        "syntax", "semantics", "compile", "execute", "run", "debug",
        "bug", "error", "warning", "trace", "breakpoint", "debugging",
        "code", "program", "script", "source code", "executable"
    ],
    
    "data_structures": [
        "data structure", "array", "list", "linked list", "stack", "queue",
        "tree", "binary tree", "graph", "hash table", "dictionary",
        "index", "element", "node", "pointer", "reference",
        "push", "pop", "enqueue", "dequeue", "traverse", "search",
        "insertion", "deletion", "sorting", "bubble sort", "selection sort",
        "linear search", "binary search", "time complexity", "space complexity"
    ],
    
    "problem_solving": [
        "problem", "solution", "approach", "strategy", "pattern",
        "decomposition", "abstraction", "pattern recognition", "algorithm design",
        "computational thinking", "logical thinking", "analytical thinking",
        "test case", "edge case", "corner case", "validation", "verification",
        "optimize", "efficient", "scalable", "maintainable", "readable"
    ],
    
    # ==================== DEVELOPMENT TOOLS ====================
    "ides_and_editors": [
        "ide", "integrated development environment", "editor", "text editor",
        "visual studio code", "vs code", "vscode", "pycharm", "sublime text",
        "atom", "notepad++", "vim", "emacs", "geany", "thonny",
        "spyder", "jupyter notebook", "jupyter lab", "google colab",
        "code editor", "syntax highlighting", "autocomplete", "intellisense",
        "code completion", "refactoring", "code formatting", "linting"
    ],
    
    "python_environments": [
        "python idle", "interactive shell", "command line", "terminal",
        "console", "repl", "read eval print loop", "interpreter",
        "python interpreter", "python shell", "ipython", "anaconda",
        "miniconda", "virtual environment", "venv", "conda", "pip",
        "package manager", "dependency", "requirements.txt"
    ],
    
    "version_control": [
        "version control", "git", "github", "repository", "repo",
        "commit", "push", "pull", "clone", "fork", "branch", "merge",
        "pull request", "issue", "readme", "markdown", "license",
        "open source", "collaboration", "code review", "diff", "changelog"
    ],
    
    "online_platforms": [
        "replit", "repl.it", "trinket", "trinket.io", "code playground",
        "online compiler", "online ide", "coding ground", "jdoodle",
        "ideone", "code pen", "code sandbox", "stackblitz",
        "glitch", "codeanywhere", "cloud9", "gitpod"
    ],
    
    # ==================== CODING PROJECTS (Age-Appropriate) ====================
    "beginner_projects": [
        "hello world", "calculator", "simple calculator", "basic calculator",
        "number guessing game", "guess the number", "random number",
        "rock paper scissors", "rps game", "choice game",
        "even odd checker", "prime number checker", "palindrome checker",
        "factorial calculator", "fibonacci series", "armstrong number",
        "sum of digits", "reverse number", "swap two numbers",
        "temperature converter", "fahrenheit to celsius", "unit converter",
        "simple interest calculator", "compound interest", "percentage calculator"
    ],
    
    "intermediate_projects": [
        "tic tac toe", "noughts and crosses", "x and o game",
        "hangman game", "word guessing game", "letter game",
        "quiz program", "mcq quiz", "trivia game", "question answer",
        "to-do list", "task manager", "reminder app", "notes app",
        "password generator", "random password", "strong password",
        "password checker", "password validator", "password strength",
        "story generator", "mad libs", "random story", "text adventure",
        "dice simulator", "coin flip", "random choice", "lucky draw",
        "countdown timer", "stopwatch", "alarm clock", "digital clock",
        "simple chatbot", "echo bot", "question bot", "conversation"
    ],
    
    "graphics_projects": [
        "turtle graphics", "turtle drawing", "shapes drawing", "pattern drawing",
        "spiral", "circle", "square", "triangle", "star", "polygon",
        "random walk", "fractal", "koch snowflake", "sierpinski triangle",
        "animation", "moving sprite", "bouncing ball", "moving object",
        "color changing", "rainbow colors", "gradient", "fill color",
        "spirograph", "mandala", "geometric art", "pixel art"
    ],
    
    "game_projects": [
        "snake game", "snake and ladder", "board game", "card game",
        "memory game", "matching game", "puzzle game", "maze game",
        "pong", "breakout", "brick breaker", "paddle game",
        "flappy bird", "jumping game", "obstacle game", "endless runner",
        "space invaders", "shooter game", "alien game", "arcade game",
        "platformer", "jump and run", "side scroller", "2d game"
    ],
    
    "data_projects": [
        "student database", "contact book", "phonebook", "address book",
        "library management", "book catalog", "inventory system",
        "expense tracker", "budget calculator", "money manager",
        "grade calculator", "marks calculator", "percentage finder",
        "attendance system", "roll call", "student records",
        "file organizer", "file manager", "folder sorter", "file search"
    ],
    
    # ==================== PYTHON LIBRARIES (Beginner-Friendly) ====================
    "turtle_module": [
        "turtle", "screen", "forward", "backward", "left", "right",
        "penup", "pendown", "pensize", "pencolor", "fillcolor",
        "begin_fill", "end_fill", "circle", "dot", "stamp", "clearstamp",
        "speed", "hideturtle", "showturtle", "shape", "goto", "home",
        "position", "xcor", "ycor", "heading", "setheading", "reset", "clear"
    ],
    
    "random_module": [
        "random", "randint", "random integer", "choice", "random choice",
        "shuffle", "sample", "uniform", "random float", "seed",
        "randrange", "random range", "random selection", "probability"
    ],
    
    "math_module": [
        "math", "pi", "e", "sqrt", "square root", "pow", "power",
        "ceil", "ceiling", "floor", "round", "abs", "absolute",
        "factorial", "gcd", "greatest common divisor", "lcm",
        "sin", "cos", "tan", "degrees", "radians", "log", "exp"
    ],
    
    "time_module": [
        "time", "sleep", "time.sleep", "delay", "pause",
        "current time", "timestamp", "epoch", "localtime", "gmtime",
        "strftime", "strptime", "time format", "date format"
    ],
    
    "tkinter_basics": [
        "tkinter", "gui", "graphical user interface", "window", "frame",
        "label", "button", "entry", "text box", "canvas", "menu",
        "pack", "grid", "place", "geometry", "title", "mainloop",
        "click event", "button click", "event handler", "callback"
    ],
    
    # ==================== CODING TERMINOLOGY ====================
    "code_structure": [
        "indentation", "whitespace", "tab", "space", "block", "code block",
        "nested", "nesting", "hierarchy", "parent", "child", "sibling",
        "line", "line of code", "statement", "expression", "declaration",
        "definition", "implementation", "body", "header", "footer"
    ],
    
    "naming_conventions": [
        "variable name", "function name", "class name", "module name",
        "camel case", "snake case", "pascal case", "kebab case",
        "identifier", "naming rules", "reserved words", "keywords",
        "meaningful name", "descriptive name", "self-documenting"
    ],
    
    "code_quality": [
        "clean code", "readable code", "maintainable code", "efficient code",
        "best practices", "coding standards", "style guide", "pep8",
        "documentation", "docstring", "inline comment", "block comment",
        "refactor", "optimize", "simplify", "modularize", "reusable"
    ],
    
    "testing_debugging": [
        "test", "unit test", "test case", "assert", "assertion",
        "debug", "debugger", "breakpoint", "step over", "step into",
        "watch", "inspect", "trace", "stack trace", "error message",
        "exception", "catch", "handle error", "graceful degradation"
    ],
    
    # ==================== PROGRAMMING PARADIGMS ====================
    "programming_styles": [
        "procedural programming", "functional programming", "object oriented",
        "imperative", "declarative", "structured programming",
        "modular programming", "event-driven", "concurrent programming"
    ],
    
    "code_patterns": [
        "pattern", "design pattern", "algorithm pattern", "code template",
        "boilerplate", "scaffold", "skeleton code", "starter code",
        "recursion pattern", "iteration pattern", "accumulator pattern"
    ],
    
    # ==================== WEB BASICS (Introduction) ====================
    "web_intro": [
        "html", "css", "javascript", "web page", "website",
        "browser", "chrome", "firefox", "safari", "edge",
        "url", "link", "hyperlink", "click", "navigate",
        "tag", "element", "attribute", "heading", "paragraph",
        "image", "video", "audio", "button", "form", "input"
    ],
    
    # ==================== COMPUTATIONAL THINKING ====================
    "thinking_skills": [
        "computational thinking", "logical reasoning", "problem decomposition",
        "pattern matching", "abstraction", "algorithm design", "debugging mindset",
        "trial and error", "systematic approach", "step-by-step thinking",
        "cause and effect", "input output relationship", "black box",
        "flowchart thinking", "sequential thinking", "conditional thinking"
    ],
    
    # ==================== CODE ACTIONS ====================
    "coding_verbs": [
        "code", "program", "write code", "type", "compile", "run", "execute",
        "debug", "test", "fix", "solve", "implement", "develop", "build",
        "create", "make", "design", "plan", "think", "analyze",
        "declare", "define", "initialize", "assign", "update", "modify",
        "call", "invoke", "return", "output", "display", "show", "print",
        "input", "read", "get", "fetch", "store", "save", "load",
        "iterate", "loop", "repeat", "traverse", "search", "find",
        "sort", "arrange", "organize", "compare", "check", "validate",
        "optimize", "improve", "refactor", "simplify", "comment", "document"
    ],
    
    # ==================== LEARNING RESOURCES ====================
    "learning_platforms": [
        "code.org", "scratch.mit.edu", "khan academy", "codecademy",
        "freecodecamp", "w3schools", "geeksforgeeks", "tutorialspoint",
        "python.org", "python documentation", "official docs", "tutorial",
        "video tutorial", "youtube coding", "online course", "coding bootcamp",
        "practice problems", "coding challenges", "hackerrank", "codewars",
        "leetcode", "project euler", "codingame", "topcoder"
    ],
    
    # ==================== COMMON ERRORS ====================
    "error_types": [
        "syntax error", "indentation error", "name error", "type error",
        "value error", "index error", "key error", "attribute error",
        "zero division error", "import error", "file not found", "permission error",
        "runtime error", "logical error", "infinite loop", "stack overflow",
        "memory error", "timeout error", "connection error"
    ],
    
    # ==================== COMPUTER SCIENCE BASICS ====================
    "cs_concepts": [
        "computer", "cpu", "processor", "memory", "ram", "storage", "hard disk",
        "input device", "output device", "keyboard", "mouse", "monitor",
        "binary", "bit", "byte", "kilobyte", "megabyte", "gigabyte",
        "ascii", "unicode", "character encoding", "text encoding",
        "hardware", "software", "operating system", "application", "program",
        "network", "internet", "wifi", "cloud", "server", "client"
    ]
}

# ==================== CODING SAMPLE SENTENCES ====================
CODING_SAMPLES = [
    # Python basics
    "python print hello world function input output statement",
    "for loop range iteration variable i counter increment",
    "if elif else condition boolean true false comparison",
    "def function definition parameter argument return value",
    "list append method index element data structure array",
    "while loop break continue statement iteration condition",
    "import random randint choice module library function",
    "string concatenation format upper lower split join",
    "dictionary key value pair get update items method",
    "try except error handling exception valueError typeError",
    
    # Block coding
    "scratch sprite move 10 steps forward turn right animation",
    "repeat forever loop if then condition sensing touching",
    "when green flag clicked event broadcast message receive",
    "change costume switch backdrop looks effect sprite",
    "ask and wait answer input sensing keyboard mouse",
    "variable set to change by show hide list add",
    "go to x y glide motion coordinate stage position",
    "play sound until done volume music note instrument",
    "create clone delete this clone stamping graphics",
    "blockly code.org maze loop repeat times blocks",
    
    # Projects
    "calculator program add subtract multiply divide operation",
    "number guessing game random randint input compare",
    "rock paper scissors choice game user computer winner",
    "tic tac toe board game player move win check",
    "password generator random string uppercase lowercase digits",
    "turtle graphics circle square triangle polygon drawing",
    "quiz program question answer score correct wrong",
    "to-do list task add remove display menu option",
    "simple chatbot input response if elif greeting",
    "countdown timer sleep delay second minute display",
    
    # Concepts
    "algorithm flowchart pseudocode logic step by step sequence",
    "variable assignment data type integer float string boolean",
    "function parameter return reusable code modular programming",
    "loop iteration for while repeat until condition",
    "array list index element append insert remove search",
    "debugging error syntax runtime logical trace fix",
    "class object method attribute self init constructor",
    "file handling open read write append close mode",
    "module import library package pip install dependency",
    "version control git github repository commit push pull",
    
    # Development
    "vs code editor python file .py run execute terminal",
    "jupyter notebook cell code markdown output interactive",
    "idle python shell interpreter execute command prompt",
    "replit online ide collaborative coding project share",
    "github repository clone fork commit pull request issue",
    "syntax highlighting autocomplete intellisense code completion",
    "debugger breakpoint step over inspect variable watch",
    "virtual environment venv conda activate deactivate install",
    "documentation docstring comment explain code purpose",
    "refactor optimize clean code readable maintainable best practice"
]

print("‚úÖ COMPREHENSIVE CODING VOCABULARY LOADED")
print(f"üíª Total categories: {len(CODING_VOCAB)}")
print(f"üìù Total unique terms: {sum(len(v) for v in CODING_VOCAB.values())}")
print(f"üí° Sample sentences: {len(CODING_SAMPLES)}")


‚úÖ COMPREHENSIVE CODING VOCABULARY LOADED
üíª Total categories: 54
üìù Total unique terms: 1250
üí° Sample sentences: 50


In [24]:
"""
COMPREHENSIVE GAMING VOCABULARY
Universal gaming terms, UI elements, actions, and popular games
Total: 1000+ gaming-related terms
"""

GAMING_VOCAB = {
    
    # ==================== GAME UI ELEMENTS ====================
    "hud_elements": [
        "hud", "heads up display", "health bar", "hp bar", "health points",
        "mana bar", "mp bar", "magic points", "energy bar", "stamina bar",
        "experience bar", "xp bar", "level progress", "progress bar",
        "score", "points", "high score", "top score", "best score",
        "lives", "remaining lives", "extra life", "life counter",
        "ammo", "ammunition", "bullets", "magazine", "reload",
        "timer", "countdown", "time limit", "remaining time", "clock",
        "minimap", "radar", "compass", "map", "location indicator",
        "objective marker", "waypoint", "quest marker", "mission marker",
        "crosshair", "reticle", "aim", "targeting", "scope"
    ],
    
    "menu_elements": [
        "main menu", "start menu", "pause menu", "settings menu", "options menu",
        "start game", "new game", "continue", "resume", "load game", "save game",
        "exit", "quit", "back", "return", "cancel", "confirm", "accept",
        "play", "stop", "restart", "retry", "try again", "replay",
        "difficulty", "easy", "normal", "hard", "expert", "nightmare",
        "volume", "sound", "music", "effects", "sfx", "audio settings",
        "graphics", "video settings", "resolution", "fullscreen", "windowed",
        "brightness", "contrast", "gamma", "quality", "low", "medium", "high",
        "controls", "key bindings", "button mapping", "sensitivity", "invert",
        "profile", "account", "user", "player name", "avatar", "username"
    ],
    
    "notifications": [
        "achievement unlocked", "trophy earned", "badge collected",
        "level up", "rank up", "tier up", "promotion", "upgrade available",
        "mission complete", "quest complete", "objective complete", "task done",
        "new item", "item unlocked", "item found", "loot", "reward",
        "bonus", "multiplier", "combo", "streak", "perfect", "flawless",
        "checkpoint reached", "save point", "autosave", "saved",
        "loading", "please wait", "connecting", "synchronizing",
        "error", "connection lost", "disconnected", "reconnecting",
        "warning", "alert", "notification", "message", "announcement"
    ],
    
    "inventory_shop": [
        "inventory", "bag", "backpack", "storage", "stash", "vault",
        "item", "object", "gear", "equipment", "tools", "supplies",
        "weapon", "armor", "shield", "helmet", "boots", "gloves",
        "potion", "elixir", "healing potion", "mana potion", "buff",
        "consumable", "use item", "equip", "unequip", "drop", "discard",
        "shop", "store", "merchant", "vendor", "buy", "sell", "trade",
        "price", "cost", "currency", "coins", "gold", "gems", "crystals",
        "purchase", "transaction", "checkout", "cart", "owned", "locked",
        "upgrade", "enhance", "improve", "forge", "craft", "combine"
    ],
    
    # ==================== PLAYER ACTIONS ====================
    "movement": [
        "move", "walk", "run", "sprint", "jog", "dash", "rush",
        "jump", "hop", "leap", "double jump", "wall jump", "bounce",
        "crouch", "duck", "slide", "crawl", "prone", "stealth",
        "climb", "scale", "ascend", "descend", "grab", "hang",
        "swim", "dive", "float", "surface", "underwater", "paddle",
        "fly", "glide", "hover", "soar", "boost", "thrust",
        "forward", "backward", "left", "right", "up", "down",
        "turn", "rotate", "pivot", "spin", "flip", "roll"
    ],
    
    "combat_actions": [
        "attack", "strike", "hit", "slash", "stab", "punch", "kick",
        "shoot", "fire", "blast", "launch", "throw", "hurl", "toss",
        "aim", "target", "lock on", "focus", "charge", "power up",
        "defend", "block", "parry", "dodge", "evade", "roll away",
        "counter", "counterattack", "riposte", "reflect", "deflect",
        "combo", "chain attack", "special move", "ultimate", "super move",
        "critical hit", "crit", "headshot", "weak point", "vulnerable",
        "damage", "hurt", "wound", "injury", "knockout", "ko", "eliminate",
        "heal", "recover", "regenerate", "restore", "revive", "resurrect"
    ],
    
    "interaction": [
        "interact", "use", "activate", "trigger", "press", "hold", "tap",
        "open", "close", "push", "pull", "grab", "pick up", "collect",
        "talk", "speak", "chat", "dialogue", "conversation", "cutscene",
        "examine", "inspect", "look at", "observe", "read", "scan",
        "unlock", "open door", "break", "destroy", "smash", "demolish",
        "build", "construct", "place", "craft", "create", "make",
        "mine", "gather", "harvest", "farm", "collect resources",
        "fish", "hunt", "forage", "search", "explore", "discover"
    ],
    
    # ==================== GAME MECHANICS ====================
    "character_stats": [
        "character", "player", "avatar", "hero", "protagonist", "warrior",
        "level", "lvl", "experience", "exp", "xp", "experience points",
        "health", "hp", "hit points", "life", "vitality", "endurance",
        "mana", "mp", "magic", "energy", "power", "resource",
        "attack", "atk", "attack power", "damage", "dps", "damage per second",
        "defense", "def", "armor", "resistance", "protection", "guard",
        "speed", "velocity", "agility", "dexterity", "quickness",
        "strength", "str", "might", "power", "force", "muscle",
        "intelligence", "int", "wisdom", "mind", "magic power",
        "luck", "fortune", "critical chance", "crit rate", "accuracy"
    ],
    
    "progression": [
        "level up", "gain level", "rank up", "tier up", "advance",
        "skill tree", "talent tree", "ability tree", "progression path",
        "skill point", "talent point", "ability point", "upgrade point",
        "unlock", "available", "locked", "requirement", "prerequisite",
        "class", "job", "profession", "role", "specialization", "spec",
        "prestige", "rebirth", "new game plus", "ng+", "veteran",
        "mastery", "expertise", "proficiency", "rank", "rating",
        "achievement", "trophy", "badge", "medal", "ribbon", "star",
        "milestone", "goal", "objective", "challenge", "task"
    ],
    
    "game_modes": [
        "single player", "solo", "story mode", "campaign", "adventure mode",
        "multiplayer", "co-op", "cooperative", "team", "squad", "party",
        "versus", "vs", "pvp", "player versus player", "duel", "battle",
        "pve", "player versus environment", "ai", "bots", "computer",
        "online", "offline", "local", "split screen", "lan",
        "ranked", "competitive", "ladder", "tournament", "esports",
        "casual", "unranked", "practice", "training", "tutorial",
        "survival", "endless", "wave", "horde", "zombie mode",
        "creative", "sandbox", "free play", "custom", "modded"
    ],
    
    # ==================== GAME STATES ====================
    "match_states": [
        "loading", "loading screen", "now loading", "please wait",
        "connecting", "waiting for players", "searching", "matchmaking",
        "lobby", "waiting room", "ready", "not ready", "start",
        "countdown", "3 2 1 go", "begin", "fight", "round start",
        "in progress", "ongoing", "active", "playing", "live",
        "paused", "pause", "resume", "unpause", "suspended",
        "round", "wave", "stage", "phase", "turn", "cycle",
        "overtime", "extra time", "sudden death", "tiebreaker",
        "victory", "win", "winner", "champion", "first place",
        "defeat", "loss", "loser", "eliminated", "knocked out",
        "draw", "tie", "stalemate", "even", "no winner",
        "game over", "end", "finished", "complete", "done"
    ],
    
    "performance": [
        "kill", "elimination", "frag", "takedown", "knockout",
        "death", "died", "killed", "eliminated", "fallen",
        "assist", "support", "helped", "aided", "backup",
        "kd ratio", "kill death ratio", "kda", "score", "stats",
        "headshot", "critical", "perfect", "flawless", "untouched",
        "multi kill", "double kill", "triple kill", "quad kill",
        "killing spree", "rampage", "unstoppable", "legendary",
        "first blood", "ace", "clutch", "mvp", "most valuable player",
        "damage dealt", "damage taken", "healing done", "accuracy"
    ],
    
    # ==================== POPULAR GAMES (Age-Appropriate) ====================
    "mobile_games": [
        "mobile game", "mobile gaming", "smartphone game", "tablet game",
        "among us", "impostor", "crewmate", "emergency meeting", "sus",
        "subway surfers", "endless runner", "jake", "surf", "coin dash",
        "temple run", "temple", "idol", "obstacle", "power up run",
        "candy crush", "match 3", "candy", "sweet", "crush saga",
        "angry birds", "bird", "slingshot", "pig", "destruction",
        "fruit ninja", "ninja", "fruit", "slice", "combo slice",
        "clash of clans", "coc", "clan", "village", "troops", "raid",
        "clash royale", "cards", "arena", "tower", "elixir", "battle deck",
        "pokemon go", "pokemon", "catch", "pokeball", "trainer", "gym"
    ],
    
    "sandbox_games": [
        "minecraft", "block", "cube", "craft", "mine", "build",
        "survival", "creative", "hardcore", "adventure", "spectator",
        "overworld", "nether", "end", "dimension", "portal",
        "diamond", "iron", "gold", "emerald", "netherite", "ore",
        "pickaxe", "sword", "axe", "shovel", "hoe", "tool",
        "creeper", "zombie", "skeleton", "enderman", "spider", "mob",
        "village", "villager", "trade", "emerald", "profession",
        "redstone", "circuit", "piston", "repeater", "comparator",
        "enchant", "enchantment", "anvil", "brewing", "potion",
        "roblox", "robux", "avatar", "game creation", "obby", "tycoon"
    ],
    
    "battle_royale": [
        "battle royale", "br", "last man standing", "survive",
        "fortnite", "building", "edit", "ramp", "wall", "floor",
        "storm", "zone", "circle", "safe zone", "closing",
        "loot", "chest", "supply drop", "legendary", "epic", "rare",
        "victory royale", "winner winner", "chicken dinner", "top 1",
        "drop", "landing", "hot drop", "cold drop", "spawn island",
        "free fire", "gloo wall", "character", "pet", "booyah",
        "pubg", "pubg mobile", "erangel", "miramar", "sanhok",
        "airdrop", "crate", "level 3", "helmet", "vest", "backpack",
        "zone damage", "blue zone", "red zone", "play zone"
    ],
    
    "action_adventure": [
        "mario", "super mario", "mushroom", "coin", "star", "power up",
        "jump", "stomp", "pipe", "goomba", "koopa", "bowser",
        "zelda", "link", "triforce", "master sword", "hyrule",
        "sonic", "hedgehog", "rings", "speed", "loop", "emerald",
        "pokemon", "pikachu", "trainer", "battle", "evolution", "type",
        "fire", "water", "grass", "electric", "catch em all",
        "spiderman", "web swing", "combat", "stealth", "city"
    ],
    
    "sports_racing": [
        "fifa", "football", "soccer", "goal", "shoot", "pass", "dribble",
        "real cricket", "cricket", "bat", "bowl", "wicket", "run", "over",
        "basketball", "nba", "dunk", "three pointer", "layup", "court",
        "racing", "car", "speed", "drift", "nitro", "boost", "lap",
        "track", "circuit", "checkpoint", "finish line", "pole position",
        "asphalt", "real racing", "gear", "acceleration", "brake"
    ],
    
    "puzzle_strategy": [
        "puzzle", "brain teaser", "logic", "solve", "solution",
        "match", "connect", "line", "merge", "combine", "swap",
        "tower defense", "td", "tower", "enemy wave", "path", "upgrade",
        "chess", "checkmate", "king", "queen", "rook", "knight",
        "sudoku", "number", "grid", "row", "column", "box",
        "2048", "tile", "merge", "slide", "combo", "high tile"
    ],
    
    "casual_arcade": [
        "arcade", "high score", "coin operated", "cabinet", "retro",
        "flappy bird", "flap", "pipe", "tap", "fly", "obstacle",
        "doodle jump", "jump", "platform", "tilt", "spring", "monster",
        "cut the rope", "rope", "candy", "om nom", "star", "physics",
        "hill climb racing", "hill", "vehicle", "fuel", "distance"
    ],
    
    # ==================== GAME GENRES ====================
    "genre_types": [
        "action", "adventure", "rpg", "role playing game", "jrpg",
        "fps", "first person shooter", "tps", "third person shooter",
        "moba", "multiplayer online battle arena", "arena",
        "mmorpg", "massively multiplayer", "online rpg",
        "strategy", "rts", "real time strategy", "turn based",
        "simulation", "sim", "life sim", "farming sim", "city builder",
        "platformer", "2d platformer", "side scroller", "jump and run",
        "puzzle", "puzzle game", "brain game", "logic game",
        "racing", "racing game", "driving", "kart racing", "arcade racing",
        "sports", "sports game", "football", "basketball", "cricket",
        "fighting", "fighting game", "beat em up", "brawler",
        "stealth", "stealth game", "sneaking", "infiltration",
        "horror", "survival horror", "scary", "psychological horror",
        "rhythm", "music game", "rhythm game", "dance game",
        "educational", "learning game", "edutainment", "brain training"
    ],
    
    # ==================== MULTIPLAYER TERMS ====================
    "online_multiplayer": [
        "server", "host", "join", "create room", "private", "public",
        "friends", "friend list", "invite", "party", "squad", "team",
        "clan", "guild", "alliance", "faction", "crew", "group",
        "voice chat", "voice", "mic", "microphone", "mute", "unmute",
        "text chat", "chat", "message", "whisper", "shout", "emote",
        "ping", "latency", "lag", "connection", "disconnect", "timeout",
        "matchmaking", "queue", "searching", "finding match", "waiting",
        "kick", "ban", "report", "block", "ignore", "mute player",
        "spectate", "watch", "observer", "replay", "stream", "broadcast"
    ],
    
    "competitive_terms": [
        "ranked", "rating", "mmr", "matchmaking rating", "elo", "rank",
        "bronze", "silver", "gold", "platinum", "diamond", "master",
        "grandmaster", "legend", "mythic", "challenger", "immortal",
        "promotion", "demotion", "division", "tier", "bracket",
        "leaderboard", "top player", "world ranking", "global rank",
        "season", "season end", "season reward", "reset", "decay",
        "tournament", "competition", "championship", "finals", "bracket",
        "esports", "professional", "pro player", "team", "organization",
        "meta", "metagame", "strategy", "tier list", "op", "overpowered",
        "nerf", "buff", "patch", "update", "balance", "hotfix"
    ],
    
    # ==================== GAME CONTROLS ====================
    "control_inputs": [
        "keyboard", "mouse", "controller", "gamepad", "joystick",
        "wasd", "arrow keys", "space bar", "shift", "ctrl", "alt",
        "left click", "right click", "scroll", "wheel", "middle mouse",
        "button", "trigger", "bumper", "dpad", "d-pad", "analog stick",
        "touchscreen", "swipe", "tap", "hold", "pinch", "gesture",
        "motion control", "gyroscope", "accelerometer", "tilt", "shake",
        "hotkey", "keybind", "shortcut", "macro", "combo input"
    ],
    
    # ==================== GAME ECONOMY ====================
    "currency_rewards": [
        "coins", "gold", "silver", "money", "cash", "credits",
        "gems", "diamonds", "rubies", "crystals", "jewels", "stones",
        "points", "tokens", "tickets", "vouchers", "coupons",
        "premium currency", "free currency", "soft currency", "hard currency",
        "earn", "gain", "collect", "gather", "farm", "grind",
        "reward", "prize", "gift", "bonus", "daily reward", "login bonus",
        "chest", "loot box", "crate", "pack", "bundle", "deal",
        "free to play", "f2p", "pay to win", "p2w", "microtransaction",
        "battle pass", "season pass", "premium pass", "vip", "subscription"
    ],
    
    # ==================== VISUAL EFFECTS ====================
    "graphics_effects": [
        "graphics", "visuals", "animation", "effects", "particle effect",
        "explosion", "blast", "boom", "fire", "flame", "smoke",
        "lightning", "electricity", "spark", "glow", "shine", "flash",
        "blood", "gore", "impact", "hit effect", "damage number",
        "trail", "motion blur", "speed lines", "after image",
        "shadow", "lighting", "ray tracing", "reflection", "refraction",
        "texture", "shader", "material", "polygon", "model", "mesh",
        "frame rate", "fps", "frames per second", "smooth", "laggy",
        "resolution", "pixel", "hd", "4k", "ultra hd", "quality"
    ],
    
    # ==================== SOUND EFFECTS ====================
    "audio_elements": [
        "sound effect", "sfx", "sound", "audio", "noise",
        "music", "background music", "bgm", "theme", "soundtrack",
        "footstep", "walking sound", "running sound", "jump sound",
        "gunshot", "explosion sound", "hit sound", "damage sound",
        "coin sound", "collect sound", "pickup sound", "item sound",
        "victory music", "defeat music", "game over sound",
        "ambient", "atmosphere", "environmental sound", "wind", "rain",
        "voice", "voice acting", "dialogue", "speech", "narrator",
        "mute", "unmute", "volume up", "volume down", "audio settings"
    ],
    
    # ==================== COMMON GAME VERBS ====================
    "gaming_actions": [
        "play", "gaming", "game", "start", "begin", "launch",
        "join", "enter", "connect", "log in", "sign in", "login",
        "select", "choose", "pick", "decide", "confirm", "accept",
        "navigate", "browse", "scroll", "swipe", "flip", "switch",
        "upgrade", "enhance", "improve", "boost", "power up",
        "unlock", "reveal", "discover", "find", "obtain", "acquire",
        "complete", "finish", "achieve", "accomplish", "clear",
        "win", "victory", "triumph", "succeed", "dominate",
        "lose", "fail", "defeat", "game over", "try again",
        "restart", "retry", "replay", "rematch", "new game",
        "save", "load", "continue", "checkpoint", "autosave",
        "quit", "exit", "leave", "disconnect", "logout", "close"
    ],
    
    # ==================== GAMING SLANG ====================
    "gaming_terms": [
        "noob", "newbie", "beginner", "pro", "professional", "veteran",
        "skill", "skilled", "talent", "gifted", "good", "bad",
        "gg", "good game", "wp", "well played", "nt", "nice try",
        "glhf", "good luck have fun", "ggs", "ggwp", "ez", "easy",
        "rekt", "destroyed", "owned", "dominated", "crushed",
        "camping", "camper", "spawn camping", "spawn kill",
        "rushing", "rusher", "aggressive", "passive", "defensive",
        "farming", "grinding", "leveling", "xp farming", "loot farming",
        "speedrun", "speedrunning", "glitch", "bug", "exploit",
        "mod", "modification", "cheat", "hack", "aimbot", "wallhack",
        "rage quit", "afk", "away from keyboard", "brb", "be right back"
    ],
    
    # ==================== GAME PHASES ====================
    "game_progression": [
        "tutorial", "training", "practice", "learning", "guide",
        "early game", "beginning", "start", "opening", "first level",
        "mid game", "middle", "progression", "development", "advance",
        "late game", "end game", "final", "conclusion", "climax",
        "boss fight", "boss battle", "final boss", "raid boss",
        "mini boss", "elite", "champion", "stronger enemy",
        "new game", "first playthrough", "virgin run",
        "new game plus", "ng+", "second playthrough", "replay",
        "post game", "after credits", "bonus content", "extra"
    ]
}

# ==================== GAMING SAMPLE SENTENCES ====================
GAMING_SAMPLES = [
    # Match states
    "level 15 complete victory points 2500 high score achievement",
    "loading match please wait connecting players lobby",
    "game over defeat try again restart continue menu",
    "victory win winner first place champion leaderboard",
    "round 5 wave 10 survive enemies incoming boss fight",
    
    # Player stats
    "health bar hp 75 mana energy stamina regenerate heal",
    "level up experience points xp gain skill unlock",
    "attack damage defense armor strength speed stats",
    "player 1 character avatar hero warrior class",
    "inventory weapon armor shield potion consumable equip",
    
    # Actions
    "move forward jump run sprint dash action button",
    "attack shoot fire aim target enemy hit damage",
    "collect coins gems rewards loot chest treasure",
    "mission objective complete quest task achievement",
    "pause menu settings controls volume graphics quit",
    
    # Popular games
    "minecraft survival mode creative build blocks craft",
    "among us impostor crewmate emergency meeting sus vote",
    "fortnite battle royale victory building storm zone",
    "roblox game avatar robux play create join server",
    "pokemon catch trainer battle evolution type weakness",
    
    # UI elements
    "score points lives timer countdown remaining time",
    "main menu start new game continue load save",
    "leaderboard rank 1 top player high score stats",
    "achievement unlocked trophy badge reward earned",
    "press start button click tap hold swipe control",
    
    # Multiplayer
    "multiplayer online co-op team squad party versus",
    "join server create room waiting lobby ready start",
    "voice chat mute message friend invite clan guild",
    "ranked competitive rating rank promotion tier",
    "match found connecting players 5v5 team battle",
    
    # Mobile games
    "subway surfers endless runner jake coins power up",
    "candy crush match 3 level star combo crush",
    "clash of clans village troops raid attack defend",
    "free fire booyah gloo wall character squad winner",
    "temple run obstacle jump slide turn collect",
    
    # Game mechanics
    "upgrade enhance level up unlock skill tree talent",
    "difficulty easy normal hard expert mode select",
    "checkpoint save point autosave progress saved",
    "boss battle health bar weak point critical hit",
    "combo chain special move ultimate power finish",
    
    # Performance
    "kill death assist kd ratio mvp best player",
    "damage dealt damage taken healing accuracy stats",
    "headshot critical perfect flawless victory",
    "double kill triple kill multi kill streak",
    "first blood ace clutch legendary performance"
]

print("‚úÖ COMPREHENSIVE GAMING VOCABULARY LOADED")
print(f"üéÆ Total categories: {len(GAMING_VOCAB)}")
print(f"üìù Total unique terms: {sum(len(v) for v in GAMING_VOCAB.values())}")
print(f"üí° Sample sentences: {len(GAMING_SAMPLES)}")


‚úÖ COMPREHENSIVE GAMING VOCABULARY LOADED
üéÆ Total categories: 29
üìù Total unique terms: 1405
üí° Sample sentences: 45


In [27]:
class SyntheticDatasetGenerator:
    def __init__(self):
        self.classes = {
            "Studying": STUDYING_VOCAB,
            "Coding": CODING_VOCAB,
            "Gaming": GAMING_VOCAB
        }
        
        self.base_samples = {
            "Studying": STUDYING_SAMPLES,
            "Coding": CODING_SAMPLES,
            "Gaming": GAMING_SAMPLES
        }
    
    def generate_realistic_sample(self, class_name):
        """Generate realistic text samples"""
        vocab = self.classes[class_name]
        
        if class_name == "Studying":
            # Flatten all study topics
            all_topics = []
            for key in vocab.keys():
                if isinstance(vocab[key], list):
                    all_topics.extend(vocab[key])
            
            templates = [
                "chapter {num} {subject} {topic} notes class {grade}",
                "{subject} {topic} exercise {num} question {num} answer",
                "{topic} definition formula example {subject} grade {grade}",
                "study material {subject} {topic} pdf download ncert",
                "{topic} diagram explanation {subject} chapter {num}",
                "{subject} class {grade} {topic} practice worksheet assignment",
                "homework {subject} {topic} solve problems exercise {num}",
                "{topic} textbook page {num} {subject} ncert solutions",
                "revision notes {subject} {topic} important questions exam",
                "{subject} {topic} theory concepts grade {grade} cbse"
            ]
            
            # Get subjects and topics
            subjects = vocab.get("subjects", [])
            if not subjects:
                subjects = ["mathematics", "science", "english", "history"]
            
            template = random.choice(templates)
            sample = template.format(
                num=random.randint(1, 15),
                subject=random.choice(subjects) if subjects else "mathematics",
                topic=random.choice(all_topics[:100]) if all_topics else "equation",
                grade=random.randint(6, 10)
            )
            
            # Add context words
            study_verbs = vocab.get("study_verbs", ["study", "learn", "practice"])
            extra = random.sample(study_verbs, k=min(3, len(study_verbs)))
            return f"{sample} {' '.join(extra)}"
        
        elif class_name == "Coding":
            # Flatten all coding terms
            all_terms = []
            for key in vocab.keys():
                if isinstance(vocab[key], list):
                    all_terms.extend(vocab[key])
            
            templates = [
                "def {func}({param}): {action} return {var} python code",
                "for {var} in range({num}): print {action} loop",
                "import {lib} {func} function {action} example code",
                "{ide} editor {lang} file run {action} terminal output",
                "{project} program {lang} {action} function {var}",
                "python {func} {action} {struct} code example tutorial",
                "scratch {action} sprite repeat block {project} game",
                "vs code {lang} {action} debug error syntax {func}",
                "{project} coding {lang} {action} algorithm {struct}",
                "jupyter notebook {lang} {action} cell execute output"
            ]
            
            # Get specific vocab lists
            python_keywords = vocab.get("python_keywords", ["print", "input", "for", "if"])
            projects = vocab.get("beginner_projects", vocab.get("intermediate_projects", ["calculator", "game"]))
            
            template = random.choice(templates)
            sample = template.format(
                func=random.choice(["calculate", "display", "process", "get", "set"]),
                param=random.choice(["x", "y", "data", "value", "num"]),
                action=random.choice(all_terms[:50]) if all_terms else "execute",
                var=random.choice(["result", "output", "value", "sum", "data"]),
                num=random.randint(1, 100),
                lib=random.choice(["random", "math", "time", "turtle"]),
                ide=random.choice(["vs code", "python idle", "jupyter"]),
                lang=random.choice(["python", "scratch"]),
                project=random.choice(projects[:20]) if projects else "calculator",
                struct=random.choice(["list", "array", "dictionary", "string"])
            )
            
            # Add keywords
            extra = random.sample(python_keywords, k=min(3, len(python_keywords)))
            return f"{sample} {' '.join(extra)}"
        
        elif class_name == "Gaming":
            # Flatten all gaming terms
            all_terms = []
            for key in vocab.keys():
                if isinstance(vocab[key], list):
                    all_terms.extend(vocab[key])
            
            templates = [
                "level {num} {action} {enemy} {ui} {points} score",
                "{game} {genre} {screen} {action} {ui} menu",
                "mission {num} {action} {objective} {reward} {ui}",
                "{screen} {game} {action} {ui} {points} rank {num}",
                "player {ui} {action} {item} inventory {points} xp",
                "{game} multiplayer {action} {screen} lobby join",
                "victory {ui} {game} {action} leaderboard rank {num}",
                "boss battle {action} {ui} damage {points} defeat",
                "{game} {genre} play {action} {ui} achievement unlock",
                "match {screen} {action} {ui} {game} team squad"
            ]
            
            # Get specific vocab lists
            hud_elements = vocab.get("hud_elements", ["health", "score", "points"])
            menu_elements = vocab.get("menu_elements", ["start", "pause", "quit"])
            mobile_games = vocab.get("mobile_games", ["minecraft", "roblox", "among us"])
            
            template = random.choice(templates)
            sample = template.format(
                num=random.randint(1, 100),
                action=random.choice(all_terms[:50]) if all_terms else "attack",
                enemy=random.choice(["enemy", "boss", "opponent", "monster"]),
                ui=random.choice(hud_elements[:20]) if hud_elements else "health",
                points=random.randint(100, 9999),
                game=random.choice(mobile_games[:15]) if mobile_games else "minecraft",
                genre=random.choice(["action", "adventure", "puzzle", "racing"]),
                screen=random.choice(menu_elements[:10]) if menu_elements else "loading",
                objective=random.choice(["collect", "defeat", "reach", "find"]),
                reward=random.choice(["coins", "gems", "rewards", "prizes"]),
                item=random.choice(["weapon", "armor", "potion", "shield"])
            )
            
            # Add mechanics
            combat_actions = vocab.get("combat_actions", ["attack", "defend", "jump"])
            extra = random.sample(combat_actions, k=min(3, len(combat_actions)))
            return f"{sample} {' '.join(extra)}"
        
        return ""
    
    def generate_dataset(self, samples_per_class=1000):
        """Generate complete dataset"""
        dataset = []
        
        print("üéØ Generating Synthetic Dataset")
        print("=" * 60)
        
        for class_name in self.classes.keys():
            print(f"\nüìù Generating {samples_per_class} samples for {class_name}...")
            
            for i in range(samples_per_class):
                try:
                    text = self.generate_realistic_sample(class_name)
                    
                    dataset.append({
                        "text": text.lower(),
                        "label": class_name
                    })
                    
                    if (i + 1) % 200 == 0:
                        print(f"  Progress: {i + 1}/{samples_per_class} samples")
                except Exception as e:
                    print(f"  Warning: Error at sample {i}: {e}")
                    # Use a base sample as fallback
                    fallback = random.choice(self.base_samples[class_name])
                    dataset.append({
                        "text": fallback.lower(),
                        "label": class_name
                    })
        
        random.shuffle(dataset)
        print(f"\n‚úÖ Dataset generation complete!")
        print(f"üìä Total samples: {len(dataset)}")
        
        return dataset

print("‚úÖ FIXED Dataset generator class created")


‚úÖ FIXED Dataset generator class created


In [28]:
# Generate dataset
generator = SyntheticDatasetGenerator()
dataset = generator.generate_dataset(samples_per_class=1000)

# Show sample
print("\nüìã Sample texts from dataset:")
print("=" * 60)
for i in range(3):
    sample = random.choice(dataset)
    print(f"\nClass: {sample['label']}")
    print(f"Text: {sample['text'][:150]}...")


üéØ Generating Synthetic Dataset

üìù Generating 1000 samples for Studying...
  Progress: 200/1000 samples
  Progress: 400/1000 samples
  Progress: 600/1000 samples
  Progress: 800/1000 samples
  Progress: 1000/1000 samples

üìù Generating 1000 samples for Coding...
  Progress: 200/1000 samples
  Progress: 400/1000 samples
  Progress: 600/1000 samples
  Progress: 800/1000 samples
  Progress: 1000/1000 samples

üìù Generating 1000 samples for Gaming...
  Progress: 200/1000 samples
  Progress: 400/1000 samples
  Progress: 600/1000 samples
  Progress: 800/1000 samples
  Progress: 1000/1000 samples

‚úÖ Dataset generation complete!
üìä Total samples: 3000

üìã Sample texts from dataset:

Class: Coding
Text: python set pause string code example tutorial def as python3...

Class: Gaming
Text: endless runner adventure play minimap mp bar achievement unlock kick counterattack punch...

Class: Studying
Text: chapter 9 sports hindi notes class 10 map demonstrate understand...


In [29]:
# Extract texts and labels
texts = [sample['text'] for sample in dataset]
labels_text = [sample['label'] for sample in dataset]

# Label encoding
label_map = {"Studying": 0, "Coding": 1, "Gaming": 2}
labels = [label_map[label] for label in labels_text]

print(f"üìä Dataset Statistics:")
print(f"Total samples: {len(texts)}")
print(f"Class distribution: {np.bincount(labels)}")
print(f"Classes: {list(label_map.keys())}")


üìä Dataset Statistics:
Total samples: 3000
Class distribution: [1000 1000 1000]
Classes: ['Studying', 'Coding', 'Gaming']


In [30]:
# Vectorize with hashing
MAX_FEATURES = 20000

vectorizer = HashingVectorizer(
    n_features=MAX_FEATURES,
    ngram_range=(1, 3),  # unigrams, bigrams, trigrams
    norm='l2',
    alternate_sign=False
)

print("üîÑ Vectorizing text data...")
X = vectorizer.transform(texts).toarray()
y = tf.keras.utils.to_categorical(labels, 3)

print(f"‚úÖ Vectorization complete!")
print(f"Feature matrix shape: {X.shape}")
print(f"Label matrix shape: {y.shape}")


üîÑ Vectorizing text data...
‚úÖ Vectorization complete!
Feature matrix shape: (3000, 20000)
Label matrix shape: (3000, 3)


In [31]:
# Split data
X_train, X_val, y_train, y_val = train_test_split(
    X, y, 
    test_size=0.2, 
    random_state=42, 
    stratify=labels
)

print(f"Training set: {X_train.shape[0]} samples")
print(f"Validation set: {X_val.shape[0]} samples")


Training set: 2400 samples
Validation set: 600 samples


In [32]:
# Build neural network
model = models.Sequential([
    layers.Input(shape=(MAX_FEATURES,)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.4),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(3, activation='softmax')
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print(model.summary())


None


In [33]:
# Train
print("\nüöÄ Starting training...")

history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            patience=5, 
            restore_best_weights=True,
            verbose=1
        )
    ],
    verbose=1
)

print("\n‚úÖ Training complete!")



üöÄ Starting training...
Epoch 1/20
[1m75/75[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.9467 - loss: 0.5643 - val_accuracy: 1.0000 - val_loss: 0.0200
Epoch 2/20
[1m75/75[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0099 - val_accuracy: 1.0000 - val_loss: 0.0018
Epoch 3/20
[1m75/75[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0024 - val_accuracy: 1.0000 - val_loss: 7.5412e-04
Epoch 4/20
[1m75/75[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 0.0015 - val_accuracy: 1.0000 - val_loss: 3.8126e-04
Epoch 5/20
[1m75/75[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 1.0000 - loss: 7.5517e-04 - 

In [34]:
# Evaluate
loss, accuracy = model.evaluate(X_val, y_val, verbose=0)

print(f"\nüìä Final Results:")
print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy*100:.2f}%")

# Test predictions
print("\nüîç Sample Predictions:")
print("=" * 60)

test_samples = [
    "chapter 5 photosynthesis biology class 10 ncert notes",
    "python for loop range function code example",
    "minecraft level 10 survival mode inventory items"
]

for text in test_samples:
    vec = vectorizer.transform([text]).toarray()
    pred = model.predict(vec, verbose=0)
    pred_class = np.argmax(pred)
    class_names = ["Studying", "Coding", "Gaming"]
    
    print(f"\nText: {text}")
    print(f"Predicted: {class_names[pred_class]} ({pred[0][pred_class]*100:.1f}%)")



üìä Final Results:
Validation Loss: 0.0000
Validation Accuracy: 100.00%

üîç Sample Predictions:

Text: chapter 5 photosynthesis biology class 10 ncert notes
Predicted: Studying (100.0%)

Text: python for loop range function code example
Predicted: Coding (100.0%)

Text: minecraft level 10 survival mode inventory items
Predicted: Gaming (94.3%)


In [35]:
# Save Keras model
os.makedirs('models', exist_ok=True)
model.save('models/text_activity_model.h5')
print("‚úÖ Keras model saved to: models/text_activity_model.h5")




‚úÖ Keras model saved to: models/text_activity_model.h5


In [36]:
import tensorflowjs as tfjs

# Convert and save
tfjs.converters.save_keras_model(model, 'models/tfjs_text_model')

print("‚úÖ TensorFlow.js model saved to: models/tfjs_text_model/")
print("\nüì¶ Files created:")
print("  - model.json")
print("  - group1-shard1of1.bin")
print("\nüéâ Model ready for deployment to React app!")




failed to lookup keras version from the file,
    this is likely a weight only file
‚úÖ TensorFlow.js model saved to: models/tfjs_text_model/

üì¶ Files created:
  - model.json
  - group1-shard1of1.bin

üéâ Model ready for deployment to React app!


In [37]:
# Comprehensive test
print("üß™ Testing Final Model")
print("=" * 60)

test_cases = {
    "Studying": [
        "mathematics chapter 7 quadratic equations exercise 7.3",
        "biology photosynthesis notes class 10 ncert pdf",
        "history freedom struggle worksheet grade 8"
    ],
    "Coding": [
        "python def function return value code example",
        "scratch sprite move repeat forever animation",
        "vs code terminal run python file execute"
    ],
    "Gaming": [
        "fortnite battle royale victory match complete",
        "minecraft survival mode level inventory items",
        "mobile game score points leaderboard rank"
    ]
}

for expected_class, samples in test_cases.items():
    print(f"\n{expected_class} Examples:")
    for text in samples:
        vec = vectorizer.transform([text]).toarray()
        pred = model.predict(vec, verbose=0)
        pred_class = np.argmax(pred)
        class_names = ["Studying", "Coding", "Gaming"]
        confidence = pred[0][pred_class] * 100
        
        result = "‚úÖ" if class_names[pred_class] == expected_class else "‚ùå"
        print(f"  {result} {class_names[pred_class]} ({confidence:.1f}%): {text[:50]}...")

print("\n" + "=" * 60)
print("üéâ All done! Model is ready to use!")


üß™ Testing Final Model

Studying Examples:
  ‚úÖ Studying (99.3%): mathematics chapter 7 quadratic equations exercise...
  ‚úÖ Studying (100.0%): biology photosynthesis notes class 10 ncert pdf...
  ‚úÖ Studying (99.6%): history freedom struggle worksheet grade 8...

Coding Examples:
  ‚úÖ Coding (100.0%): python def function return value code example...
  ‚úÖ Coding (99.3%): scratch sprite move repeat forever animation...
  ‚úÖ Coding (100.0%): vs code terminal run python file execute...

Gaming Examples:
  ‚úÖ Gaming (98.2%): fortnite battle royale victory match complete...
  ‚úÖ Gaming (98.4%): minecraft survival mode level inventory items...
  ‚úÖ Gaming (100.0%): mobile game score points leaderboard rank...

üéâ All done! Model is ready to use!
