# In [None]:
import ast
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Sample Python code snippets (as training data).
#
# `code_samples[i]` is paired with `labels[i]`, so the two lists must stay the
# same length and in the same order. Label key:
#   0 = expression / assignment
#   1 = function definition
#   2 = conditional (top-level `if`)
#   3 = loop (top-level `for` / `while`)
code_samples = [
    # Expressions
    "x = 5 + 3",
    "y = 10 - 2",
    "z = 3 * 4",
    "a = 10 / 2",
    "b = 3 ** 2",
    "x = 3 * (2 + 1)",

    # Functions
    "def foo():\n    return 42",
    "def bar(a, b):\n    return a + b",
    "def multiply(x, y):\n    return x * y",
    "def greet(name):\n    print(f'Hello {name}')",
    "def factorial(n):\n    return 1 if n == 0 else n * factorial(n - 1)",

    # Conditionals
    "if x > 10:\n    print('Hello')",
    "if x < 5:\n    print('Small')",
    "if x == 10:\n    print('Ten')",
    "if x != 0:\n    print('Non-zero')",
    "if y == 0:\n    print('Zero')",
    "if a > b:\n    print('A is greater')",

    # Loops
    "for i in range(5):\n    print(i)",
    "while x < 5:\n    x += 1",
    "for i in range(10):\n    print(i * i)",
    "while x > 0:\n    x -= 1",
    "for i in range(2, 5):\n    print(i)",
    "while True:\n    break",

    # More complex Loops and Conditionals
    "for i in range(1, 11):\n    if i % 2 == 0:\n        print(f'{i} is even')",
    "while x != 10:\n    x += 1",
    "for i in range(3):\n    if i % 2 != 0:\n        print(f'{i} is odd')",
    "if x > 0 and y < 5:\n    print('x is positive and y is less than 5')",
]

labels = [
    # Expressions
    0, 0, 0, 0, 0, 0,

    # Functions
    1, 1, 1, 1, 1,

    # Conditionals
    2, 2, 2, 2, 2, 2,

    # Loops
    3, 3, 3, 3, 3, 3,

    # More complex Loops and Conditionals.
    # Both `for ... if ...` snippets are labelled by their outermost statement
    # (loop = 3) so that structurally identical samples share one label; only
    # the final top-level `if` is a conditional (2).
    3, 3, 3, 2,
]


# Convert code to AST representation (string-based)
def extract_ast_node_types(code):
    """Return the AST node class names found in *code*, space-separated.

    The source is parsed with ``ast.parse`` and traversed with ``ast.walk``;
    each visited node contributes its class name, in traversal order.

    Args:
        code: Python source text to parse.

    Returns:
        A single string of node class names joined by spaces, or ``""`` when
        parsing raises ``SyntaxError``.
    """
    try:
        parsed = ast.parse(code)
    except SyntaxError:
        # Unparseable snippets contribute no features.
        return ""

    node_names = [type(node).__name__ for node in ast.walk(parsed)]
    return " ".join(node_names)

# Build one AST node-type string per training snippet
ast_features = list(map(extract_ast_node_types, code_samples))

# Turn the node-type strings into token-count feature vectors
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(ast_features)

# Hold out 20% of the samples; the seed is fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit a multinomial Naive Bayes classifier on the training portion
model = MultinomialNB()
model.fit(X_train, y_train)

# Two snippets to classify: a `while` loop and an `if` statement
new_code = "while x < 5:\n    x += 1"
new_code2 = "if x == 10:\n    print('Ten')"

# Featurize both snippets with the already-fitted vectorizer
new_ast = extract_ast_node_types(new_code)
new_ast2 = extract_ast_node_types(new_code2)
new_X = vectorizer.transform([new_ast])
new_X2 = vectorizer.transform([new_ast2])

# Classify the loop snippet; the expected label is 3 (loop)
prediction = model.predict(new_X)
print(f"Predicted category: {prediction[0]}")

# Classify the conditional snippet
prediction = model.predict(new_X2)
print(f"Predicted category 2: {prediction[0]}")