In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

# Download NLTK resources (uncomment and run this line if needed)
#nltk.download('punkt')
#nltk.download('stopwords')
#nltk.download('wordnet')

# Initialize NLTK components
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_input(input_text):
    # Tokenize the input text
    tokens = word_tokenize(input_text.lower())
    
    # Remove punctuation and stop words
    tokens = [token for token in tokens if token not in string.punctuation and token not in stop_words]
    
    # Lemmatize tokens
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    
    return tokens


In [2]:
def recognize_intent(tokens):
    # Define intents and corresponding keywords
    intents = {
        'solve_equation': {'solve', 'equation'},
        'explain_concept': {'explain', 'concept'},
        'ask_help': {'help', 'understand'}
    }
    
    # Check for keywords in the preprocessed tokens
    for intent, keywords in intents.items():
        if any(keyword in tokens for keyword in keywords):
            return intent
    
    # If no intent is recognized, return a default intent
    return 'unknown_intent'


In [3]:
# Sample queries to test intent recognition
query1 = "Can you help me understand linear equations?"
query2 = "How do I solve a quadratic equation?"
query3 = "Explain factoring polynomials to me."

# Preprocess input queries
tokens1 = preprocess_input(query1)
tokens2 = preprocess_input(query2)
tokens3 = preprocess_input(query3)

# Recognize intents for each query
intent1 = recognize_intent(tokens1)
intent2 = recognize_intent(tokens2)
intent3 = recognize_intent(tokens3)

# Print recognized intents
print("Intent for query 1:", intent1)
print("Intent for query 2:", intent2)
print("Intent for query 3:", intent3)


Intent for query 1: solve_equation
Intent for query 2: solve_equation
Intent for query 3: explain_concept


In [4]:
def generate_response(intent):
    # Define sample responses for each intent
    responses = {
        'solve_equation': "To solve for x, isolate it on one side of the equation and perform the necessary operations.",
        'explain_concept': "Sure! What concept would you like me to explain?",
        'ask_help': "Of course! How can I assist you?"
    }
    
    # Retrieve the response for the recognized intent
    response = responses.get(intent, "I'm sorry, I'm not sure how to respond to that.")
    
    return response


In [5]:
# Generate responses for each recognized intent
response1 = generate_response(intent1)
response2 = generate_response(intent2)
response3 = generate_response(intent3)

# Print generated responses
print("Response for query 1:", response1)
print("Response for query 2:", response2)
print("Response for query 3:", response3)


Response for query 1: To solve for x, isolate it on one side of the equation and perform the necessary operations.
Response for query 2: To solve for x, isolate it on one side of the equation and perform the necessary operations.
Response for query 3: Sure! What concept would you like me to explain?


In [None]:
# Define the main interactive loop
def chat():
    print("Welcome to the Algebraic Conversational AI!")
    print("You can ask questions about algebra, and I'll do my best to help you.")

    while True:
        # Get user input
        user_input = input("\nYou: ").strip()

        # Preprocess user input
        tokens = preprocess_input(user_input)

        # Recognize intent
        intent = recognize_intent(tokens)

        # Generate response
        response = generate_response(intent)

        # Print AI response
        print("AI:", response)

        # Check if the user wants to end the conversation
        if user_input.lower() == 'exit':
            print("Goodbye!")
            break

# Start the conversation
chat()


In [None]:
import random

# Define templates for different types of algebraic questions
equation_template = "Solve the equation {} for {}."
expression_template = "Simplify the expression {}."
concept_template = "Explain {} to me."

# Generate synthetic questions and answers
def generate_synthetic_data(num_questions_per_intent):
    synthetic_data = []
    
    # Generate questions for solving equations
    for _ in range(num_questions_per_intent):
        equation = "2x + 3 = 9"  # Example equation
        variable = "x"
        question = equation_template.format(equation, variable)
        answer = "To solve for x, subtract 3 from both sides and then divide both sides by 2."
        synthetic_data.append((question, answer))
    
    # Generate questions for simplifying expressions
    for _ in range(num_questions_per_intent):
        expression = "2x^2 + 3x - 5"  # Example expression
        question = expression_template.format(expression)
        answer = "The expression can be simplified by combining like terms."
        synthetic_data.append((question, answer))
    
    # Generate questions for explaining concepts
    for _ in range(num_questions_per_intent):
        concept = "factoring polynomials"  # Example concept
        question = concept_template.format(concept)
        answer = "Factoring polynomials involves expressing a polynomial as a product of its factors."
        synthetic_data.append((question, answer))
    
    return synthetic_data

# Generate synthetic data with 5 questions per intent
synthetic_data = generate_synthetic_data(num_questions_per_intent=5)

# Print the generated synthetic data
for i, (question, answer) in enumerate(synthetic_data, start=1):
    print(f"Question {i}: {question}")
    print(f"Answer {i}: {answer}\n")


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

# Download NLTK resources (uncomment and run this line if needed)
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')

# Initialize NLTK components
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    # Tokenize the text
    tokens = word_tokenize(text.lower())
    
    # Remove punctuation and stop words
    tokens = [token for token in tokens if token not in string.punctuation and token not in stop_words]
    
    # Lemmatize tokens
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    
    # Join tokens back into a string
    processed_text = ' '.join(tokens)
    
    return processed_text

# Preprocess synthetic data
preprocessed_synthetic_data = [(preprocess_text(question), preprocess_text(answer)) for question, answer in synthetic_data]

# Print preprocessed synthetic data
for i, (question, answer) in enumerate(preprocessed_synthetic_data, start=1):
    print(f"Question {i}: {question}")
    print(f"Answer {i}: {answer}\n")



In [None]:
from sklearn.model_selection import train_test_split

# Split the preprocessed data into training and testing sets (80% train, 20% test)
train_data, test_data = train_test_split(preprocessed_synthetic_data, test_size=0.2, random_state=42)

# Print the number of samples in each set
print("Number of training samples:", len(train_data))
print("Number of testing samples:", len(test_data))


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit vectorizer on training data and transform training and testing data
X_train = tfidf_vectorizer.fit_transform([question for question, _ in train_data])
X_test = tfidf_vectorizer.transform([question for question, _ in test_data])

# Print the shape of the vectorized data
print("Shape of training data:", X_train.shape)
print("Shape of testing data:", X_test.shape)


In [None]:
print("hello")

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer

# Split the preprocessed data into training and testing sets (80% train, 20% test)
train_data, test_data = train_test_split(preprocessed_synthetic_data, test_size=0.2, random_state=42)

# Vectorize the text data using TF-IDF
tfidf_vectorizer = TfidfVectorizer()
X_train = tfidf_vectorizer.fit_transform([question for question, _ in train_data])
X_test = tfidf_vectorizer.transform([question for question, _ in test_data])

# Convert sparse matrices to dense NumPy arrays
X_train = X_train.toarray()
X_test = X_test.toarray()

# Extract the true answers from the training data
true_answers_train = [answer for _, answer in train_data]

# Create a dictionary mapping each unique answer to its index
answer_to_index = {answer: idx for idx, answer in enumerate(set(true_answers_train))}

# Convert true answers to their corresponding indices for both training and testing data
train_indices = [answer_to_index[answer] for answer in true_answers_train]

# Split the training data into training and validation sets (80% train, 20% validation)
X_train, X_val, train_indices, val_indices = train_test_split(X_train, train_indices, test_size=0.2, random_state=42)

# Define the model architecture
model = Sequential([
    Dense(128, activation='relu', input_dim=X_train.shape[1]),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(len(answer_to_index), activation='softmax')
])

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model using the training set and validate on the validation set
history = model.fit(X_train, np.array(train_indices), epochs=10, batch_size=32, validation_data=(X_val, np.array(val_indices)))

# Evaluate the model on testing data
true_answers_test = [answer for _, answer in test_data]
test_indices = [answer_to_index.get(answer, -1) for _, answer in test_data]  # Use get() to handle missing answers
loss, accuracy = model.evaluate(X_test, np.array(test_indices), verbose=0)

# Print the model's accuracy
print("Model Accuracy:", accuracy)


In [None]:
# Get the model's predictions on the testing data
predictions = model.predict(X_test)
predicted_indices = np.argmax(predictions, axis=1)

# Map predicted indices back to answer labels
predicted_answers = [list(answer_to_index.keys())[idx] for idx in predicted_indices]

# Get the true labels from the testing data
true_answers_test = [answer for _, answer in test_data]

# Compare predicted and true labels
for i in range(len(predicted_answers)):
    print(f"Text: {test_data[i][0]}")
    print(f"True Label: {true_answers_test[i]}")
    print(f"Predicted Label: {predicted_answers[i]}")
    print("---------------------------------------------")


In [None]:
from sklearn.metrics import classification_report

# Convert true labels and predicted labels to numerical indices
true_indices = np.array([answer_to_index.get(answer, -1) for answer in true_answers_test])

# Generate a classification report
print(classification_report(true_indices, predicted_indices))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Generate confusion matrix
conf_matrix = confusion_matrix(true_indices, predicted_indices)

# Plot confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=sorted(set(true_answers_test)), yticklabels=sorted(set(true_answers_test)))
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()


In [None]:
from sklearn.metrics import accuracy_score

def custom_scorer(model, X, y):
    y_pred = np.argmax(model.predict(X), axis=1)
    return accuracy_score(y, y_pred)

# Initialize GridSearchCV and RandomizedSearchCV with custom scoring
grid_search = GridSearchCV(model, param_grid, cv=3, scoring=custom_scorer)
random_search = RandomizedSearchCV(model, param_distributions=param_grid, n_iter=10, cv=3, scoring=custom_scorer)


In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input

class KerasModel(BaseEstimator, ClassifierMixin):
    def __init__(self, learning_rate=0.001, dropout_rate=0.2, num_layers=2, num_units=64, epochs=10, batch_size=32):
        self.learning_rate = learning_rate
        self.dropout_rate = dropout_rate
        self.num_layers = num_layers
        self.num_units = num_units
        self.epochs = epochs
        self.batch_size = batch_size
        self.model = self._build_model()
        
    def _build_model(self):
        input_layer = Input(shape=(X_train.shape[1],))
        x = input_layer
        x = Dense(self.num_units, activation='relu')(x)
        x = Dropout(self.dropout_rate)(x)
        for _ in range(self.num_layers - 1):
            x = Dense(self.num_units, activation='relu')(x)
            x = Dropout(self.dropout_rate)(x)
        output_layer = Dense(num_classes, activation='softmax')(x)
        
        model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
        
        optimizer = Adam(learning_rate=self.learning_rate)
        model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        
        return model
        
    def fit(self, X, y):
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, validation_split=0.2)
        
    def predict(self, X):
        return np.argmax(self.model.predict(X), axis=1)
    
    def score(self, X, y):
        _, accuracy = self.model.evaluate(X, y)
        return accuracy


In [None]:
from sklearn.model_selection import StratifiedKFold

# Define a custom cross-validation strategy
kfolds = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

# Initialize GridSearchCV and RandomizedSearchCV with custom scoring and cross-validation
grid_search = GridSearchCV(model, param_grid, cv=kfolds.split(X_train, train_indices), scoring=custom_scorer)
random_search = RandomizedSearchCV(model, param_distributions=param_grid, n_iter=10, cv=kfolds.split(X_train, train_indices), scoring=custom_scorer)

# Fit the models
grid_search.fit(X_train, train_indices)
random_search.fit(X_train, train_indices)

# Print the best hyperparameters
print("Best hyperparameters for grid search:", grid_search.best_params_)
print("Best hyperparameters for random search:", random_search.best_params_)
