In [1]:
# @title libraries import

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
import joblib


In [2]:
# @title drive mount
# Attach Google Drive to the Colab runtime so files under it can be read below.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


Mounted at /content/drive


In [3]:
# @title read file
# Load the training CSV from the mounted Drive folder into a DataFrame.
train_csv_path = '/content/drive/MyDrive/Colab Notebooks/train.csv'
df = pd.read_csv(train_csv_path)


In [5]:
# @title Combine questions and codes for feature extraction
# Disabled: the cells below use only the 'question' column as the text feature.
#df['text'] = df['question'] + " " + df['solutions']

In [30]:
# @title Vectorization
# Learn a TF-IDF vocabulary from the question texts and build the feature matrix X.
question_texts = df['question']
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(question_texts)

In [50]:
# @title Encode labels
# Map each difficulty value to an integer class id; the encoder is kept so
# predictions can be translated back to the original labels later.
difficulty_labels = df['difficulty']
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(difficulty_labels)


In [51]:
from sklearn.cluster import KMeans

# Use one cluster per distinct value in the 'difficulty' column
# (dropna=False keeps the count identical to len(df['difficulty'].unique())).
num_clusters = df['difficulty'].nunique(dropna=False)

# Fixed seed so the clustering is repeatable between runs.
kmeans = KMeans(random_state=42, n_clusters=num_clusters)
kmeans.fit(X)




In [52]:
# Store each row's K-Means cluster assignment as a new 'cluster_label' column on df.
cluster_assignments = kmeans.labels_
df['cluster_label'] = cluster_assignments


In [55]:
def get_random_question_by_level(df, level):
    """Return a random question from the cluster that best matches a difficulty level.

    Args:
        df: DataFrame with 'question', 'difficulty' and 'cluster_label' columns.
        level: A value that occurs in ``df['difficulty']``.

    Returns:
        One entry of ``df['question']``, sampled uniformly from the rows whose
        'cluster_label' equals the most frequent cluster among the rows of ``level``.

    Raises:
        KeyError: If ``level`` does not occur in ``df['difficulty']``.
    """
    # Cluster assignments of the rows that carry the requested difficulty level.
    level_clusters = df.loc[df['difficulty'] == level, 'cluster_label']
    if level_clusters.empty:
        raise KeyError(level)

    # Pick the dominant cluster for this level rather than the cluster of whichever
    # row happens to come first. mode() returns sorted values, so a tie resolves
    # deterministically to the smallest cluster label.
    cluster_label = level_clusters.mode().iloc[0]

    # Filter DataFrame to questions with the selected cluster label
    filtered_questions = df.loc[df['cluster_label'] == cluster_label, 'question']

    # Return a random question from the filtered questions
    return np.random.choice(filtered_questions.to_numpy())

# Example usage: get a random question for a given level (here 'competition')
#random_question_easy = get_random_question_by_level(df, 'competition')
#print(f"Random question from 'easy' level:\n {random_question_easy}")


Random question from 'easy' level:
 Devu loves to play with his dear mouse Jerry.  One day they play a game on 2 dimensional grid of dimensions n * n (n ≥ 2). Jerry is currently at coordinates (sx, sy) and wants to move to location (ex, ey) where cheese is placed by Devu. Also Devu is very cunning and has placed a bomb at location (bx, by). All these three locations are distinct. 

In a single move, Jerry can go either up, down, left or right in the grid such that it never goes out of the grid. Also, it has to avoid the bomb. Find out minimum number of moves Jerry needs. It is guaranteed that it is always possible to do so.

-----Input-----
- The first line of the input contains an integer T denoting the number of test cases. The description of T test cases follows."
- The first line of each test case contains seven space separated integers n, sx, sy , ex, ey, bx, by. 

-----Output-----
- For each test case, output a single line containing an integer corresponding to minimum number of 

In [56]:
# @title Train-test split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [57]:
# @title Train the model
# Fit a logistic-regression classifier with default hyperparameters on the training split.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)


In [58]:
# @title Save the model and vectorizer
# Persist the classifier and the TF-IDF vectorizer to the working directory.
for artifact, filename in ((model, 'model.pkl'), (vectorizer, 'vectorizer.pkl')):
    joblib.dump(artifact, filename)
# The label encoder is dumped last as a bare expression so the cell still shows its result.
joblib.dump(label_encoder, 'label_encoder.pkl')

['label_encoder.pkl']

In [60]:
from flask import Flask, request, jsonify
import joblib

app = Flask(__name__)

# Reload the persisted classifier, vectorizer and label encoder (in that order),
# replacing the in-memory objects of the same names.
# NOTE: joblib.load unpickles its input; only load files produced by this notebook.
model, vectorizer, label_encoder = (
    joblib.load(artifact_path)
    for artifact_path in ('model.pkl', 'vectorizer.pkl', 'label_encoder.pkl')
)



# Disabled Flask endpoint: the route decorator is commented out and the handler is
# wrapped in a string literal, so none of it runs.
# NOTE(review): the handler calls make_prediction(question, code) with two arguments,
# while make_prediction below accepts only `question` — reconcile before re-enabling.
#@app.route('/predict', methods=['POST'])
"""def predict():
  data = request.json
  question = data['question']
  code = data['code']
  #question = df['question']
  #code = df['solutions']

    # Use make_prediction function
  level = make_prediction(question, code)

  return jsonify({'level': level})"""





def make_prediction(question):
    """Predict the difficulty label for a single question text.

    The text is vectorized with the module-level ``vectorizer``, classified by
    ``model``, and the predicted class is mapped back to its original label via
    ``label_encoder``. Solution code is currently not part of the input.

    Args:
        question: The question text to classify.

    Returns:
        The first (and only) decoded label for the given question.
    """
    # The vectorizer expects a collection of documents, hence the one-element list.
    features = vectorizer.transform([question])
    encoded_prediction = model.predict(features)
    return label_encoder.inverse_transform(encoded_prediction)[0]

In [64]:
sample_question = "Solve the N-Queens problem"
# Kept for reference only: make_prediction currently takes just the question text.
sample_code = "def n_queens(n): ..."

# Call the prediction function
predicted_level = make_prediction(sample_question) #sample_code)

print(f"Predicted Level: {predicted_level}\n")

# The variable keeps its original name, but it holds a question for whatever level
# was predicted above, so the message reports that level instead of a fixed 'easy'.
random_question_easy = get_random_question_by_level(df, predicted_level)
print(f"Random question from '{predicted_level}' level:\n {random_question_easy}")

Predicted Level: introductory

Random question from 'easy' level:
 Consider the infinite sequence $s$ of positive integers, created by repeating the following steps:

  Find the lexicographically smallest triple of positive integers $(a, b, c)$ such that   $a \oplus b \oplus c = 0$, where $\oplus$ denotes the bitwise XOR operation.  $a$, $b$, $c$ are not in $s$.  Here triple of integers $(a_1, b_1, c_1)$ is considered to be lexicographically smaller than triple $(a_2, b_2, c_2)$ if sequence $[a_1, b_1, c_1]$ is lexicographically smaller than sequence $[a_2, b_2, c_2]$.  Append $a$, $b$, $c$ to $s$ in this order.  Go back to the first step. 

You have integer $n$. Find the $n$-th element of $s$.

You have to answer $t$ independent test cases.

A sequence $a$ is lexicographically smaller than a sequence $b$ if in the first position where $a$ and $b$ differ, the sequence $a$ has a smaller element than the corresponding element in $b$.


-----Input-----

The first line contains a single in