**Closest Users (Buddy)**


In [2]:
# API key for the NVIDIA endpoint, read by later cells through the global `Key`.
# It is taken from the NVIDIA_API_KEY environment variable when that is set, so
# the secret does not have to be stored in the notebook; otherwise the literal
# that was previously hardcoded here is used unchanged.
import os

Key = os.environ.get("NVIDIA_API_KEY") or "nvapi"

In [12]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist

def find_closest_users(user_number, user_data, num_closest=4):
    """Return the users whose expertise vectors are nearest to one user.

    Every column of ``user_data`` except the first is used as the numeric
    expertise vector; distances are Euclidean.

    Args:
        user_number: Zero-based row position of the selected user.
        user_data: DataFrame with one row per user.
        num_closest: Maximum number of neighbours to return.

    Returns:
        A copy of the neighbour rows, nearest first, with an added
        "Confidence Score" column equal to ``1 - distance / max_distance``,
        where ``max_distance`` is the largest distance among the returned
        neighbours (so the farthest returned neighbour scores 0). When every
        returned neighbour is at distance 0 the score is 1 for all of them.
        The frame is empty when there is no other user or ``num_closest <= 0``.

    Raises:
        ValueError: If ``user_number`` is not a valid row position.
    """
    expertise_matrix = user_data.iloc[:, 1:].to_numpy()
    if user_number < 0 or user_number >= len(expertise_matrix):
        raise ValueError("Invalid user number. Please select a valid number.")

    selected_user_vector = expertise_matrix[user_number].reshape(1, -1)
    distances = cdist(selected_user_vector, expertise_matrix, metric="euclidean")[0]

    # Remove the selected user by position instead of assuming it sorts first:
    # another user with an identical vector also has distance 0 and could
    # otherwise push the selected user into the result.
    ranked = np.argsort(distances, kind="stable")
    ranked = ranked[ranked != user_number]
    closest_indices = ranked[:max(num_closest, 0)]

    closest_distances = distances[closest_indices]
    max_distance = closest_distances.max() if closest_distances.size else 0.0
    if max_distance > 0:
        confidence_scores = 1 - (closest_distances / max_distance)
    else:
        # All neighbours coincide with the selected user (or there are none):
        # avoid 0/0 and report full confidence.
        confidence_scores = np.ones(len(closest_indices))

    closest_users = user_data.iloc[closest_indices].copy()
    closest_users["Confidence Score"] = confidence_scores

    return closest_users


# Load the expertise table; find_closest_users treats the first column as the
# non-numeric identifier and every remaining column as part of the vector.
file_path = "users_expertise_dataset.csv"
user_data = pd.read_csv(file_path)


# The typed value is parsed as an int and used as a zero-based row position.
selected_user_number = int(input("Enter User"))
closest_users = find_closest_users(selected_user_number, user_data)

# Show the selected user's name followed by the neighbour rows.
print(f"Selected User: {user_data.iloc[selected_user_number]['User Name']}")
print("\nClosest Users:")
print(closest_users)

Enter User4
Selected User: Avery Wilson

Closest Users:
          User Name  Data Science  Web Development  Graphic Design  \
189   Morgan Taylor      0.425849         0.064026        0.225741   
147  Quinn Anderson      0.196303         0.470391        0.538847   
66   Taylor Johnson      0.518439         0.594103        0.601768   
195     Casey Moore      0.896096         0.803599        0.205292   

     Digital Marketing  Business Analytics  Cybersecurity  \
189           0.883829            0.625225       0.113459   
147           0.597283            0.438455       0.461872   
66            0.909881            0.256984       0.356108   
195           0.605725            0.475097       0.639981   

     Mobile App Development   AI & ML  Project Management  ...  \
189                0.183672  0.428377            0.988839  ...   
147                0.325702  0.430340            0.489678  ...   
66                 0.133494  0.608339            0.559328  ...   
195                0.67

**Mentor Assigning**

In [13]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import cdist

def suggest_mentors(user_number, selected_subjects, user_data, num_suggestions=5, min_diff=0.04, max_diff=0.15):
    """Suggest users who are slightly ahead of one user in the chosen subjects.

    A candidate qualifies when, for every selected subject, the candidate's
    value minus the selected user's value lies in ``[min_diff, max_diff]``.

    Args:
        user_number: Zero-based row position of the selected user.
        selected_subjects: Column name, or iterable of column names, to compare.
        user_data: DataFrame with a "User Name" column and one numeric column
            per subject.
        num_suggestions: Maximum number of mentors to return.
        min_diff: Smallest allowed per-subject lead of a mentor.
        max_diff: Largest allowed per-subject lead of a mentor.

    Returns:
        Up to ``num_suggestions`` qualifying rows with an added
        "Similarity Score" column (cosine similarity to the selected user over
        the selected subjects), sorted by the selected subject columns in
        descending order. When nobody qualifies, an empty DataFrame with the
        columns "User Name", "Similarity Score" and the selected subjects.

    Raises:
        ValueError: If ``user_number`` is not a valid row position.
    """
    if user_number < 0 or user_number >= len(user_data):
        raise ValueError("Invalid user number. Please select a valid number.")

    # Accept a single column name or any iterable of names, not only a list.
    if isinstance(selected_subjects, str):
        subjects = [selected_subjects]
    else:
        subjects = list(selected_subjects)

    subject_matrix = user_data[subjects].to_numpy(dtype=float)
    selected_user_vector = subject_matrix[user_number].reshape(1, -1)

    # Vectorised form of the per-row range check.
    differences = subject_matrix - selected_user_vector
    mentor_mask = np.all((differences >= min_diff) & (differences <= max_diff), axis=1)

    # Exclude the selected user by position. The user is chosen by position
    # above, so dropping by index label could remove a different row whenever
    # the frame does not carry the default 0..n-1 index.
    mentor_mask[user_number] = False

    potential_mentors = user_data[mentor_mask].copy()
    if potential_mentors.empty:
        return pd.DataFrame([], columns=["User Name", "Similarity Score"] + subjects)

    mentor_vectors = subject_matrix[mentor_mask]
    similarity_scores = 1 - cdist(selected_user_vector, mentor_vectors, metric="cosine")[0]
    potential_mentors["Similarity Score"] = similarity_scores
    potential_mentors = potential_mentors.sort_values(by=subjects, ascending=False)
    return potential_mentors.head(num_suggestions)

# Load the expertise table used for mentor matching.
file_path = "users_expertise_dataset.csv"
user_data = pd.read_csv(file_path)

# The typed value is parsed as an int and used as a zero-based row position;
# the subjects to compare are fixed here and must be column names of the table.
selected_user_number = int(input("Enter User"))
selected_subjects = ["Data Science","E-commerce"]
mentors = suggest_mentors(selected_user_number, selected_subjects, user_data)

# Show the selected user's name followed by the suggested mentor rows.
print(f"Selected User: {user_data.iloc[selected_user_number]['User Name']}")
print("\nSuggested Mentors:")
print(mentors)


Enter User4
Selected User: Avery Wilson

Suggested Mentors:
        User Name  Data Science  Web Development  Graphic Design  \
117  Alex Johnson      0.352563         0.132598        0.300811   
123  Jamie Wilson      0.347980         0.203217        0.758162   

     Digital Marketing  Business Analytics  Cybersecurity  \
117           0.415345            0.550656       0.475618   
123           0.357428            0.935823       0.106486   

     Mobile App Development   AI & ML  Project Management  ...  \
117                0.142313  0.994775            0.138558  ...   
123                0.147193  0.790293            0.116736  ...   

     Game Development  Software Engineering  UI/UX Design  \
117          0.005865              0.803307      0.230515   
123          0.436983              0.583793      0.964759   

     Database Management    DevOps  Networking  Augmented Reality  E-commerce  \
117             0.577497  0.635198    0.122553           0.371488    0.617107   
123   

**YouTube Course**

In [11]:
import re

import numpy as np
import pandas as pd
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans

class CurriculumSequencer:
    """Build a short learning path of videos ordered by estimated complexity.

    Videos of one subject are embedded with a sentence transformer, grouped
    with KMeans, each group is rated on a 1-9 complexity scale by an LLM, and
    one video per group is returned in ascending order of that rating.
    """

    def __init__(self, api_key):
        """Create the API client, the embedder and the list of level names.

        Args:
            api_key: Key passed to the OpenAI-compatible client that targets
                the NVIDIA endpoint.
        """
        # Initialize NVIDIA API client
        self.client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=api_key
        )

        # Sentence transformer for semantic embeddings
        self.embedder = SentenceTransformer('all-mpnet-base-v2')

        # Curriculum complexity levels; the count also caps the number of clusters
        self.levels = [
            "basic introduction",
            "fundamental concepts",
            "essential techniques",
            "practical applications",
            "advanced theories",
            "expert-level content",
            "research-oriented material",
            "cutting-edge developments",
            "specialized mastery"
        ]

    def estimate_complexity(self, text):
        """Ask the LLM for the complexity level (1-9) of a piece of content.

        Args:
            text: Title/description text of the educational content.

        Returns:
            The first standalone digit 1-9 found in the model's reply, or 1
            when the request fails or the reply contains no such digit.
        """
        # Use Llama-3 via NVIDIA API for complexity estimation
        prompt = f"""
        Analyze this educational content and classify its complexity level (1-9):
        {text}

        Complexity Scale:
        1. Basic introduction
        2. Fundamental concepts
        3. Essential techniques
        4. Practical applications
        5. Advanced theories
        6. Expert-level content
        7. Research-oriented material
        8. Cutting-edge developments
        9. Specialized mastery

        Return ONLY the number corresponding to the complexity level.
        """

        try:
            completion = self.client.chat.completions.create(
                model="meta/llama3-70b-instruct",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.3,
                top_p=0.9,
                max_tokens=10
            )
            response = completion.choices[0].message.content
        except Exception as e:
            print(f"Error estimating complexity: {str(e)}")
            return 1  # Fallback to basic level

        # The model does not always reply with a bare number ("Level 3.",
        # "**3**"), so pick out a standalone digit instead of int()-ing the
        # whole reply and silently falling back to level 1.
        match = re.search(r"\b([1-9])\b", response or "")
        if match is None:
            print(f"Error estimating complexity: no level found in reply {response!r}")
            return 1  # Fallback to basic level
        return int(match.group(1))

    def create_curriculum(self, df, subject, random_state=42):
        """Select one video per complexity cluster for a subject.

        Args:
            df: DataFrame with 'Subject', 'Name' and 'Description' columns.
            subject: Subject to filter on (case-insensitive exact match).
            random_state: Seed used for clustering and for every random draw,
                so repeated runs pick the same videos.

        Returns:
            DataFrame with one row per non-empty cluster, ordered from the
            lowest to the highest estimated complexity. Empty (same columns as
            ``df``) when no row matches ``subject``.
        """
        # Filter by subject
        subject_df = df[df['Subject'].str.lower() == subject.lower()]
        if subject_df.empty:
            # Nothing to embed or cluster.
            return subject_df.copy()

        # Combine relevant text features; missing fields become empty strings
        text_data = subject_df['Name'].fillna("") + " " + subject_df['Description'].fillna("")

        # Create semantic embeddings
        embeddings = self.embedder.encode(text_data.tolist(), show_progress_bar=True)

        # KMeans cannot form more clusters than there are samples
        n_clusters = min(len(self.levels), len(subject_df))
        kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
        clusters = kmeans.fit_predict(embeddings)

        # Estimate complexity for each non-empty cluster from up to 3 samples
        cluster_ids = []
        cluster_complexities = []
        for i in range(n_clusters):
            members = clusters == i
            member_count = int(members.sum())
            if member_count == 0:
                continue
            cluster_samples = text_data[members].sample(
                min(3, member_count), random_state=random_state
            )
            avg_complexity = np.mean([self.estimate_complexity(t) for t in cluster_samples])
            cluster_ids.append(i)
            cluster_complexities.append(avg_complexity)

        # Sort clusters by complexity (stable, so ties keep cluster-id order)
        order = np.argsort(cluster_complexities, kind="stable")
        cluster_order = [cluster_ids[j] for j in order]

        # Generate final sequence: one video drawn from each cluster
        sequence = []
        for cluster in cluster_order:
            cluster_videos = subject_df.iloc[np.where(clusters == cluster)[0]]
            sequence.append(cluster_videos.sample(1, random_state=random_state).iloc[0])

        return pd.DataFrame(sequence)

def main(subject="Maths", csv_path="YouTube_Video_Dataset.csv"):
    """Build and print a learning path for one subject.

    Args:
        subject: Subject passed to CurriculumSequencer.create_curriculum.
        csv_path: Path of the CSV file holding the video table.
    """
    # The API key comes from the notebook-level `Key` variable set in an earlier cell.
    api_key = Key
    sequencer = CurriculumSequencer(api_key)

    # Load data
    df = pd.read_csv(csv_path)

    # Generate curriculum
    curriculum = sequencer.create_curriculum(df, subject)

    print("\nRecommended Learning Path:")
    print(curriculum)

# Run the curriculum demo when this cell/module is executed as the main program.
if __name__ == "__main__":
    main()

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Recommended Learning Path:
                                  Name  \
54       Discrete Mathematics Overview   
55          Number Theory Fundamentals   
51           Linear Algebra Essentials   
58                 Graph Theory Basics   
59  Math Tricks for Quick Calculations   
53         Integral Calculus Made Easy   
49          Understanding Trigonometry   
50          Probability and Statistics   
57    Complex Numbers and Applications   

                                          Description  \
54  Introduction to discrete math, covering sets, ...   
55  Explore the basics of number theory, including...   
51  Learn the core concepts of linear algebra, inc...   
58  Learn the basics of graph theory, including ty...   
59  Discover math tricks and shortcuts for perform...   
53  Learn the basics of integral calculus with pra...   
49  Introduction to trigonometric functions, ident...   
50  Explore the basics of probability and statisti...   
57  Understand complex numbers and the

**Wikipedia Questionnaire**

In [4]:
!pip install wikipedia-api
!pip install openai

Collecting wikipedia-api
  Downloading wikipedia_api-0.8.1.tar.gz (19 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wikipedia-api
  Building wheel for wikipedia-api (setup.py) ... [?25l[?25hdone
  Created wheel for wikipedia-api: filename=Wikipedia_API-0.8.1-py3-none-any.whl size=15384 sha256=050730cad3a5cfa48dbd4f9b19cae9b6bed6ab8733468ebc679cbe22a99df0e9
  Stored in directory: /root/.cache/pip/wheels/0b/0f/39/e8214ec038ccd5aeb8c82b957289f2f3ab2251febeae5c2860
Successfully built wikipedia-api
Installing collected packages: wikipedia-api
Successfully installed wikipedia-api-0.8.1


In [7]:
import wikipediaapi
from openai import OpenAI

# Number of leading characters of the article text sent to the model as context.
MAX_CONTEXT_CHARS = 10000

# Instruction block sent to the model; the article text is appended after "context :".
MCQ_PROMPT = """Generate 10 multiple-choice questions (MCQs) based on the provided context.
            Each question should have exactly four options, with one correct answer clearly indicated.
            Dont add unnessecary text as "Here are the 10 multiple-choice questions based on the provided context:

```"
             Format the output as a valid JSON object with the following structure:

json
{
  "questions": [
    {
      "question": "Question 1 text here",
      "options": {
        "A": "Option 1",
        "B": "Option 2",
        "C": "Option 3",
        "D": "Option 4"
      },
      "answer": "A"
    },
    {
      "question": "Question 2 text here",
      "options": {
        "A": "Option 1",
        "B": "Option 2",
        "C": "Option 3",
        "D": "Option 4"
      },
      "answer": "C"
    }
    // Continue this structure for all 10 questions
  ]
} context :"""

# Define a user agent string that describes your bot
user_agent = "MyWikipediaBot/1.0 (sohamyedgaonkar@gmail.com)"

# Pass the user agent to the Wikipedia class constructor
wiki_wiki = wikipediaapi.Wikipedia(
    language='en',
    user_agent=user_agent  # Specify the user agent here
)

page = wiki_wiki.page("Python programming")

# `Key` is the notebook-level API key variable set in an earlier cell.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=Key
)

if page.exists():
    print(f"Title: {page.title}")

    # Only query the model when there is article text to build questions from.
    completion = client.chat.completions.create(
        model="meta/llama3-70b-instruct",
        messages=[{"role": "user", "content": MCQ_PROMPT + page.text[:MAX_CONTEXT_CHARS]}],
        temperature=0.5,
        top_p=1,
        max_tokens=1024,
        stream=True
    )

    # Print the streamed reply as it arrives; skip chunks that carry no
    # choices or no text.
    for chunk in completion:
        if chunk.choices and chunk.choices[0].delta.content is not None:
            print(chunk.choices[0].delta.content, end="")
else:
    print("Page does not exist.")



Title: Python (programming language)
Here are the 10 multiple-choice questions based on the provided context:

```
{
  "questions": [
    {
      "question": "What is the design philosophy of Python?",
      "options": {
        "A": "Code readability with the use of significant indentation",
        "B": "Code complexity with the use of minimal indentation",
        "C": "Code simplicity with the use of no indentation",
        "D": "Code flexibility with the use of random indentation"
      },
      "answer": "A"
    },
    {
      "question": "Who began working on Python in the late 1980s?",
      "options": {
        "A": "Guido van Rossum",
        "B": "Alex Martelli",
        "C": "Monty Python",
        "D": "SETL"
      },
      "answer": "A"
    },
    {
      "question": "What is the name of the comedy series that inspired the name 'Python'?",
      "options": {
        "A": "Monty Python's Flying Circus",
        "B": "The Python Show",
        "C": "Python Comedy Hour",
  