In [23]:
from dotenv import load_dotenv
load_dotenv()
import os

# Shared settings for the Stack Exchange search cells further down.
url = "https://api.stackexchange.com/2.3/search"
search_query = "Using Vicuna + langchain + llama_index for creating a self hosted LLM model"

# Read the API key from the environment; the result is None when the
# variable is not set.
api_key = os.environ.get('STACK_OVERFLOW_KEY')

In [15]:
import requests

# Search Stack Overflow for questions whose title matches `search_query`
# and list the title and link of every hit.
params = {
    "site": "stackoverflow",
    "key": api_key,
    "intitle": search_query,
    "sort": "relevance",  # most relevant questions first
}

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports HTTP failures here rather than as a confusing
# KeyError on "items" a few lines later.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

# Extract the matching questions; tolerate a payload without "items".
items = data.get("items", [])

for item in items:
    question_title = item["title"]
    question_url = item["link"]
    print(question_title)
    print(question_url)
    print("---")


How can I retrieve the list of input documents used by ChatGPT in LangChain?
https://stackoverflow.com/questions/76300233/how-can-i-retrieve-the-list-of-input-documents-used-by-chatgpt-in-langchain
---


In [16]:
# Inspect the raw list of search hits returned by the request above.
items

[{'tags': ['python', 'prompt', 'openai-api', 'langchain'],
  'owner': {'account_id': 11188708,
   'reputation': 81,
   'user_id': 8210008,
   'user_type': 'registered',
   'profile_image': 'https://www.gravatar.com/avatar/7c91911b4157f35ae1ce3bb2dfcf3178?s=256&d=identicon&r=PG&f=y&so-version=2',
   'display_name': 'Omered',
   'link': 'https://stackoverflow.com/users/8210008/omered'},
  'is_answered': False,
  'view_count': 439,
  'answer_count': 1,
  'score': 1,
  'last_activity_date': 1686062465,
  'creation_date': 1684677119,
  'question_id': 76300233,
  'content_license': 'CC BY-SA 4.0',
  'link': 'https://stackoverflow.com/questions/76300233/how-can-i-retrieve-the-list-of-input-documents-used-by-chatgpt-in-langchain',
  'title': 'How can I retrieve the list of input documents used by ChatGPT in LangChain?'}]

---

In [24]:
import requests

# Search for questions matching `search_query`, then fetch the full record
# (including the body) of each hit. Only the LAST successfully fetched
# question is kept in question_data / question_title / question_body, which
# the following cells read.
params = {
    "site": "stackoverflow",
    "key": api_key,
    "intitle": search_query,
    "sort": "relevance"
}

# Timeout + status check: fail fast and loudly instead of hanging or
# tripping over a missing "items" key in an error payload.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

items = data.get("items", [])

# The detail request parameters do not depend on the question, so build
# them once instead of on every iteration.
question_params = {
    "site": "stackoverflow",
    "key": api_key,
    "filter": "withbody"  # Include the question body in the response
}

# Reset the results so an empty search cannot leave values from an earlier
# run of this cell lingering in the kernel.
question_data = {"items": []}
question_title = None
question_body = None

for item in items:
    question_id = item["question_id"]  # Get the ID of the question

    # Fetch the question details using the question ID
    question_url = f"https://api.stackexchange.com/2.3/questions/{question_id}"
    question_response = requests.get(question_url, params=question_params, timeout=30)
    question_response.raise_for_status()
    fetched = question_response.json()

    # The detail lookup can come back empty; skip the question rather than
    # raise IndexError on items[0].
    if not fetched.get("items"):
        continue

    question_data = fetched
    question_title = question_data["items"][0]["title"]
    question_body = question_data["items"][0]["body"]

In [25]:
# Print the question held in `question_data` (populated by the previous
# cell) and, if it has one, the body of its accepted answer.
question_items = question_data.get("items") or []

if question_items:
    question = question_items[0]

    # Read every field from the payload itself, so the output does not depend
    # on separate kernel variables that may be unset or from another run.
    print("Question Title:", question["title"])
    print("Question URL:", question["link"])
    print("Question Body:", question["body"])

    # Check if an accepted answer exists for the question
    answer_id = question.get("accepted_answer_id")
    answer_body = None

    if answer_id is not None:
        # Fetch the answer details using the answer ID
        answer_url = f"https://api.stackexchange.com/2.3/answers/{answer_id}"
        answer_params = {
            "site": "stackoverflow",
            "key": api_key,
            "filter": "withbody"  # Include the answer body in the response
        }

        # Timeout + status check so a network or API failure is reported
        # directly instead of hanging or failing on the JSON access below.
        answer_response = requests.get(answer_url, params=answer_params, timeout=30)
        answer_response.raise_for_status()
        answer_data = answer_response.json()

        # Guard against an empty result instead of indexing items[0] blindly.
        answer_items = answer_data.get("items") or []
        if answer_items:
            answer_body = answer_items[0]["body"]

    if answer_body is not None:
        # NOTE: this is the *accepted* answer, not necessarily the top-voted one.
        print("Top Answer:")
        print(answer_body)
    else:
        print("No accepted answer found.")
    print("---")
else:
    # Nothing was fetched (for example, the search returned no hits).
    print("No question data to display.")
    print("---")


Question Title: Using Vicuna + langchain + llama_index for creating a self hosted LLM model
Question URL: https://stackoverflow.com/questions/76067104/using-vicuna-langchain-llama-index-for-creating-a-self-hosted-llm-model
Question Body: <p>I want to create a self hosted LLM model that will be able to have a context of my own custom data (Slack conversations for that matter).</p>
<p>I've heard Vicuna is a great alternative to ChatGPT and so I made the below code:</p>
<pre><code>from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, \
    GPTSimpleVectorIndex, PromptHelper, LLMPredictor, Document, ServiceContext
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import torch
from langchain.llms.base import LLM
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

!export PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
    
class CustomLLM(LLM):
    model_name = &quot;eachadea/vicuna-13b-1.1&quot;
    tokenizer = AutoTokenizer.fro

In [26]:
# Inspect the full JSON payload of the last question fetched by the loop above.
question_data

{'items': [{'tags': ['python',
    'machine-learning',
    'pytorch',
    'chatgpt-api',
    'langchain'],
   'owner': {'account_id': 1614194,
    'reputation': 421,
    'user_id': 1492337,
    'user_type': 'registered',
    'profile_image': 'https://www.gravatar.com/avatar/e4508bd9a4d0df8c0a788a8d6d0a1768?s=256&d=identicon&r=PG',
    'display_name': 'Ben',
    'link': 'https://stackoverflow.com/users/1492337/ben'},
   'is_answered': True,
   'view_count': 4910,
   'accepted_answer_id': 76074046,
   'answer_count': 2,
   'score': 6,
   'last_activity_date': 1685981124,
   'creation_date': 1682014477,
   'question_id': 76067104,
   'content_license': 'CC BY-SA 4.0',
   'link': 'https://stackoverflow.com/questions/76067104/using-vicuna-langchain-llama-index-for-creating-a-self-hosted-llm-model',
   'title': 'Using Vicuna + langchain + llama_index for creating a self hosted LLM model',
 'has_more': False,
 'quota_max': 10000,
 'quota_remaining': 9992}

---

In [33]:
# Switch to a different search phrase; the search cells below read `search_query`.
search_query = "getting segmentation fault in linux"

In [34]:
import requests

# Fetch one page of search results for `search_query` and, for every hit,
# print the question together with its accepted answer (when it has one).
params = {
    "site": "stackoverflow",
    "key": api_key,
    "intitle": search_query,
    "sort": "relevance",
    "pagesize": 5  # Number of items per page, adjust as needed
}

# Only a single page is requested. To walk further pages, increase `page`
# and repeat the request until the response comes back with no items.
page = 1  # Starting page
params["page"] = page

# Timeout + status check: fail fast and loudly instead of hanging or
# tripping over a missing "items" key in an error payload.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

items = data.get("items", [])
print(len(items))

# Both detail lookups use the same, question-independent parameters, so
# they are built once outside the loop.
question_params = {
    "site": "stackoverflow",
    "key": api_key,
    "filter": "withbody"  # Include the question body in the response
}
answer_params = {
    "site": "stackoverflow",
    "key": api_key,
    "filter": "withbody"  # Include the answer body in the response
}

for item in items:
    question_id = item["question_id"]  # Get the ID of the question

    # Fetch the question details using the question ID
    question_url = f"https://api.stackexchange.com/2.3/questions/{question_id}"
    question_response = requests.get(question_url, params=question_params, timeout=30)
    question_response.raise_for_status()
    fetched = question_response.json()

    # Skip questions whose detail lookup comes back empty instead of
    # raising IndexError on items[0].
    if not fetched.get("items"):
        continue

    question_data = fetched
    question = question_data["items"][0]
    question_title = question["title"]
    question_body = question["body"]

    # Look up the accepted answer, if the question has one.
    answer_body = None
    if "accepted_answer_id" in question:
        answer_id = question["accepted_answer_id"]

        # Fetch the answer details using the answer ID
        answer_url = f"https://api.stackexchange.com/2.3/answers/{answer_id}"
        answer_response = requests.get(answer_url, params=answer_params, timeout=30)
        answer_response.raise_for_status()
        answer_data = answer_response.json()

        if answer_data.get("items"):
            answer_body = answer_data["items"][0]["body"]

    # The question header is identical in both cases, so print it once.
    print("Question Title:", question_title)
    print("Question URL:", question["link"])
    print("Question Body:", question_body)
    if answer_body is not None:
        print("Top Answer:")
        print(answer_body)
    else:
        print("No accepted answer found.")
    print("---")


2
Question Title: Getting segmentation fault in linux but not in Windows
Question URL: https://stackoverflow.com/questions/47724281/getting-segmentation-fault-in-linux-but-not-in-windows
Question Body: <p>I'm trying to protect a program from invalid inputs and I've made a function that is not working in Linux but, apparently, works fine in Windows. I receive a String and the program is supposed to accept only Strings that have 3 " " and the last 3 values are digits. E.g "Mike 1 1 1".</p>

<p>The solution I came up with was</p>

<pre><code>int verify(char string[]){
    int n = 0;
    char* aux = strtok(string, " ");
    while((aux=strtok(NULL, " "))!=NULL){
        if(isdigit(aux[0])==0)break;
        n++;
    }
    return n;
}
</code></pre>

<p>If n is 3 I accept the input. I know that this solution won't be correct for an input like "Mike 1 1a 1" but for now that won't be important. I really wanted to know why this doesn't work in Linux and how can I test this in a way that will work

In [35]:
# Inspect the raw search hits returned for the current `search_query`.
items

[{'tags': ['c', 'linux', 'segmentation-fault'],
  'owner': {'account_id': 11886052,
   'reputation': 11,
   'user_id': 8698394,
   'user_type': 'registered',
   'profile_image': 'https://www.gravatar.com/avatar/9f6111cd8a4c69ed42aad72a5dc6bb7f?s=256&d=identicon&r=PG&f=y&so-version=2',
   'display_name': 'Mike',
   'link': 'https://stackoverflow.com/users/8698394/mike'},
  'is_answered': False,
  'view_count': 383,
  'answer_count': 0,
  'score': 0,
  'last_activity_date': 1512781764,
  'creation_date': 1512781446,
  'last_edit_date': 1512781764,
  'question_id': 47724281,
  'content_license': 'CC BY-SA 3.0',
  'link': 'https://stackoverflow.com/questions/47724281/getting-segmentation-fault-in-linux-but-not-in-windows',
  'title': 'Getting segmentation fault in linux but not in Windows'},
 {'tags': ['c', 'linux', 'simulation'],
  'owner': {'account_id': 2238459,
   'reputation': 600,
   'user_id': 1974137,
   'user_type': 'registered',
   'accept_rate': 85,
   'profile_image': 'https://

In [36]:
 # Inspect the "items" list of the last question-detail response fetched above.
 question_data["items"]

[{'tags': ['c', 'linux', 'simulation'],
  'owner': {'account_id': 2238459,
   'reputation': 600,
   'user_id': 1974137,
   'user_type': 'registered',
   'accept_rate': 85,
   'profile_image': 'https://www.gravatar.com/avatar/50a1a180963d83ad9c5af5b2dbeb9d55?s=256&d=identicon&r=PG',
   'display_name': 'ashwinbhy',
   'link': 'https://stackoverflow.com/users/1974137/ashwinbhy'},
  'is_answered': True,
  'view_count': 2551,
  'accepted_answer_id': 16306304,
  'answer_count': 3,
  'score': 1,
  'last_activity_date': 1388180746,
  'creation_date': 1367338861,
  'last_edit_date': 1388180746,
  'question_id': 16304595,
  'content_license': 'CC BY-SA 3.0',
  'link': 'https://stackoverflow.com/questions/16304595/getting-segmentation-fault-in-linux-when-running-my-c-program',
  'title': 'Getting segmentation fault in Linux when running my C program',
  'body': '<p>I am trying to simulate the <a href="http://en.wikipedia.org/wiki/Cat_%28Unix%29" rel="nofollow">cat</a> command in <a href="http://e

In [39]:
import requests

# Fetch one page of search results for `search_query` and, for every hit,
# print the question together with its highest-voted answer.
params = {
    "site": "stackoverflow",
    "key": api_key,
    "intitle": search_query,
    "sort": "relevance",
    "pagesize": 5,  # Number of items per page, adjust as needed
    # Request the first page explicitly rather than relying on a `page`
    # variable left behind by an earlier cell, so this cell also runs on a
    # fresh kernel.
    "page": 1
}

# Timeout + status check: fail fast and loudly instead of hanging or
# tripping over a missing "items" key in an error payload.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

items = data.get("items", [])

# The detail request parameters do not depend on the question, so they are
# built once outside the loop.
question_params = {
    "site": "stackoverflow",
    "key": api_key,
    "filter": "withbody"  # Include the question body in the response
}
answers_params = {
    "site": "stackoverflow",
    "key": api_key,
    "filter": "withbody",  # Include the answer body in the response
    "sort": "votes",  # Sort answers by votes (highest first)
    "pagesize": 5  # Fetch up to five answers; only the first one is printed
}

for item in items:
    question_id = item["question_id"]  # Get the ID of the question

    # Fetch the question details using the question ID
    question_url = f"https://api.stackexchange.com/2.3/questions/{question_id}"
    question_response = requests.get(question_url, params=question_params, timeout=30)
    question_response.raise_for_status()
    fetched = question_response.json()

    # Skip questions whose detail lookup comes back empty instead of
    # raising IndexError on items[0].
    if not fetched.get("items"):
        continue

    question_data = fetched
    question = question_data["items"][0]
    question_title = question["title"]
    question_body = question["body"]

    # Fetch all answers for the question
    answers_url = f"https://api.stackexchange.com/2.3/questions/{question_id}/answers"
    answers_response = requests.get(answers_url, params=answers_params, timeout=30)
    answers_response.raise_for_status()
    answers_data = answers_response.json()

    answer_items = answers_data.get("items", [])
    if answer_items:
        answer_body = answer_items[0]["body"]
        answer_votes = answer_items[0]["score"]
    else:
        answer_body = "No answers found."
        answer_votes = 0

    print("Question Title:", question_title)
    print("Question URL:", question["link"])
    print("Question Body:", question_body)
    print("Top Answer (Votes:", answer_votes, "):")
    print(answer_body)
    print("---")



Question Title: Getting segmentation fault in linux but not in Windows
Question URL: https://stackoverflow.com/questions/47724281/getting-segmentation-fault-in-linux-but-not-in-windows
Question Body: <p>I'm trying to protect a program from invalid inputs and I've made a function that is not working in Linux but, apparently, works fine in Windows. I receive a String and the program is supposed to accept only Strings that have 3 " " and the last 3 values are digits. E.g "Mike 1 1 1".</p>

<p>The solution I came up with was</p>

<pre><code>int verify(char string[]){
    int n = 0;
    char* aux = strtok(string, " ");
    while((aux=strtok(NULL, " "))!=NULL){
        if(isdigit(aux[0])==0)break;
        n++;
    }
    return n;
}
</code></pre>

<p>If n is 3 I accept the input. I know that this solution won't be correct for an input like "Mike 1 1a 1" but for now that won't be important. I really wanted to know why this doesn't work in Linux and how can I test this in a way that will work. 

In [40]:
# Inspect the answers payload left over from the last iteration of the loop above.
answers_data

{'items': [{'owner': {'account_id': 196869,
    'reputation': 397859,
    'user_id': 440558,
    'user_type': 'registered',
    'accept_rate': 75,
    'profile_image': 'https://i.stack.imgur.com/9ZYta.jpg?s=256&g=1',
    'display_name': 'Some programmer dude',
    'link': 'https://stackoverflow.com/users/440558/some-programmer-dude'},
   'is_accepted': False,
   'score': 6,
   'last_activity_date': 1367339348,
   'last_edit_date': 1367339348,
   'creation_date': 1367338961,
   'answer_id': 16304619,
   'question_id': 16304595,
   'content_license': 'CC BY-SA 3.0',
   'body': "<p>It's probably because the redirection is handled by the shell and not by your program, so <code>argv[2]</code> is <code>NULL</code> and <code>argv[3]</code> does not exist.</p>\n\n<p>However you should use a debugger to find out what is really happening. And then add proper error checking.</p>\n"},
  {'owner': {'account_id': 112612,
    'reputation': 88729,
    'user_id': 296974,
    'user_type': 'registered',


In [44]:
# First answer in the payload (the answers were requested sorted by votes).
answers_data['items'][0]

{'owner': {'account_id': 196869,
  'reputation': 397859,
  'user_id': 440558,
  'user_type': 'registered',
  'accept_rate': 75,
  'profile_image': 'https://i.stack.imgur.com/9ZYta.jpg?s=256&g=1',
  'display_name': 'Some programmer dude',
  'link': 'https://stackoverflow.com/users/440558/some-programmer-dude'},
 'is_accepted': False,
 'score': 6,
 'last_activity_date': 1367339348,
 'last_edit_date': 1367339348,
 'creation_date': 1367338961,
 'answer_id': 16304619,
 'question_id': 16304595,
 'content_license': 'CC BY-SA 3.0',
 'body': "<p>It's probably because the redirection is handled by the shell and not by your program, so <code>argv[2]</code> is <code>NULL</code> and <code>argv[3]</code> does not exist.</p>\n\n<p>However you should use a debugger to find out what is really happening. And then add proper error checking.</p>\n"}

In [45]:
# Second answer in the same payload.
answers_data['items'][1]

{'owner': {'account_id': 112612,
  'reputation': 88729,
  'user_id': 296974,
  'user_type': 'registered',
  'accept_rate': 100,
  'profile_image': 'https://www.gravatar.com/avatar/0e26cfe3ac75cd582ca200fbc6fa0421?s=256&d=identicon&r=PG',
  'display_name': 'glglgl',
  'link': 'https://stackoverflow.com/users/296974/glglgl'},
 'is_accepted': False,
 'score': 4,
 'last_activity_date': 1367339808,
 'creation_date': 1367339808,
 'answer_id': 16304873,
 'question_id': 16304595,
 'content_license': 'CC BY-SA 3.0',
 'body': '<p>You can live without <code>gdb</code> here - but you have to start solving the problem in a structured manner:</p>\n\n<ul>\n<li><p>Don\'t take anything as granted. E.g., even if you call your program as <code>program &gt; file</code>, don\'t assume that <code>argv</code> looks the way you assume, but check it by outputting each of them:</p>\n\n<pre><code>printf("argc: %d\\n", argc);\nprintf("argv[0]: %s\\n", argv[0]);\nprintf("argv[1]: %s\\n", argv[1]);\nprintf("argv[2]