In [2]:
!pip install llama-index pypdf

Collecting pypdf
  Downloading pypdf-3.17.4-py3-none-any.whl (278 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pypdf
Successfully installed pypdf-3.17.4


In [10]:
import getpass
import os

import tiktoken
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index import set_global_service_context
from llama_index.llms import OpenAI
from llama_index.node_parser import SimpleNodeParser
from llama_index.text_splitter import TokenTextSplitter, SentenceSplitter

In [4]:
# Never store the key in the notebook. Respect a key that is already present in
# the environment, and otherwise prompt for it without echoing the input, so the
# secret ends up neither in the cell source nor in the saved output.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [6]:
# Load whatever SimpleDirectoryReader finds in the "document" directory.
# The path is relative — presumably resolved against the notebook's working
# directory; confirm before running from elsewhere.
directory_reader = SimpleDirectoryReader(input_dir="document")
documents = directory_reader.load_data()

In [11]:
# Tokenizer callable for gpt-3.5-turbo, handed to the splitter below.
gpt35_encode = tiktoken.encoding_for_model("gpt-3.5-turbo").encode

# Sentence-based splitter configuration.
# NOTE(review): chunk_size / chunk_overlap are presumably measured with the
# tokenizer above — confirm against the library documentation.
text_splitter = SentenceSplitter(
    chunk_size=1024,
    chunk_overlap=20,
    separator=" ",
    paragraph_separator="\n\n\n",
    secondary_chunking_regex="[^,.;。]+[,.;。]?",
    tokenizer=gpt35_encode,
)

In [17]:
# Use the splitter configured earlier in this notebook as the node parser.
# Previously a default SimpleNodeParser was created here, so the custom chunk
# size, overlap, separators and tokenizer held by `text_splitter` were never
# applied when the documents were indexed.
# NOTE(review): assumes llama-index 0.9.x, where SentenceSplitter is itself a
# node parser (SimpleNodeParser is an alias for it) — confirm the installed version.
node_parser = text_splitter

In [14]:
# Token budgets, named once so the LLM and the prompt helper cannot drift apart.
MAX_OUTPUT_TOKENS = 256
CONTEXT_WINDOW_TOKENS = 4096

# Chat model used to answer queries; temperature 0 is passed through as-is.
llm = OpenAI(model='gpt-3.5-turbo', temperature=0, max_tokens=MAX_OUTPUT_TOKENS)

# Embedding model constructed with the library's default settings.
embed_model = OpenAIEmbedding()

# Prompt helper configured with the same output budget as the LLM above.
prompt_helper = PromptHelper(
    context_window=CONTEXT_WINDOW_TOKENS,
    num_output=MAX_OUTPUT_TOKENS,
    chunk_overlap_ratio=0.1,
    chunk_size_limit=None,
)

In [18]:
# Bundle the components built in the previous cells into one ServiceContext.
service_components = {
    "llm": llm,
    "embed_model": embed_model,
    "node_parser": node_parser,
    "prompt_helper": prompt_helper,
}
service_context = ServiceContext.from_defaults(**service_components)

In [19]:
# Build the vector index over the loaded documents, using the custom
# service context rather than the library defaults.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)

In [20]:
# Ask a single question against the index and print the answer.
question = "Is linear regression suitable for time series analysis?"

query_engine = index.as_query_engine(service_context=service_context)
response = query_engine.query(question)
print(response)

Linear regression can be used for time series analysis, but its performance may not be particularly remarkable. This is because time series data often have seasonal or periodic trends, which can be treated as outliers in linear regression and not appropriately accounted for. Additionally, linear regression may not be well-suited for future prediction, which is a common use case in time series analysis. Other algorithms such as ARIMA, ARCH, and LSTM are widely used and generally perform better for time series analysis.
