In [None]:
! pip install -q transformers torch transformers_stream_generator optimum auto-gptq tiktoken

In [None]:
import io
import pandas as pd
import transformers
import torch
import optimum

# Load the ARIMA forecast table and immediately re-serialise it as CSV text
# (without the pandas index column) so it can be embedded in the LLM prompt.
forecast_arima = pd.read_csv('/content/arima_rl.csv')
arima_str = forecast_arima.to_csv(index=False)

# Same treatment for the second input file.
# NOTE(review): judging by the file name this is the trailing 12-month
# history -- confirm against whatever produced the CSV.
past_year_data = pd.read_csv('/content/rl_12_month.csv')
past_year_str = past_year_data.to_csv(index=False)

# User-turn instruction text for the model.
#
# This is an f-string: the two CSV dumps (`arima_str` and `past_year_str`)
# are interpolated near the end under the "Dataset" heading. Everything
# between the triple quotes -- including the leading newline, the Markdown
# markers and the dashed separators -- is sent to the model verbatim, so edit
# it only when a change to the prompt itself is intended.
user_prompt = f"""
You are a senior financial advisor specializing in fixed income and bond markets, with over 20 years of experience. Your expertise covers analyzing market trends, forecasting yield movements, and evaluating risk factors.

The dataset provided contains **both forecasted and actual bond yields**. Your task is to analyze **each column separately** and provide insights based on:
1. The **forecasted yield values from the ARIMA model**.
2. The **actual yield values from the dataset**.
3. **Historical trends from the past 12 months**.
4. **Volatility, risk factors, and investment implications**.

Each of the following columns should be **analyzed separately**, with clear insights:

### **Columns to Analyze:**
1. **US_10Y_Yield** (Forecasted US 10-Year Treasury Yield)
2. **US_10Y_Yield_Actual** (Actual US 10-Year Treasury Yield)
3. **AAA_Bond_Yield** (Forecasted AAA Bond Yield)
4. **AAA_Bond_Yield_Actual** (Actual AAA Bond Yield)
5. **BAA_Bond_Yield** (Forecasted BAA Bond Yield)
6. **BAA_Bond_Yield_Actual** (Actual BAA Bond Yield)
7. **Junk_Bond_Yield** (Forecasted Junk Bond Yield)
8. **Junk_Bond_Yield_Actual** (Actual Junk Bond Yield)

---

### **Expected Output Format**
For each column, provide insights using this structured format:

-----------------------------------------------------------
**Column:** [Column Name]
**Value:** [Value from dataset]
**Analysis:** [Explain key insights based on forecasted/actual values]
**Trend Observation:** [How this yield has moved over time]
**Volatility & Risk Factors:** [Assess stability or risk level]
**Investment Implication:** [How investors should act based on this data]
-----------------------------------------------------------

Ensure that each column is **analyzed individually** and not grouped together.
Each column's forecasted and actual value should have a **separate** analysis.

### **Dataset:**
#### **Forecasted vs. Actual Bond Yields (First Dataset)**
{arima_str}

#### **Historical Bond Yields (Second Dataset)**
{past_year_str}

Please follow the structured format above and provide **separate** insights for each column.
"""

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Hugging Face Hub id of the checkpoint used for generation.
model_name_or_path = "Qwen/Qwen2.5-7B-Instruct-1M"

# Load the causal-LM weights.
#   * device_map="auto"  - let Accelerate place the layers on the available
#                          device(s) instead of loading everything on CPU.
#   * torch_dtype="auto" - keep the dtype stored in the checkpoint rather than
#                          up-casting to float32, which roughly doubles the
#                          memory needed for a 7B-parameter model.
#   * revision="main"    - track the default branch of the model repo.
# trust_remote_code is deliberately NOT enabled: Qwen2.5 uses the Qwen2
# architecture that ships with Transformers, so there is no reason to allow
# arbitrary Python from the Hub repository to run on this machine.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    torch_dtype="auto",
    revision="main",
)

# Matching tokenizer; use_fast=True requests the Rust-backed implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Persona given to the model in the system turn.
system_message = "You are a Senior Financial Advisor with deep expertise in fixed income investments and bond market analysis."

# Assemble the ChatML-style prompt by hand: a system turn, the user turn
# carrying `user_prompt`, and an opened assistant turn for the model to
# complete. Each turn is wrapped in <|im_start|>ROLE ... <|im_end|>.
prompt_template = "".join([
    "<|im_start|>system\n",
    system_message,
    "<|im_end|>\n",
    "<|im_start|>user\n",
    user_prompt,
    "<|im_end|>\n",
    "<|im_start|>assistant\n",
])

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
#
# max_new_tokens bounds the length of the *reply* only. The previous value
# (1,010,000) is the model's total context window, not an output budget: if
# sampling never produced an end-of-sequence token, generation would keep
# going essentially without limit while the KV cache grew until memory ran
# out. 8192 matches the generation length stated on the model card and is far
# more than the eight short report sections requested by the prompt.
#
# The remaining arguments configure sampling: nucleus (top_p) and top-k
# filtering at temperature 0.7, plus a mild repetition penalty.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=8192,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1,
)

# Run the pipeline once on the assembled prompt. The result is indexed as a
# list of dicts; keep the 'generated_text' of the first entry and show it.
generations = pipe(prompt_template)
output = generations[0]['generated_text']
print(output)
