In [1]:
from langchain.chains import LLMChain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers import PandasDataFrameOutputParser



In [7]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4o-mini",temperature=0)

In [8]:
class QuarterlyFinanceData(BaseModel):
    quarter: str
    revenue_billion_usd: float
    net_income_billion_usd: float

In [11]:

# And a query intented to prompt a language model to populate the data structure.
query = "Tell me the profit and revenue for apple for all quarters of 2001 and 2002."

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=QuarterlyFinanceData)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | model | parser


[{'quarter': 'Q1 2001',
  'revenue_billion_usd': 5.36,
  'net_income_billion_usd': 0.07},
 {'quarter': 'Q2 2001',
  'revenue_billion_usd': 5.51,
  'net_income_billion_usd': 0.12},
 {'quarter': 'Q3 2001',
  'revenue_billion_usd': 5.34,
  'net_income_billion_usd': 0.06},
 {'quarter': 'Q4 2001',
  'revenue_billion_usd': 5.36,
  'net_income_billion_usd': 0.07},
 {'quarter': 'Q1 2002',
  'revenue_billion_usd': 5.8,
  'net_income_billion_usd': 0.15},
 {'quarter': 'Q2 2002',
  'revenue_billion_usd': 6.23,
  'net_income_billion_usd': 0.2},
 {'quarter': 'Q3 2002',
  'revenue_billion_usd': 6.52,
  'net_income_billion_usd': 0.25},
 {'quarter': 'Q4 2002',
  'revenue_billion_usd': 6.78,
  'net_income_billion_usd': 0.3}]

In [35]:
result=chain.invoke({"query": query})

In [34]:
result

[{'quarter': 'Q1 2001',
  'revenue_billion_usd': 5.36,
  'net_income_billion_usd': 0.07},
 {'quarter': 'Q2 2001',
  'revenue_billion_usd': 5.51,
  'net_income_billion_usd': 0.12},
 {'quarter': 'Q3 2001',
  'revenue_billion_usd': 5.34,
  'net_income_billion_usd': 0.06},
 {'quarter': 'Q4 2001',
  'revenue_billion_usd': 5.36,
  'net_income_billion_usd': 0.07},
 {'quarter': 'Q1 2002',
  'revenue_billion_usd': 5.75,
  'net_income_billion_usd': 0.15},
 {'quarter': 'Q2 2002',
  'revenue_billion_usd': 6.23,
  'net_income_billion_usd': 0.2},
 {'quarter': 'Q3 2002',
  'revenue_billion_usd': 6.52,
  'net_income_billion_usd': 0.25},
 {'quarter': 'Q4 2002',
  'revenue_billion_usd': 6.78,
  'net_income_billion_usd': 0.3}]

In [36]:
import pandas as pd
df = pd.DataFrame(result)


In [37]:
df

Unnamed: 0,quarter,revenue_billion_usd,net_income_billion_usd
0,Q1 2001,5.36,0.07
1,Q2 2001,5.51,0.12
2,Q3 2001,5.34,0.06
3,Q4 2001,5.36,0.07
4,Q1 2002,5.75,0.1
5,Q2 2002,6.23,0.14
6,Q3 2002,6.52,0.16
7,Q4 2002,6.66,0.18


In [38]:
# Extracting the 'revenue_billion_usd' column
revenue = df['revenue_billion_usd']
print(revenue)


0    5.36
1    5.51
2    5.34
3    5.36
4    5.75
5    6.23
6    6.52
7    6.66
Name: revenue_billion_usd, dtype: float64


In [17]:
# Extracting the first row
first_row = df.iloc[0]
print(first_row)


quarter                   Q1 2001
revenue_billion_usd          5.36
net_income_billion_usd       0.07
Name: 0, dtype: object


In [18]:
# Filter rows where revenue is greater than 6 billion USD
filtered_df = df[df['revenue_billion_usd'] > 6]
print(filtered_df)


   quarter  revenue_billion_usd  net_income_billion_usd
7  Q4 2002                 6.23                    0.09


In [19]:
# Extract the 'net_income_billion_usd' for 'Q2 2002'
net_income_q2_2002 = df.loc[df['quarter'] == 'Q2 2002', 'net_income_billion_usd'].iloc[0]
print(net_income_q2_2002)


0.06


In [20]:
# Extract the first three rows
first_three_rows = df.iloc[:3]
print(first_three_rows)

# Extract the 'quarter' and 'revenue_billion_usd' columns for the first three rows
first_three_revenue = df.iloc[:3][['quarter', 'revenue_billion_usd']]
print(first_three_revenue)


   quarter  revenue_billion_usd  net_income_billion_usd
0  Q1 2001                 5.36                    0.07
1  Q2 2001                 5.51                    0.12
2  Q3 2001                 5.34                    0.06
   quarter  revenue_billion_usd
0  Q1 2001                 5.36
1  Q2 2001                 5.51
2  Q3 2001                 5.34


In [39]:
# Group by year (assuming the year is extracted into a separate column)
df['year'] = df['quarter'].str[-4:]
grouped = df.groupby('year').sum()
print(grouped)



                           quarter  revenue_billion_usd  \
year                                                      
2001  Q1 2001Q2 2001Q3 2001Q4 2001                21.57   
2002  Q1 2002Q2 2002Q3 2002Q4 2002                25.16   

      net_income_billion_usd  
year                          
2001                    0.32  
2002                    0.58  


In [40]:

from tabulate import tabulate

# Convert the DataFrame to a tabular format using tabulate
print(tabulate(grouped, headers='keys', tablefmt='grid'))


+--------+------------------------------+-----------------------+--------------------------+
|   year | quarter                      |   revenue_billion_usd |   net_income_billion_usd |
|   2001 | Q1 2001Q2 2001Q3 2001Q4 2001 |                 21.57 |                     0.32 |
+--------+------------------------------+-----------------------+--------------------------+
|   2002 | Q1 2002Q2 2002Q3 2002Q4 2002 |                 25.16 |                     0.58 |
+--------+------------------------------+-----------------------+--------------------------+


In [41]:
summary = df.describe()
print(summary)


       revenue_billion_usd  net_income_billion_usd
count             8.000000                 8.00000
mean              5.841250                 0.11250
std               0.549635                 0.04496
min               5.340000                 0.06000
25%               5.360000                 0.07000
50%               5.630000                 0.11000
75%               6.302500                 0.14500
max               6.660000                 0.18000


In [42]:
# Calculate mean revenue
mean_revenue = df['revenue_billion_usd'].mean()
print(f"Mean Revenue: {mean_revenue} billion USD")


Mean Revenue: 5.8412500000000005 billion USD


In [43]:
import numpy as np

# Assuming we have weights for each quarter
weights = [1, 1, 1, 1, 2, 2, 2, 2]

# Calculate weighted mean for revenue
weighted_mean_revenue = np.average(df['revenue_billion_usd'], weights=weights)
print(f"Weighted Mean Revenue: {weighted_mean_revenue} billion USD")


Weighted Mean Revenue: 5.990833333333334 billion USD


In [44]:
# Calculate median net income
median_net_income = df['net_income_billion_usd'].median()
print(f"Median Net Income: {median_net_income} billion USD")


Median Net Income: 0.11 billion USD


In [45]:
import pandas as pd

# Assuming you have already grouped your data by year
df['year'] = df['quarter'].str[-4:]
grouped = df.groupby('year').sum()

# Print the grouped DataFrame in a tabular format
print(grouped)


                           quarter  revenue_billion_usd  \
year                                                      
2001  Q1 2001Q2 2001Q3 2001Q4 2001                21.57   
2002  Q1 2002Q2 2002Q3 2002Q4 2002                25.16   

      net_income_billion_usd  
year                          
2001                    0.32  
2002                    0.58  


In [31]:
!pip install prettytable



Collecting prettytable
  Downloading prettytable-3.11.0-py3-none-any.whl.metadata (30 kB)
Downloading prettytable-3.11.0-py3-none-any.whl (28 kB)
Installing collected packages: prettytable
Successfully installed prettytable-3.11.0


In [29]:
from tabulate import tabulate

# Convert the DataFrame to a tabular format using tabulate
print(tabulate(grouped, headers='keys', tablefmt='grid'))


+--------+------------------------------+-----------------------+--------------------------+
|   year | quarter                      |   revenue_billion_usd |   net_income_billion_usd |
|   2001 | Q1 2001Q2 2001Q3 2001Q4 2001 |                 21.57 |                     0.32 |
+--------+------------------------------+-----------------------+--------------------------+
|   2002 | Q1 2002Q2 2002Q3 2002Q4 2002 |                 22.09 |                     0.26 |
+--------+------------------------------+-----------------------+--------------------------+


In [32]:
from prettytable import PrettyTable

# Create a PrettyTable object
table = PrettyTable()

# Add columns
table.field_names = grouped.columns.tolist()

# Add rows
for row in grouped.itertuples():
    table.add_row(row[1:])

print(table)


+------------------------------+---------------------+------------------------+
|           quarter            | revenue_billion_usd | net_income_billion_usd |
+------------------------------+---------------------+------------------------+
| Q1 2001Q2 2001Q3 2001Q4 2001 |        21.57        |          0.32          |
| Q1 2002Q2 2002Q3 2002Q4 2002 |        22.09        |          0.26          |
+------------------------------+---------------------+------------------------+
