In [1]:
# Load (or reload) the IPython extension named `dotenv`, then invoke its %dotenv magic.
# Presumably this reads a local .env file into the process environment so that
# os.environ['OPENAI_API_KEY'] is available to the cells below — TODO confirm.
%reload_ext dotenv
%dotenv

In [2]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
import os
import openai
import pandas as pd

def get_query_engine_from_text(txtfile):
    """Build a llama_index query engine over a text file or a directory.

    Args:
        txtfile: Path to a single file, or to a directory. A file path is
            handed to the reader as an explicit one-element file list; any
            other value is passed through positionally as before, so existing
            callers that pass a directory (e.g. ``'data'``) are unaffected.

    Returns:
        The object returned by ``VectorStoreIndex.as_query_engine()``.

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
    """
    openai.api_key = os.environ['OPENAI_API_KEY']

    # SimpleDirectoryReader's positional argument is a directory; a plain file
    # path has to go through `input_files` instead.
    if os.path.isfile(txtfile):
        reader = SimpleDirectoryReader(input_files=[txtfile])
    else:
        reader = SimpleDirectoryReader(txtfile)

    documents = reader.load_data()
    index = VectorStoreIndex.from_documents(documents)
    return index.as_query_engine()

def csv_to_llminput(csvpath,txtOutputPath):
    """Turn a housing CSV into one plain-English sentence per row.

    Each row of the CSV becomes a line of the form
    ``Price of house with area of <area> and <bedrooms> bedrooms is <price>``.

    Args:
        csvpath: Path of the CSV to read; it must contain the columns
            ``area``, ``bedrooms`` and ``price``.
        txtOutputPath: Path of the text file to write (overwritten if present).

    Raises:
        KeyError: If any of the three required columns is missing. The output
            file is not opened (and so not truncated) in that case.
    """
    # Select the needed columns before touching the output file, so that a bad
    # CSV fails early instead of leaving a truncated or half-written file.
    rows = pd.read_csv(csvpath)[['area', 'bedrooms', 'price']]
    template = "Price of house with area of {area} and {bedrooms} bedrooms is {price}\n"

    # The context manager closes the file even if a write fails part-way.
    with open(txtOutputPath, "w") as out:
        print("Started writing to: ",txtOutputPath)
        # itertuples keeps each column's own dtype; iterrows would upcast
        # integer columns to float when all columns are numeric.
        out.writelines(
            template.format(area=area, bedrooms=bedrooms, price=price)
            for area, bedrooms, price in rows.itertuples(index=False, name=None)
        )
    print("Completed writing to: ",txtOutputPath)

In [38]:
# Generate the sentence-per-row text file that the query engine cell indexes.
csv_to_llminput(
    csvpath='./data/Housing.csv',
    txtOutputPath='./data/Housing-csv-llminput.txt',
)

Started writing to:  ./data/Housing-csv-llminput.txt
Completed writing to:  ./data/Housing-csv-llminput.txt


In [40]:
# Build a query engine over the 'data' directory (where csv_to_llminput wrote
# its text file) and ask it a question in plain English.
# NOTE(review): 'data' is a directory, not a single file; presumably every
# readable file in it gets indexed, including the raw CSVs — confirm intended.
query_engine = get_query_engine_from_text('data')
resp = query_engine.query("What house has least number of bedrooms?")

# Last expression in the cell, so the answer text is displayed as the output.
resp.response

'\nThe house with the least number of bedrooms is a house with an area of 3970 and 1 bedroom, with a price of 2275000.'

In [3]:
from pandasai import PandasAI
from pandasai.llm.openai import OpenAI

# Wrap the OpenAI LLM in a PandasAI instance; the API key is read from the
# environment rather than hardcoded in the notebook.
llm = OpenAI(api_token=os.environ['OPENAI_API_KEY'])
# enable_cache=False presumably turns off PandasAI's response cache so every
# run goes to the LLM — TODO confirm against the PandasAI docs.
pandas_ai = PandasAI(llm,enable_cache=False)

# `df` is rebound to other datasets in later cells, so the load cell that
# matches a prompt must be the most recently executed one.
df = pd.read_csv('./data/Housing.csv')
pandas_ai.run(df,prompt='display house with the highest area')


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
7,10150000,16200,5,3,2,yes,no,no,no,no,0,no,unfurnished


In [12]:
# Load the dish data without its 'description' and 'id' columns, then preview it.
df = pd.read_csv('./data/Dish.csv').drop(columns=['description', 'id'])
df.head()

Unnamed: 0,name,menus_appeared,times_appeared,first_appeared,last_appeared,lowest_price,highest_price
0,Consomme printaniere royal,8,8,1897,1927,0.2,0.4
1,Chicken gumbo,111,117,1895,1960,0.1,0.8
2,Tomato aux croutons,14,14,1893,1917,0.25,0.4
3,Onion au gratin,41,41,1900,1971,0.25,1.0
4,St. Emilion,66,68,1881,1981,0.0,18.0


In [16]:
# Uses whichever DataFrame `df` currently refers to; the recorded result has the
# Dish.csv columns, so the dish-loading cell must have been run most recently.
# The prompt does not name a price column; the recorded output is ordered by
# highest_price, descending.
pandas_ai.run(df,prompt='show top 5 dishes in terms of price')

Unnamed: 0,name,menus_appeared,times_appeared,first_appeared,last_appeared,lowest_price,highest_price
31505,Cream cheese with bar-le-duc jelly,16,16,1900,1933,0.45,3050.0
61,Grape fruit,713,747,1895,1968,0.0,2540.0
100452,Oysters Baked in Shell,3,3,1857,1943,0.85,2065.0
21814,Pommery & Greno. Ex. Dry,10,10,1900,1901,1.4,2050.0
199195,luso,1,1,1987,1987,600.0,1100.0


In [17]:
# Load the used-car training data and preview the first rows so that a fresh
# run of this cell displays a five-row preview.
df = pd.read_csv('./data/usedcar-prices-train-data.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74


In [19]:
# Relies on `df` holding the used-car data loaded in the preceding cell; the
# recorded output lists five rows in descending Price order, restricted to the
# Name, Year and Price columns requested in the prompt.
pandas_ai.run(df,prompt='show name, year and price of top 5 cars in terms of price')

Unnamed: 0,Name,Year,Price
4079,Land Rover Range Rover 3.0 Diesel LWB Vogue,2017,160.0
5781,Lamborghini Gallardo Coupe,2011,120.0
5919,Jaguar F Type 5.0 V8 S,2015,100.0
1505,Land Rover Range Rover Sport SE,2019,97.07
1974,BMW 7 Series 740Li,2018,93.67
