In [1]:
import pandas as pd

In [2]:
# Load the equipment downtime records; the path is relative to the notebook's working directory.
df = pd.read_csv('output_file.csv')

In [3]:
# Swap missing values for empty strings so absent fields show up as blanks
# in the text entries built below.
data_filled = df.fillna('')


def _format_knowledge_entry(row):
    """Render one record's equipment, issue and action fields as a single ' | '-separated line."""
    segments = [
        f"Equipment: {row['Equipment']}",
        f"Down Time Issue: {row['Down time issue']}",
        f"Issue: {row['Issue']}",
        f"Temporary Action Taken: {row['Temporary action taken']}",
    ]
    return " | ".join(segments)


data_filled['knowledge_entry'] = data_filled.apply(_format_knowledge_entry, axis=1)

# Collapse every run of whitespace into one space, then trim both ends.
data_filled['knowledge_entry'] = (
    data_filled['knowledge_entry']
    .str.replace(r'\s+', ' ', regex=True)
    .str.strip()
)

data_filled


Unnamed: 0.1,Unnamed: 0,Equipment,Sub equipment (if applicable),Down time issue,Machine/ process stop time,Machine/ process start time,Time taken (Duration),"Type (B/D, PM, SC, Process)",Attended by - department,Attended by - senior PM/ PC/ other,Issue,Temporary action taken,knowledge_entry
0,1,Sleeve,Mould,water leakage from spt side,19:38,0:18,4.67,B/D,Mechanical,Tapash,Huge water leakage from spt taper plate gap in...,Spt taper plate bolts hole thread were tapped ...,Equipment: Sleeve | Down Time Issue: water lea...
1,2,MTC,ccm,m/c under to mech,11:45,12:50,1.08,B/D,Mechanical,,oil leakage from mtc gland,in mtc gland packing was given and mounting bo...,Equipment: MTC | Down Time Issue: m/c under to...
2,3,Mtc,Mtc,gland tightning,23:00,0:00,1.0,B/D,Mechanical,Srikanta,mtc gland leakage + m/c automatically getting ...,mtc gland made tight,Equipment: Mtc | Down Time Issue: gland tightn...
3,4,Core loader,Core loader,Core loader centering prob,0:00,0:15,0.25,B/D,Mechanical,Srikanta,No record,,Equipment: Core loader | Down Time Issue: Core...
4,5,Mtc,Mtc,Mtc prob,2:40,6:00,3.33,B/D,Mechanical,Srikanta,casting time variation + m/c automatically get...,mtc gland made tight + Sleeve changed,Equipment: Mtc | Down Time Issue: Mtc prob | I...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1604,1605,PTT,PTT,PTT limit prob,10:44,11:16,0.53,B/D,Mechanical,,,,Equipment: PTT | Down Time Issue: PTT limit pr...
1605,1606,Trough,Trough,A side tilting hose damage,4:50,5:28,0.63,B/D,Mechanical,,A-Side Trough Tilting Hose Burst,Hose Changed,Equipment: Trough | Down Time Issue: A side ti...
1606,1607,Mould,Mould,water leakage from spt side,21:06,21:15,0.15,B/D,Mechanical,,,,Equipment: Mould | Down Time Issue: water leak...
1607,1608,Mould,Mould,water leakage from skt side,10:55,11:30,0.58,B/D,Mechanical,,A-Side Socket Roller Adjusting Lock Nut Hugw W...,Didn't arrest by Sacking so water drained and ...,Equipment: Mould | Down Time Issue: water leak...


In [4]:
# Columns that must all be populated for a record to be kept.
required_fields = ['Equipment', 'Down time issue', 'Issue', 'Temporary action taken']

# Turn the empty-string placeholders back into missing values, then discard
# every row that lacks any of the required fields.
data_cleaned = (
    data_filled
    .replace('', pd.NA)
    .dropna(subset=required_fields)
)

# Sanity check: per-column null counts for the required fields, plus the number of rows kept.
remaining_empty_check = data_cleaned[required_fields].isnull().sum()
num_rows_remaining = len(data_cleaned)

remaining_empty_check, num_rows_remaining


(Equipment                 0
 Down time issue           0
 Issue                     0
 Temporary action taken    0
 dtype: int64,
 572)

In [None]:
%pip install sentence-transformers

In [5]:
from sentence_transformers import SentenceTransformer

In [6]:
# Sentence-embedding model used for both the stored entries and, later, user queries.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every entry in one batched call rather than one encode() call per row;
# the model then processes the texts in batches instead of one at a time.
entry_vectors = embedding_model.encode(data_cleaned['knowledge_entry'].tolist())

# list() splits the 2-D result into one 1-D vector per row, in the same order as
# the rows of data_cleaned, so each row receives its own embedding.
data_cleaned['embeddings'] = list(entry_vectors)

In [7]:
# Preview the first few text entries next to their embedding vectors.
data_cleaned[['knowledge_entry', 'embeddings']].head()

Unnamed: 0,knowledge_entry,embeddings
0,Equipment: Sleeve | Down Time Issue: water lea...,"[-0.15470324, 0.023782805, 0.07781514, 0.00720..."
1,Equipment: MTC | Down Time Issue: m/c under to...,"[-0.111168675, 0.012267963, 0.103150494, 0.014..."
2,Equipment: Mtc | Down Time Issue: gland tightn...,"[-0.06780671, -0.017271845, 0.08175827, 0.0385..."
4,Equipment: Mtc | Down Time Issue: Mtc prob | I...,"[-0.06915189, -0.004755037, 0.054270275, 0.008..."
7,Equipment: Diabola | Down Time Issue: Diabola ...,"[0.0041433084, 0.003490047, 0.009966484, -0.02..."


In [None]:
%pip install pinecone-client


In [8]:
import pinecone
from pinecone import Pinecone, ServerlessSpec

In [9]:
import os
from getpass import getpass

# SECURITY: never hardcode the API key in the notebook. It is read from the
# PINECONE_API_KEY environment variable; if that is unset, the user is prompted
# so the secret is never saved with the notebook source.
# Any key that was previously committed in this cell should be revoked and rotated.
pinecone_api_key = os.environ.get('PINECONE_API_KEY') or getpass('Pinecone API key: ')
pc = Pinecone(api_key=pinecone_api_key)

index_name = 'machine-repair-index'
# Create the index only when it does not exist yet, so re-running this cell is safe.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # must equal the length of the embedding vectors stored in the index
        metric='cosine',
        # NOTE(review): an earlier comment suggested 'gcp-starter' for the free tier;
        # confirm which cloud/region your Pinecone plan supports.
        spec=ServerlessSpec(cloud='aws', region='us-east-1')
    )

# Connect to the index
index = pc.Index(index_name)

print("Pinecone initialized and index connected successfully.")

Pinecone initialized and index connected successfully.


In [10]:
# Number of vectors sent per upsert request. Batching keeps each request small
# as the dataset grows instead of sending everything in a single call.
UPSERT_BATCH_SIZE = 100

# Prepare the data for upsert as (id, vector, metadata) tuples.
# The id is the DataFrame index label; each vector is converted to a plain list
# of Python floats so it serializes regardless of whether it is stored as a
# numpy array or a list.
upsert_data = [
    (
        str(i),
        [float(value) for value in row['embeddings']],
        {'knowledge_entry': row['knowledge_entry']},
    )
    for i, row in data_cleaned.iterrows()
]

for batch_start in range(0, len(upsert_data), UPSERT_BATCH_SIZE):
    index.upsert(vectors=upsert_data[batch_start:batch_start + UPSERT_BATCH_SIZE])

print(f"Successfully upserted {len(upsert_data)} embeddings into the Pinecone index.")


Successfully upserted 572 embeddings into the Pinecone index.


In [11]:
# Show the index statistics to confirm what the index holds after the upsert.
print(index.describe_index_stats())


{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 572}},
 'total_vector_count': 572}


In [None]:
%pip install langchain llama-cpp-python

In [None]:
%pip install langchain-community

In [16]:
from langchain.llms import LlamaCpp

In [17]:
import os

# Location of the GGUF model file. Set the LLAMA_MODEL_PATH environment variable
# to run this notebook on another machine; the original local path is kept as
# the default so existing setups keep working.
model_path = os.environ.get(
    "LLAMA_MODEL_PATH",
    r"C:\Users\ASUS\Downloads\llama.cpp\scripts\llama-3.1-8b-instruct.gguf",
)

# Load the Llama model
# NOTE(review): no context size (n_ctx) is passed, so the wrapper's default applies;
# confirm it is large enough for the retrieval-augmented prompts built later.
llama = LlamaCpp(model_path=model_path)

print("Llama model loaded successfully.")

llama_model_loader: loaded meta data with 33 key-value pairs and 292 tensors from C:\Users\ASUS\Downloads\llama.cpp\scripts\llama-3.1-8b-instruct.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Llama 3.1 8B Instruct
llama_model_loader: - kv   3:                           general.finetune str              = Instruct
llama_model_loader: - kv   4:                           general.basename str              = Llama-3.1
llama_model_loader: - kv   5:                         general.size_label str              = 8B
llama_model_loader: - kv   6:                            general.license str              = llama3.1
llama_model_loa

Llama model loaded successfully.


CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | LLAMAFILE = 1 | OPENMP = 1 | AARCH64_REPACK = 1 | 
Model metadata: {'general.name': 'Llama 3.1 8B Instruct', 'general.architecture': 'llama', 'general.type': 'model', 'general.basename': 'Llama-3.1', 'general.finetune': 'Instruct', 'general.size_label': '8B', 'general.license': 'llama3.1', 'general.base_model.count': '1', 'general.base_model.0.name': 'Meta Llama 3.1 8B', 'general.base_model.0.organization': 'Meta Llama', 'general.base_model.0.repo_url': 'https://huggingface.co/meta-llama/Meta-Llama-3.1-8B', 'llama.block_count': '32', 'llama.context_length': '131072', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '14336', 'llama.attention.head_count': '32', 'tokenizer.ggml.eos_token_id': '128009', 'general.file_type': '1', 'llama.attention.head_count_kv': '8', 'llama.rope.freq_base': '500000.000000', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.vocab_size': '128256', 'llama.rope.dimen

In [None]:
pip install langchain-community langchain-huggingface langchain-pinecone

In [None]:
%pip install --upgrade langchain-community langchain-huggingface langchain-pinecone

In [54]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document

In [None]:
# LangChain embeddings wrapper for the same model used to build the index.
# It is bound to its own name on purpose: `embedding_model` must keep referring
# to the SentenceTransformer instance, because later cells call
# `embedding_model.encode(...)`, which this wrapper does not provide.
lc_embedding_model = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

# NOTE: `Pinecone` here is the LangChain vector store imported from
# langchain_community.vectorstores, which shadows the Pinecone client class of
# the same name imported earlier in the notebook.
# `text_key` names the metadata field that holds each document's text.
retriever = Pinecone(index=index, embedding=lc_embedding_model, text_key='knowledge_entry')

# The `llama` model loaded in the earlier cell is reused below; loading the same
# GGUF file a second time only costs extra time and memory.

prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    You are a helpful assistant. Use the following context to answer the question.

    Context: {context}

    Question: {question}
    """
)

# Chain that fills {context} with the retrieved documents and sends the prompt to the LLM.
combine_docs_chain = create_stuff_documents_chain(llm=llama, prompt=prompt_template)

def rag_pipeline(query):
    """Answer `query` using documents retrieved from the vector store.

    Looks up documents similar to `query` via the module-level `retriever`,
    then passes them as "context" together with the query as "question" to
    the module-level `combine_docs_chain`.

    Args:
        query: The user's question as a string.

    Returns:
        Whatever `combine_docs_chain.invoke` returns for those inputs.
    """
    matched_docs = retriever.similarity_search(query)
    chain_inputs = {"context": matched_docs, "question": query}
    return combine_docs_chain.invoke(chain_inputs)


print("RAG pipeline created successfully.")

In [None]:
# Example user query
query = "How do I fix an oil leakage in the MTC gland?"

# Run the query through rag_pipeline and print the result.
response = rag_pipeline(query)
print("Response:", response)

In [21]:
def retrieve_repair_suggestions(user_query, embedding_model, pinecone_index, top_k=3):
    """Return the stored knowledge entries closest to `user_query`.

    Args:
        user_query: Free-text description of the problem.
        embedding_model: Object whose ``encode(text)`` result exposes ``tolist()``.
        pinecone_index: Object whose ``query(vector=..., top_k=..., include_metadata=...)``
            result has a 'matches' sequence; each match carries
            ``metadata['knowledge_entry']``.
        top_k: Number of matches to request from the index.

    Returns:
        A list with the 'knowledge_entry' metadata of each match, in the order
        the index returned them.
    """
    # Embed the query and convert it to a plain list for the index call.
    query_vector = embedding_model.encode(user_query).tolist()

    response = pinecone_index.query(
        vector=query_vector,
        top_k=top_k,
        include_metadata=True,
    )

    # Keep only the stored text of each match.
    entries = []
    for match in response['matches']:
        entries.append(match['metadata']['knowledge_entry'])
    return entries


In [22]:
# Example: fetch the closest stored entries for a query and list them, numbered from 1.
user_query = "Water leakage from a pump"
suggestions = retrieve_repair_suggestions(user_query, embedding_model, index)

print("Retrieved Suggestions:")
for rank, entry in enumerate(suggestions, start=1):
    print(f"{rank}. {entry}")


Retrieved Suggestions:
1. Equipment: pump | Down Time Issue: pump 3 start up & casting time varriation | Issue: Casting Time Variation | Temporary Action Taken: Pump No-3 was connected & pressure set at 70kg/cm^2 (Casting Time: 25.8 to 26.4s), Individual Pump Pressure checked (All at 70kg/cm^2), No Internal Leakage in MTC. Electrical person checked Feedback System and automatically casting Time coming constant at 21.3s.
2. Equipment: Mould | Down Time Issue: water leakage from spt side | Issue: Spt water leakage due to Seal twist | Temporary Action Taken: Seal Changed
3. Equipment: Mould | Down Time Issue: water leakage from spt side | Issue: Spigot side water leakage due to seal damage | Temporary Action Taken: Seal Changed


In [14]:
def generate_response_with_llama(llama_model, user_query, suggestions):
    """Ask the language model for a repair plan based on retrieved suggestions.

    Args:
        llama_model: Either an object exposing ``invoke(prompt)`` (such as a
            LangChain LLM) or a plain callable that takes the prompt string.
        user_query: The issue reported by the user.
        suggestions: Iterable of suggestion strings; they are numbered from 1
            in the prompt.

    Returns:
        Whatever the model returns for the assembled prompt.
    """
    # Combine the suggestions into a single prompt
    context = "\n".join(f"Suggestion {i}: {s}" for i, s in enumerate(suggestions, 1))
    prompt = (
        f"You are a repair expert. A user has reported the following issue:\n"
        f"{user_query}\n\n"
        f"Here are some relevant suggestions from the database:\n"
        f"{context}\n\n"
        f"Based on these suggestions, provide a detailed and user-friendly repair plan."
    )

    # Prefer invoke(): calling a LangChain LLM object directly is deprecated.
    # Plain callables (no `invoke` attribute) are still supported.
    invoke = getattr(llama_model, "invoke", None)
    if callable(invoke):
        return invoke(prompt)
    return llama_model(prompt)


In [None]:
# Generate an answer from `user_query` and `suggestions` as set by an earlier cell.
response = generate_response_with_llama(llama, user_query, suggestions)
print("Chatbot Response:")
print(response)


In [None]:
def chatbot_pipeline(user_query, embedding_model, pinecone_index, llama_model, top_k=3):
    """Run retrieval followed by generation for a single user query.

    Args:
        user_query: The issue reported by the user.
        embedding_model: Passed through to `retrieve_repair_suggestions`.
        pinecone_index: Passed through to `retrieve_repair_suggestions`.
        llama_model: Passed through to `generate_response_with_llama`.
        top_k: Number of suggestions to retrieve.

    Returns:
        The value returned by `generate_response_with_llama`.
    """
    retrieved = retrieve_repair_suggestions(user_query, embedding_model, pinecone_index, top_k)
    return generate_response_with_llama(llama_model, user_query, retrieved)


In [None]:
# End-to-end example: retrieve the top 3 suggestions and generate a response in one call.
user_query = "The machine is leaking oil from the gasket."
response = chatbot_pipeline(user_query, embedding_model, index, llama, top_k=3)
print("Final Chatbot Response:")
print(response)
