In [4]:
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

In [5]:
import os

# google.colab is only importable inside a Colab runtime. Import it defensively
# so the notebook also runs locally or in other hosted Jupyter environments.
try:
    from google.colab import userdata
except ImportError:
    userdata = None

if userdata is not None:
    # Colab: read the key from the notebook's stored secrets (original behaviour).
    os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
elif not os.environ.get('OPENAI_API_KEY'):
    # Outside Colab and no key exported yet: ask interactively so the key is
    # never hard-coded in the notebook or echoed into a cell output.
    from getpass import getpass
    os.environ['OPENAI_API_KEY'] = getpass('OPENAI_API_KEY: ')

# Chat model used by the first (sequential) chain; temperature=0 is requested
# to keep answers as repeatable as possible.
# NOTE(review): the key is presumably picked up from OPENAI_API_KEY by
# ChatOpenAI itself -- confirm against the langchain_openai documentation.
model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [6]:
# Shared output parser: used as the stage right after each chat-model call in
# the string-producing chains defined below.
parser = StrOutputParser()

In [7]:
# Stage-1 prompt: asks for a detailed explanation of the topic supplied as {name}.
prompt1 = PromptTemplate(
    template="Tell me in details about the following \n {name}",
    input_variables=["name"],
)

In [8]:
# Stage-2 prompt: condenses whatever is supplied as {text} into five key points.
prompt2 = PromptTemplate(
    template="Tell me 5 most important points in engageable manner\n {text}",
    input_variables=["text"],
)

In [9]:
# Sequential chain made of two prompt -> model -> parser stages; the parsed
# output of the first stage is piped straight into the second prompt.
chain = (
    prompt1 | model | parser      # stage 1: detailed explanation of {name}
    | prompt2 | model | parser    # stage 2: five key points from stage-1 output
)

In [10]:
# Run the sequential chain for one topic; the returned value is shown as the
# cell output.
chain.invoke(
    {"name": "transformers in ai"}
)

'1. Transformers are a game-changing type of deep learning model in the field of artificial intelligence, known for their efficiency in handling sequential data.\n\n2. Based on self-attention mechanism, transformers can weigh the importance of different input tokens, allowing them to capture long-range dependencies in data effectively.\n\n3. Unlike traditional RNNs, transformers can process input sequences in parallel, making them much faster and more efficient, especially with long sequences of data.\n\n4. Transformers consist of an encoder and a decoder, each made up of multiple layers of self-attention and feedforward neural networks, which are adjusted during training to minimize a loss function.\n\n5. Popular transformer-based models like BERT, GPT, and T5 have shown impressive results in various NLP tasks, showcasing the power and versatility of transformer architecture in AI advancements.'

In [13]:
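# Optional: the ASCII graph rendering in the next cell needs the `grandalf` package; if it is missing, run `%pip install -q grandalf` first.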

In [14]:
# Draw the sequential chain's runnable graph as ASCII art.
chain_graph = chain.get_graph()
chain_graph.print_ascii()

     +-------------+       
     | PromptInput |       
     +-------------+       
            *              
            *              
            *              
    +----------------+     
    | PromptTemplate |     
    +----------------+     
            *              
            *              
            *              
      +------------+       
      | ChatOpenAI |       
      +------------+       
            *              
            *              
            *              
   +-----------------+     
   | StrOutputParser |     
   +-----------------+     
            *              
            *              
            *              
+-----------------------+  
| StrOutputParserOutput |  
+-----------------------+  
            *              
            *              
            *              
    +----------------+     
    | PromptTemplate |     
    +----------------+     
            *              
            *              
            *       

## Parallel Project

In [15]:
# Model used by the parallel summary/quiz branches and by the sentiment chains.
model1 = ChatOpenAI(
    temperature=0,
    model_name="gpt-3.5-turbo",
)

In [16]:
# Second model, used for the step that merges the summary and the quiz.
model2 = ChatOpenAI(
    temperature=0,
    model_name="gpt-4",
)

In [20]:
# Summary prompt: short summary of the passage supplied as {para}.
prompt3 = PromptTemplate(
    template="Generate a short summary, but cover all the important points from following \n {para}",
    input_variables=["para"],
)

In [30]:
# Quiz prompt: question/answer quiz built from the same {para} passage.
prompt4 = PromptTemplate(
    template="create a quiz having question and answers at least 4 from the following provided\n {para}",
    input_variables=["para"],
)

In [31]:
# Merge prompt: combines a {summary} and a {quiz} into a single document.
prompt5 = PromptTemplate(
    template="combine the summary {summary}\n and quiz {quiz} in an engageable manner into single document\n",
    input_variables=["summary", "quiz"],
)

In [23]:
# Sample input passage (an article excerpt about combining SMOTE with Edited
# Nearest Neighbor) that is passed as the "para" value when the parallel
# summary/quiz chain is invoked. The text is runtime data and is kept verbatim.
sample_text="""Combine SMOTE with Edited Nearest Neighbor (ENN) using Python to balance your dataset

Motivation

There are many methods to overcome imbalanced datasets in classification modeling by oversampling the minority class or undersampling the majority class. To increase the model performance even further, many researchers suggest combining oversampling and undersampling methods to balance the dataset better.

In my previous article, I have already explained one of the combined oversampling and undersampling methods, named the SMOTE-Tomek Links method. This time, I will explain the other variation, by combining SMOTE and Edited Nearest Neighbor (ENN) method – or in short, SMOTE-ENN – and its implementation using Python.

The Concept: K-Nearest Neighbor (KNN)

The idea of KNN is to assume that the nearest neighbor of each data based on its distance is having a similar class. When the new observation in the dataset exists, KNN will search its K-nearest neighbor to determine the class that the new observation will belong to. Many distance metrics can be used to calculate each observation distance in KNN, but the most common one is by using Euclidean distance.

For example, suppose that the dataset is consists of two classes, black and white. Now, suppose that there is a new observation with an unknown class. By using KNN, if the majority of the new observation’s K-nearest neighbor belongs to the black class, then the new observation will belong to that black class and vice versa.

Given a dataset that consists of N observations, the algorithm of KNN can be explained as follows.

Determine K, as the number of nearest neighbors.
For each observation in the dataset, calculate the distance between each observation, then add the distance and the observation to an ordered set.
Sort the ordered set of distances and observations in ascending order based on the distances.
Pick the first K entries from the sorted ordered set. In other words, pick the K nearest neighbor of each observation.
Return the majority class from the selected K entries.
The Concept: Edited Nearest Neighbor (ENN)

Developed by Wilson (1972), the ENN method works by finding the K-nearest neighbor of each observation first, then check whether the majority class from the observation’s k-nearest neighbor is the same as the observation’s class or not. If the majority class of the observation’s K-nearest neighbor and the observation’s class is different, then the observation and its K-nearest neighbor are deleted from the dataset. In default, the number of nearest-neighbor used in ENN is K=3.

The algorithm of ENN can be explained as follows.

Given the dataset with N observations, determine K, as the number of nearest neighbors. If not determined, then K=3.
Find the K-nearest neighbor of the observation among the other observations in the dataset, then return the majority class from the K-nearest neighbor.
If the class of the observation and the majority class from the observation’s K-nearest neighbor is different, then the observation and its K-nearest neighbor are deleted from the dataset.
Repeat step 2 and 3 until the desired proportion of each class is fulfilled.
This method is more powerful than Tomek Links, where ENN removes the observation and its K-nearest neighbor when the class of the observation and the majority class from the observation’s K-nearest neighbor are different, instead of just removing observation and its 1-nearest neighbor that are having different classes. Thus, ENN can be expected to give more in-depth data cleaning than Tomek Links.

SMOTE-ENN Method

Developed by Batista et al (2004), this method combines the SMOTE ability to generate synthetic examples for minority class and ENN ability to delete some observations from both classes that are identified as having different class between the observation’s class and its K-nearest neighbor majority class. The process of SMOTE-ENN can be explained as follows.

(Start of SMOTE) Choose random data from the minority class.
Calculate the distance between the random data and its k nearest neighbors.
Multiply the difference with a random number between 0 and 1, then add the result to the minority class as a synthetic sample.
Repeat step number 2–3 until the desired proportion of minority class is met. (End of SMOTE)
(Start of ENN) Determine K, as the number of nearest neighbors. If not determined, then K=3.
Find the K-nearest neighbor of the observation among the other observations in the dataset, then return the majority class from the K-nearest neighbor.
If the class of the observation and the majority class from the observation’s K-nearest neighbor is different, then the observation and its K-nearest neighbor are deleted from the dataset.
Repeat step 2 and 3 until the desired proportion of each class is fulfilled. (End of ENN)
To understand more about this method in practice, here I will give some implementation of SMOTE-ENN in Python using imbalanced-learn library. For this article, the model that I will use is AdaBoost Classifier by using AdaBoostClassifier . And to evaluate our model, here I will use the Repeated Stratified K-fold Cross Validation method."""

In [32]:
from langchain_core.runnables import RunnableParallel
# Fan-out step: the same input is sent to two independent
# prompt -> model -> parser branches, keyed "summary" and "quiz".
parallel_chain = RunnableParallel(
    {
        "summary": prompt3 | model1 | parser,
        "quiz": prompt4 | model1 | parser,
    }
)

In [33]:
# Merge step: fill prompt5 with the summary and quiz, ask model2 to combine
# them, and parse the reply.
seq_chain = (
    prompt5
    | model2
    | parser
)

In [34]:
# Full pipeline: run the parallel summary/quiz branches, then feed their
# combined result into the merge step.
final_chain = (
    parallel_chain
    | seq_chain
)

In [35]:
# Run the parallel pipeline on the sample passage; the returned value is shown
# as the cell output.
final_chain.invoke(
    {"para": sample_text}
)

'The article emphasizes the significance of balancing imbalanced datasets in classification modeling, using a combination of oversampling and undersampling techniques. It introduces the SMOTE-ENN method, which merges SMOTE for creating synthetic examples for the minority class and ENN for eliminating observations with different classes from their K-nearest neighbors. The concepts of K-Nearest Neighbor (KNN) and Edited Nearest Neighbor (ENN) are thoroughly explained. The KNN concept assumes that the nearest neighbor of each data point, based on its distance, belongs to a similar class. On the other hand, ENN works by identifying the K-nearest neighbor of each observation and deleting them if they belong to different classes. The SMOTE-ENN method is detailed step by step, including its implementation in Python using the imbalanced-learn library. The AdaBoost Classifier is utilized as the model, and the Repeated Stratified K-fold Cross Validation method is used for evaluation.\n\nQuiz:\n1

In [28]:
# Draw the parallel pipeline's runnable graph as ASCII art.
final_chain_graph = final_chain.get_graph()
final_chain_graph.print_ascii()

          +-----------------------------+            
          | Parallel<summary,quiz>Input |            
          +-----------------------------+            
                 **               **                 
              ***                   ***              
            **                         **            
+----------------+                +----------------+ 
| PromptTemplate |                | PromptTemplate | 
+----------------+                +----------------+ 
          *                               *          
          *                               *          
          *                               *          
  +------------+                    +------------+   
  | ChatOpenAI |                    | ChatOpenAI |   
  +------------+                    +------------+   
          *                               *          
          *                               *          
          *                               *          
+-----------------+         

## Conditional Chains

In [39]:
from langchain_core.output_parsers import JsonOutputParser
# JSON parser for the sentiment step; it also supplies the format instructions
# that are injected into the sentiment prompt.
parser_json = JsonOutputParser()

In [57]:
# Sentiment-analysis prompt. {feedback} is provided at invoke time, while
# {format_instruction} is pre-filled once from the JSON parser so callers only
# have to pass the feedback text.
prompt6 = PromptTemplate(
    template="""Analyze the sentiment of the following customer feedback {feedback}\n.
    Classify it as Positive, Negative, or Neutral. Then, provide a brief explanation highlighting the key phrases or words that led to your classification.
    Also, summaries it in short.
    give me in output in sentiment, explaination and summary key. {format_instruction}""",
    partial_variables={"format_instruction": parser_json.get_format_instructions()},
    input_variables=["feedback"],
)

In [58]:
# Sentiment chain: prompt -> model -> JSON parser.
chain_sentiment = (
    prompt6
    | model1
    | parser_json
)

In [59]:
# Try the sentiment chain on a mixed-tone review; the parsed result is shown as
# the cell output.
chain_sentiment.invoke(
    {"feedback": "This mobile should worth buying if more RAM is provided into it, but overall it is good product."}
)

{'sentiment': 'Neutral',
 'explanation': 'The customer feedback mentions that the mobile is a good product but suggests that it would be worth buying if it had more RAM. This indicates a neutral sentiment as the customer is overall satisfied with the product but sees room for improvement.',
 'summary': 'Satisfied with the product but suggests improvement with more RAM.'}

In [60]:
# Reply prompt used for positive and neutral feedback: a gentle, thankful
# response that can draw on the {summary}.
prompt7 = PromptTemplate(
    template="""Write an gentle, simple, appropriate and always thankful for the feeback kind response for the positive and neutral.\n {sentiment} and if possible try to add few points from summary.\n {summary} .""",
    input_variables=["sentiment", "summary"],
)

In [61]:
# Reply prompt used for negative feedback: thanks the customer and promises
# improvement on the points raised, optionally drawing on the {summary}.
prompt8 = PromptTemplate(
    template="""Write an thankful for the feeback and try to make it like that customer is very valuable and we will definately improve on the points mentioned in the negative sentiment{sentiment}\n. if possible to add few points from following summary {summary}.""",
    input_variables=["sentiment", "summary"],
)

In [64]:
from langchain_core.runnables import RunnableBranch, RunnableLambda

In [73]:
def _sentiment_label(parsed):
    """Return the normalised sentiment label of a parsed sentiment result.

    Args:
        parsed: The value produced by the sentiment chain's JSON parser;
            expected to be a dict with a "sentiment" key.

    Returns:
        The label lower-cased and stripped of surrounding whitespace, or an
        empty string when ``parsed`` is not a dict or has no "sentiment" key.
    """
    if not isinstance(parsed, dict):
        return ""
    return str(parsed.get("sentiment", "")).strip().lower()


# Positive and neutral feedback share the same thankful reply chain, so it is
# built once instead of being duplicated per branch.
_thankful_reply = prompt7 | model1 | parser
_improvement_reply = prompt8 | model1 | parser

# Route on the normalised label so that model replies such as "positive" or
# " Positive " still reach the intended branch, and so that malformed JSON
# (no "sentiment" key, non-dict payload) falls through to the default instead
# of raising inside a condition.
branch_chain = RunnableBranch(
    (lambda x: _sentiment_label(x) in ("positive", "neutral"), _thankful_reply),
    (lambda x: _sentiment_label(x) == "negative", _improvement_reply),
    # Default branch for any label that is not recognised.
    RunnableLambda(lambda x: "Unable to classify sentiment."),
)

In [74]:
# End-to-end feedback pipeline: classify the feedback as JSON, then route the
# parsed result to the matching reply chain.
chain_final = (
    chain_sentiment
    | branch_chain
)

In [76]:
# Run the conditional pipeline on a short piece of feedback; the generated
# reply is shown as the cell output.
chain_final.invoke(
    {"feedback": "This is good product i ever seen."}
)

'Thank you so much for your kind words and positive feedback! We are thrilled to hear that you think our product is the best you have ever seen. Your satisfaction is our top priority, and we are grateful for your support. We will continue to strive for excellence and provide you with the best products and service possible. Thank you again for taking the time to share your experience with us!'

In [77]:
# Draw the conditional pipeline's runnable graph as ASCII art.
chain_final_graph = chain_final.get_graph()
chain_final_graph.print_ascii()

  +-------------+    
  | PromptInput |    
  +-------------+    
          *          
          *          
          *          
 +----------------+  
 | PromptTemplate |  
 +----------------+  
          *          
          *          
          *          
   +------------+    
   | ChatOpenAI |    
   +------------+    
          *          
          *          
          *          
+------------------+ 
| JsonOutputParser | 
+------------------+ 
          *          
          *          
          *          
     +--------+      
     | Branch |      
     +--------+      
          *          
          *          
          *          
  +--------------+   
  | BranchOutput |   
  +--------------+   
