Common NLP Tasks

In [2]:
import nltk
from nltk.tokenize import word_tokenize

# word_tokenize needs the Punkt tokenizer data. Newer NLTK releases (3.9+) load it
# from the 'punkt_tab' package while older releases use 'punkt', so fetch both to
# keep this cell working on a fresh environment with either version.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)

# Split a sample sentence into word and punctuation tokens
text = "Natural Language Processing is fascinating."
tokens = word_tokenize(text)
print(tokens)

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


['Natural', 'Language', 'Processing', 'is', 'fascinating', '.']


In [3]:
from nltk.stem import PorterStemmer

# Reduce each inflected form to its Porter stem (rule-based suffix stripping)
stemmer = PorterStemmer()
words = ["running", "ran", "runs"]
stems = list(map(stemmer.stem, words))
print(stems)

['run', 'ran', 'run']


In [4]:
from nltk.stem import WordNetLemmatizer
# The lemmatizer looks words up in the WordNet corpus, so make sure it is present
nltk.download('wordnet')

lemmatizer = WordNetLemmatizer()
words = ["running", "ran", "runs"]
# pos='v' tells the lemmatizer to treat every word as a verb
lemmas = list(map(lambda w: lemmatizer.lemmatize(w, pos='v'), words))
print(lemmas)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


['run', 'run', 'run']


In [5]:
from nltk.corpus import stopwords
# Fetch the stop-word lists used below
nltk.download('stopwords')

# A set gives constant-time membership checks while filtering
stop_words = set(stopwords.words('english'))
# Compare case-insensitively, but keep each surviving token in its original casing
filtered_text = list(filter(lambda token: token.lower() not in stop_words, tokens))
print(filtered_text)

['Natural', 'Language', 'Processing', 'fascinating', '.']


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Designing and Training a Simple Chatbot

In [6]:
#-----------------******  Data Preprocessing  *******-------------------
import pandas as pd

# Load the dataset
data = pd.read_csv("C:/Users/hp/Downloads/My_Work/chatbot_dataset.csv")

# Rows missing a question or an answer cannot be used for training, and a missing
# question (NaN) would make x.lower() below raise, so drop them up front.
# .copy() makes the column assignment below operate on an independent frame.
data = data.dropna(subset=['Question', 'Answer']).copy()

# Preprocess the data
# Newer NLTK releases (3.9+) load the tokenizer data from 'punkt_tab', older ones
# from 'punkt'; downloading both keeps word_tokenize working on either version.
for resource in ('punkt', 'punkt_tab'):
    nltk.download(resource)
# Normalize each question: lowercase it, tokenize it, and re-join the tokens with single spaces
data['Question'] = data['Question'].apply(lambda x: ' '.join(nltk.word_tokenize(x.lower())))
print(data.head())

                                      Question  \
0                   introduction to the course   
1  overview of data science and its importance   
2    introduction to the data science workflow   
3         key skills and tools in data science   
4          where can i find my course videos ?   

                                              Answer  
0  Welcome to the data science course. Here you w...  
1  Data science is crucial for making informed de...  
2  The data science workflow includes data collec...  
3  Important skills include programming, statisti...  
4  You can find all your course videos on the Cip...  


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hp\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [7]:
#-------------------****** Data Vectorization ******----------------------

from sklearn.feature_extraction.text import TfidfVectorizer

# Learn the TF-IDF vocabulary from the preprocessed questions and encode them in one step
vectorizer = TfidfVectorizer()
question_column = data['Question']
X = vectorizer.fit_transform(question_column)

# Shape of the resulting document-term matrix
print(X.shape)

(48, 112)


In [8]:
#------------------****** Training a Text Classification Model ********------------------

from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

# Hold out 20% of the question/answer pairs; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    data['Question'],
    data['Answer'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF features feeding a multinomial Naive Bayes classifier, fitted on the training split
# NOTE(review): every distinct Answer is a class here, so answers that appear only in
# the held-out rows can never be predicted, and X_test/y_test are not used in the
# cells shown — confirm this split is intended.
model = make_pipeline(TfidfVectorizer(), MultinomialNB()).fit(X_train, y_train)
print("Model training complete.")

Model training complete.


In [9]:
#-------------******* Implementing a Function to Get Chatbot Responses ******-------------
# Function to get a response from the chatbot
def get_response(question, min_confidence=0.0,
                 fallback="Sorry, I don't have an answer for that yet."):
    """Return the chatbot's answer for a user question.

    The question is lowercased and re-joined from NLTK word tokens (the same
    normalization applied to the training questions) and then passed to the
    module-level ``model`` pipeline.

    Args:
        question: Raw question text typed by the user.
        min_confidence: Minimum predicted probability (0-1) the top answer
            must reach to be returned. The default of 0.0 disables the check,
            so the model's best guess is always returned.
        fallback: Text returned when the question is None/blank, or when the
            top answer's probability is below ``min_confidence``.

    Returns:
        The predicted answer, or ``fallback``.
    """
    # Blank input carries no tokens; the classifier would still emit some answer
    # for it, which is misleading, so reply with the fallback instead.
    if question is None or not question.strip():
        return fallback

    question = ' '.join(nltk.word_tokenize(question.lower()))

    if min_confidence > 0:
        # Only consult class probabilities when a threshold was requested, so the
        # default call path stays exactly model.predict(...).
        probabilities = model.predict_proba([question])[0]
        best = probabilities.argmax()
        if probabilities[best] < min_confidence:
            return fallback
        return model.classes_[best]

    answer = model.predict([question])[0]
    return answer

# Smoke-test the chatbot with a sample question
print(get_response(question="What is NLP?"))

Seaborn is a Python visualization library based on Matplotlib that provides a high-level interface for drawing attractive statistical graphics. This is covered in the Introduction to Matplotlib and Seaborn module.


Initializing the Dash App

In [10]:
pip install dash

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
import dash

# Create the Dash application object that the layout and callback cells attach to
app = dash.Dash(name=__name__)

In [12]:
#-----------------******* Define the Layout ******---------------
from dash import dcc, html

# Page layout: a centered title, a text box, a submit button, and the container
# that the callback fills with its result.
app.layout = html.Div([
    html.H1("My Dash App", style={'textAlign': 'center'}),
    # The hint is a placeholder rather than the initial value, so it disappears as
    # soon as the user types and is never submitted as if it were user input.
    # value='' keeps the box's value a string before anything is typed.
    dcc.Input(id='input-box', type='text', value='', placeholder='Type something...'),
    html.Button('Submit', id='button'),
    html.Div(id='output-div')
])

In [13]:
#----------------******* Callback to Update Output ******-------------
from dash.dependencies import Input, Output

@app.callback(
    Output('output-div', 'children'),
    Input('button', 'n_clicks'),
    [dash.dependencies.State('input-box', 'value')]
)
def update_output(n_clicks, value):
    """Echo the text box content into the output div when the button is clicked.

    Args:
        n_clicks: Click count of the submit button; None until the first click.
        value: Current content of the text box, read as State so that typing
            alone does not trigger the callback.

    Returns:
        An empty string before the first click, otherwise the echo message.
    """
    # Nothing to show until the button has been pressed at least once
    if n_clicks is None:
        return ''
    return f'You have entered: {value}'


Running The App

In [14]:
if __name__ == '__main__':
    # Dash.run replaces the older run_server, which Dash 3 no longer provides;
    # debug=True turns on Dash's development tooling.
    app.run(debug=True)