In [None]:
!pip install simplenlg

In [None]:
import simplenlg as slg

In [None]:
from simplenlg import NLGFactory, Realiser
from simplenlg.lexicon import Lexicon
from simplenlg.phrasespec import SPhraseSpec
from simplenlg.features import Feature, Tense, NumberAgreement, Form
import matplotlib.pyplot as plt

# Sample data for the migration report
source = "Database A"
target = "Database B"
issues = ["missing data", "data inconsistency"]
errors = ["server connection error", "data format error"]
status = "completed"
pie_chart_data = [50, 25, 25]
histogram_data = [1, 2, 2, 3, 3, 3, 4, 4, 5, 5]

# Initialize the SimpleNLG components
lexicon = Lexicon.getDefaultLexicon()
nlg_factory = NLGFactory(lexicon)
realiser = Realiser(lexicon)

# Generate the report text using SimpleNLG
report = nlg_factory.createClause()
report.setSubject("migration")
report.setVerb("be")
report.setObject("complete")
report.setFeature(Feature.TENSE, Tense.PAST)

source_phrase = nlg_factory.createNounPhrase("the", "source")
source_phrase.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)
source_phrase.setPlural(True)

target_phrase = nlg_factory.createNounPhrase("the", "target")
target_phrase.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)
target_phrase.setPlural(True)

# report.addComplement("from", source_phrase)
# report.addComplement("to", target_phrase)

report.addComplement(source_phrase)
report.addComplement(target_phrase)

if len(issues) > 0:
    issues_phrase = nlg_factory.createNounPhrase("the", "issue")
    issues_phrase.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)
    issues_phrase.setPlural(True)
    issues_list = nlg_factory.createCoordinatedPhrase()
    for issue in issues:
        issue_phrase = nlg_factory.createAdjectivePhrase(issue)
        issues_list.addCoordinate(issue_phrase)
    issues_list.setFeature(Feature.CONJUNCTION, "and")
#     report.addComplement("with", issues_phrase)
#     report.addComplement("such as", issues_list)
    report.addComplement(issues_phrase)
    report.addComplement(issues_list)

if len(errors) > 0:
    errors_phrase = nlg_factory.createNounPhrase("the", "error")
    errors_phrase.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)
    errors_phrase.setPlural(True)
    errors_list = nlg_factory.createCoordinatedPhrase()
    for error in errors:
        error_phrase = nlg_factory.createAdjectivePhrase(error)
        errors_list.addCoordinate(error_phrase)
    errors_list.setFeature(Feature.CONJUNCTION, "and")
#     report.addComplement("due to", errors_phrase)
#     report.addComplement("like", errors_list)
    report.addComplement(errors_phrase)
    report.addComplement(errors_list)

if status == "completed":
    report.addComplement("successfully")

report_text = realiser.realiseSentence(report)

# Generate the pie chart using Matplotlib
labels = ['Data Transferred', 'Data Skipped', 'Data Errors']
sizes = pie_chart_data
fig1, ax1 = plt.subplots()
ax1.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
ax1.axis('equal')
plt.title('Migration Statistics: {}'.format(report_text))
plt.show()

# Generate the histogram using Matplotlib
plt.hist(histogram_data, bins=[1, 2, 3, 4, 5, 6])
#plt.title('Data Distribution


In [13]:
!pip3 install rosaenlg

ERROR: Could not find a version that satisfies the requirement rosaenlg (from versions: none)
ERROR: No matching distribution found for rosaenlg


In [None]:
import pandas as pd
import numpy as np
from rosaenlg import NLGEngine

# Define NLG templates
templates = {
    "report": "Here is the migration report for {source} to {target}. {errors} errors were encountered during the migration process. {status} status was achieved. Here are the details:\n\n{details}",
    "pie_chart": "The following pie chart shows the distribution of errors:\n{chart}",
    "histogram": "The following histogram shows the frequency of errors:\n{chart}",
    "line_chart": "The following line chart shows the status of the migration over time:\n{chart}"
}

# Define the report data
source = "System A"
target = "System B"
errors = 10
status = "Successful"

data = {
    "source": source,
    "target": target,
    "errors": errors,
    "status": status,
    "details": pd.DataFrame({
        "Issue": ["Issue 1", "Issue 2", "Issue 3"],
        "Count": [20, 30, 10]
    })
}

# Generate the charts
pie_chart = pd.DataFrame({
    "Error Type": ["Type 1", "Type 2", "Type 3"],
    "Count": [10, 20, 30]
})
histogram = pd.DataFrame({
    "Error Count": np.random.normal(50, 10, 100)
})
line_chart = pd.DataFrame({
    "Date": pd.date_range("2022-01-01", "2022-01-31"),
    "Status": ["Successful"]*31
})

# Generate the report using NLG
engine = NLGEngine()
report = engine.generate(templates["report"], data)
pie_chart = engine.generate(templates["pie_chart"], {"chart": pie_chart.to_html()})
histogram = engine.generate(templates["histogram"], {"chart": histogram.to_html()})
line_chart = engine.generate(templates["line_chart"], {"chart": line_chart.to_html()})

# Print the report
print(report)
print(pie_chart)
print(histogram)
print(line_chart)


In [None]:
import pandas as pd
from simplenlg import SPhraseSpec, NLGFactory, Realiser , StringElement
from simplenlg.lexicon import Lexicon
from simplenlg.features import Feature, Tense, NumberAgreement

# load the migration data
migration_data = pd.read_csv('sample_migration_data.csv')

# extract relevant statistics from the data
source = migration_data['source'].value_counts().index[0]
target = migration_data['target'].value_counts().index[0]
num_issues = str(migration_data['issues'].sum())
num_errors = str(migration_data['errors'].sum())
status = 'successful' if migration_data['status'].value_counts().index[0] == 'success' else 'unsuccessful'

# num_issues = StringElement(num_issues)

# print(num_issues)
# print(num_errors)
# print(type(num_issues))

# initialize the SimpleNLG components
lexicon = Lexicon.getDefaultLexicon()
factory = NLGFactory(lexicon)
realiser = Realiser(lexicon)

# create sentence templates for the report
source_template = factory.createClause()
source_template.setSubject(factory.createNounPhrase('source'))
source_template.setVerb('be')
source_template.setObject(source)

target_template = factory.createClause()
target_template.setSubject(factory.createNounPhrase('target'))
target_template.setVerb('be')
target_template.setObject(target)

issues_template = factory.createClause()
issues_template.setSubject(factory.createNounPhrase('there'))
issues_template.setVerb('be')
#num_issues = num_issues.astype(int)
issues_template.setObject(num_issues)
issues_template.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)

errors_template = factory.createClause()
errors_template.setSubject(factory.createNounPhrase('there'))
errors_template.setVerb('be')
errors_template.setObject(num_errors)
errors_template.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)

status_template = factory.createClause()
status_template.setSubject(factory.createNounPhrase('migration'))
status_template.setVerb('be')
status_template.setObject(status)

# realize the sentences
source_sentence = realiser.realiseSentence(source_template)
target_sentence = realiser.realiseSentence(target_template)
issues_sentence = realiser.realiseSentence(issues_template)
errors_sentence = realiser.realiseSentence(errors_template)
status_sentence = realiser.realiseSentence(status_template)

# print(source_sentence)
# print(errors_sentence)
# print(issues_sentence)

# combine the sentences into a report
report = f"In the migration report, the {source_sentence} and the {target_sentence}. " \
         f"There were {issues_sentence} and {errors_sentence}. " \
         f"The migration was {status_sentence}."

# print the report
print(report)


In [None]:
import pandas as pd
from simplenlg import NLGFactory, Realiser
from simplenlg.phrasespec import SPhraseSpec
from simplenlg.lexicon import Lexicon
from simplenlg.features import Feature, Tense, NumberAgreement

# load data from CSV file
data = pd.read_csv('sample_migration_data.csv')

# get statistics from the data
num_source = len(data['source'].unique())
num_target = len(data['target'].unique())
num_issues = data['issues'].sum()
num_errors = data['errors'].sum()

# create the sentence templates
templates = {
    'intro': 'Migration report for {} project.',
    'source': '{} unique source systems were migrated.',
    'target': '{} unique target systems were created.',
    'issues': 'There were a total of {} issues encountered during the migration.',
    'errors': 'There were a total of {} errors encountered during the migration.',
}

# create the NLG lexicon, factory and realiser
lexicon = Lexicon.getDefaultLexicon()
factory = NLGFactory(lexicon)
realiser = Realiser(lexicon)

# create the sentence objects and fill in the data
intro = factory.createSentence(templates['intro'].format(data['project'][0]))
source = factory.createSentence(templates['source'].format(num_source))
target = factory.createSentence(templates['target'].format(num_target))
issues = factory.createSentence(templates['issues'].format(num_issues))
errors = factory.createSentence(templates['errors'].format(num_errors))

# set plural number agreement for the relevant phrases
source.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)
target.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)
issues.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)
errors.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)

# create the final report text by concatenating the sentences
report_text = realiser.realiseSentence(intro) + ' ' + \
              realiser.realiseSentence(source) + ' ' + \
              realiser.realiseSentence(target) + ' ' + \
              realiser.realiseSentence(issues) + ' ' + \
              realiser.realiseSentence(errors)

# print the final report text
print(report_text)


In [None]:
!pip install --upgrade mxnet gluonnlp

In [None]:
import pandas as pd
from simplenlg import SPhraseSpec, NLGFactory, Realiser
from simplenlg.lexicon import Lexicon
from simplenlg.features import Feature, Tense, NumberAgreement

import pandas as pd
import numpy as np
import random

# Load sales data
sales_data = pd.read_csv("sample_sales_NLG.csv")

# Get total sales revenue
total_revenue = sales_data['revenue'].sum()

# Get average revenue per sale
avg_revenue_per_sale = sales_data['revenue'].mean()

# Get total sales quantity
total_quantity = sales_data['sales_quantity'].sum()

# Get number of regions
num_regions = len(sales_data['region'].unique())

# Get number of products
num_products = len(sales_data['product'].unique())

# Generate report using SimpleNLG
lexicon = Lexicon.getDefaultLexicon()
factory = NLGFactory(lexicon)
realiser = Realiser(lexicon)

# Create sentence templates
revenue_template = factory.createClause()
revenue_template.setSubject("The total revenue")
revenue_template.setVerb("be")
revenue_template.setObject(str(total_revenue))

avg_revenue_template = factory.createClause()
avg_revenue_template.setSubject("The average revenue per sale")
avg_revenue_template.setVerb("be")
avg_revenue_template.setObject(str(avg_revenue_per_sale))

quantity_template = factory.createClause()
quantity_template.setSubject("The total sales quantity")
quantity_template.setVerb("be")
quantity_template.setObject(str(total_quantity))

region_template = factory.createClause()
region_template.setSubject("The number of regions")
region_template.setVerb("be")
region_template.setObject(str(num_regions))
region_template.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)

product_template = factory.createClause()
product_template.setSubject("The number of products")
product_template.setVerb("be")
product_template.setObject(str(num_products))
product_template.setFeature(Feature.NUMBER, NumberAgreement.PLURAL)

# Randomize sentence order
sentences = [revenue_template, avg_revenue_template, quantity_template, region_template, product_template]
random.shuffle(sentences)

# Realize sentences
report = ""
for sentence in sentences:
    report += realiser.realiseSentence(sentence) + " "

# Print report
print(report)


In [None]:
!pip install textblob

In [None]:
from textblob import TextBlob

# Define variables with report data
source_system = 'System A'
target_system = 'System B'
num_issues = 10
num_errors = 3
num_records = 1000
num_successful_records = 950
num_error_records = 50
issues_data = [('Data Validation', 5), ('Data Mapping', 3), ('Data Formatting', 2)]
status_data = [('Successful', 90), ('Errors', 10)]
errors_data = [('Validation Error', 2), ('Mapping Error', 1)]

# Generate NLG description
blob = TextBlob(f"The migration from {source_system} to {target_system} has been completed. ")
blob += TextBlob(f"During the migration, {num_issues} issues were identified, with {num_errors} errors encountered. ")
blob += TextBlob(f"{num_successful_records} out of {num_records} records were migrated successfully. ")
blob += TextBlob(f"The majority of issues were found in the following areas: ")
for issue in issues_data:
    blob += TextBlob(f"{issue[0]} ({issue[1]}), ")
blob += TextBlob(f"The current status of the migration can be seen in the following chart: ")
for status in status_data:
    blob += TextBlob(f"{status[0]} ({status[1]}%), ")
blob += TextBlob(f"The following chart shows the types of errors encountered during migration: ")
for error in errors_data:
    blob += TextBlob(f"{error[0]} ({error[1]}), ")
blob += TextBlob(f"Overall, while there were some issues encountered during migration, the majority of the data was successfully migrated to the target system.")

# Print NLG description
print(str(blob))


### Yedalog

In [2]:
!pip install yedalog

ERROR: Could not find a version that satisfies the requirement yedalog (from versions: none)
ERROR: No matching distribution found for yedalog


### N2G

In [5]:
!pip install N2G

Collecting N2G
  Downloading N2G-0.3.3-py3-none-any.whl (481 kB)
     -------------------------------------- 481.2/481.2 kB 4.3 MB/s eta 0:00:00
Installing collected packages: N2G
Successfully installed N2G-0.3.3




In [8]:
import N2G # For creating yed_diagrams >> https://n2g.readthedocs.io/en/latest/diagram_plugins/yEd%20Module.html#quick-start

#from N2G import

###  PYNLG - pynlg is a pure python re-implementation of SimpleNLG-EnFr, a java library enabling french and english text surface realisation, based on SimpleNLG.  >> Under MIT licence

In [10]:
!pip install pynlg

Collecting pynlg
  Downloading pynlg-0.1.1.tar.gz (49 kB)
     ---------------------------------------- 49.2/49.2 kB 1.2 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: pynlg
  Building wheel for pynlg (setup.py): started
  Building wheel for pynlg (setup.py): finished with status 'done'
  Created wheel for pynlg: filename=pynlg-0.1.1-py3-none-any.whl size=64348 sha256=3085e8f66db2b7e11818e7f721e74f1499886dedf7db99781eada60b599dc3f2
  Stored in directory: c:\users\prateek.kumar\appdata\local\pip\cache\wheels\b7\ba\10\026be11f6003f0ecf23f67598d6bfce6c67d04be0ee28f1144
Successfully built pynlg
Installing collected packages: pynlg
Successfully installed pynlg-0.1.1




### Gramex + NLG - https://github.com/gramener/gramex-nlg >> Good interface to create UI with NLG capabilities

### Check this one out - https://github.com/roman-kutlak/nlglib 

### Build a Natural Language Generation (NLG) System using PyTorch - https://www.analyticsvidhya.com/blog/2020/08/build-a-natural-language-generation-nlg-system-using-pytorch/

### RosaeNLG

In [12]:
# from rosaenlg import NLGEngine

# # Define a template
# template = {
#     "metadata": {
#         "language": "en",
#         "name": "My template"
#     },
#     "output": "{{noun_phrase}} are {{adjective}}."
# }

# # Create an instance of the NLG engine
# nlg = NLGEngine(debug=True)

# # Generate text using the template and input data
# input_data = {"noun_phrase": "Apples", "adjective": "delicious"}
# output = nlg.generate(template, input_data)

# # Print the generated text
# print(output['text'])


ModuleNotFoundError: No module named 'rosaenlg'

### SimpleNLG - A Java-based NLG library that has been ported to Python. It provides a simple and flexible way to generate natural language text from structured data. It is released under the Apache 2.0 license. (GitHub: https://github.com/simplenlg/simplenlg)


> SimpleNLG - https://github.com/simplenlg/simplenlg

> NLTK - https://github.com/nltk/nltk

> TextBlob - https://github.com/sloria/TextBlob 

> GPT-3 API Wrapper - https://github.com/huggingface/huggingface_hub  >> https://huggingface.co/datasets/web_nlg

> PyNLG - https://github.com/artem-zamuda/PyNLG

> EasyNLG - https://github.com/ThatcherC/PyEasyNLG

> Athena NLG - https://github.com/rgalhama/athena-nlg

> Ananse - https://github.com/robinvanemden/Ananse

> GenerationUtils - https://github.com/ucfnlp/generationutils

> NLG-Eval - https://github.com/Maluuba/nlg-eval

> PyTorch-NLG - https://github.com/omarsar/nlg

> GLTR - https://github.com/HendrikStrobelt/detecting-fake-text

> Yedalog - https://github.com/DeepLearnXMU/Yedalog

> N2G - https://github.com/parulsethi/N2G

> PyTextGenerator - https://github.com/vineetjohn/PyTextGenerator

# Transformers - Huggingface along with PyTorch

In [2]:
import os
os.environ["CURL_CA_BUNDLE"]=""

In [1]:
!pip install transformers



Collecting transformers
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
     ---------------------------------------- 6.8/6.8 MB 7.7 MB/s eta 0:00:00
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp39-cp39-win_amd64.whl (3.5 MB)
     ---------------------------------------- 3.5/3.5 MB 9.6 MB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
     ------------------------------------- 199.8/199.8 kB 11.8 MB/s eta 0:00:00
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.13.3 tokenizers-0.13.3 transformers-4.27.4




In [6]:
from transformers import pipeline

# Load the text generation pipeline
generator = pipeline('text-generation',model='gpt2')

# Generate text
text = generator("Hello, how are you?", max_length=100, num_return_sequences=1)[0]['generated_text']

# Print the generated text
print(text)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Hello, how are you?

No, not quite yet. He may be a big name in the band. He just seems like a good guy. He's got some very good stories.

"I want to take a chance and make myself known in this world and try to bring back a true friendship back from a very different place."

Cork: C'mon! (Laughs)

Snoop Dogg: Oh yeah.

Cork: What


In [16]:
import ssl, urllib

# ssl._create_default_https_context = ssl._create_unverified_context

#ssl.SSLContext.verify_mode = ssl.VerifyMode.CERT_OPTIONAL

urllink = 'http://huggingface.co'

urllib.request.urlopen(urllink)

<http.client.HTTPResponse at 0x126cbf86550>

In [None]:
#export PYTHONHTTPSVERIFY=0

# Using Pytorch

In [19]:
!pip install torch transformers





In [7]:
import torch
from transformers import pipeline, GPT2Tokenizer, GPT2LMHeadModel

In [2]:
import os
os.environ["CURL_CA_BUNDLE"]=""

In [8]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')



In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import nltk
nltk.download('punkt')

# Define some technical data
data = {"CPU": "Intel Core i7-10700K", "GPU": "Nvidia RTX 3080", "RAM": "32 GB DDR4", "Storage": "1 TB SSD"}

# Define the NLG model architecture
class NLGModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NLGModel, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.linear1(x)
        out = self.relu(out)
        out = self.linear2(out)
        return out

# Define the training data
inputs = torch.tensor([list(data.values())])
targets =torch.tensor([nltk.sent_tokenize("This computer has a {0}, a {1}, {2} of RAM, and {3} of storage.".format(*list(data.values())))])

# Train the NLG model
model = NLGModel(len(data), 128, len(targets[0]))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(1000):
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets[0])
    loss.backward()
    optimizer.step()

# Generate natural language text from the technical data
model.eval()
with torch.no_grad():
    inputs = torch.tensor([list(data.values())])
    outputs = model(inputs)
    predicted_sentence = " ".join([nltk.word_tokenize(targets[0][i])[torch.argmax(outputs[0][i]).item()] for i in range(len(targets[0]))])
    print(predicted_sentence)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\prateek.kumar\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


ValueError: too many dimensions 'str'

In [24]:
data = {"CPU": "Intel Core i7-10700K", "GPU": "Nvidia RTX 3080", "RAM": "32 GB DDR4", "Storage": "1 TB SSD"}

In [10]:
import torch
import torch.nn as nn

# Define the data for the template
data = {
    'title': 'Sample Technical Report',
    'date': 'June 1, 2022',
    'author': 'John Doe',
    'summary': 'This report outlines the results of our analysis of the XYZ system.',
    'system_name': 'XYZ',
    'system_version': '1.0',
    'component_name': 'Component A',
    'component_version': '2.0',
    'metric_name': 'Throughput',
    'metric_value': '10',
    'metric_unit': 'requests per second'
}

# Define the template for the report
template = """
{{title}}
{{date}}

Author: {{author}}

Summary: {{summary}}

System Name: {{system_name}}
System Version: {{system_version}}

Component Name: {{component_name}}
Component Version: {{component_version}}

{{metric_name}}: {{metric_value}} {{metric_unit}}
"""

# Tokenize the template
tokens = template.split()

# Create a dictionary that maps token strings to their respective indices
token_to_index = {token: i for i, token in enumerate(tokens)}

# Define the model
class NLGModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NLGModel, self).__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        hidden = self.relu(self.linear1(x))
        output = self.linear2(hidden)
        return output

# Define the model parameters
input_size = len(data)
hidden_size = 128
output_size = len(tokens)

# Create an instance of the model
model = NLGModel(input_size, hidden_size, output_size)

# Load the model state dict
model_state_dict = torch.load('model.pth')

# Update the model state dict to include the missing keys
for key in data:
    index = token_to_index[key]
    model_state_dict['linear1.weight'][index] = torch.zeros(hidden_size)
    model_state_dict['linear1.bias'][index] = float(data[key])
    
# Set the model state dict
model.load_state_dict(model_state_dict)

# Evaluate the model on the input data
inputs = torch.tensor([list(data.values())], dtype=torch.float32)
outputs = model(inputs).squeeze()
predicted_indices = torch.argmax(outputs, dim=-1)
predicted_tokens = [tokens[i] for i in predicted_indices]

# Generate the report text
report = ' '.join(predicted_tokens)
print(report)


FileNotFoundError: [Errno 2] No such file or directory: 'model.pth'

In [25]:
import pandas as pd
import numpy as np

# Generate example data
np.random.seed(1234)
n = 100
x1 = np.random.randint(10, 50, n)
x2 = np.random.randint(100, 500, n)
x3 = np.random.rand(n) * 100
y = np.random.rand(n) * 1000

# Create dataframe
data = pd.DataFrame({'x1': x1, 'x2': x2, 'x3': x3, 'y': y})

# Save as CSV file
data.to_csv('data.csv', index=False)

In [35]:
import simplenlg
from simplenlg import Lexicon
from simplenlg import NLGFactory
from simplenlg import Realiser
from simplenlg.phrasespec import NPPhraseSpec, VPPhraseSpec, PPPhraseSpec
from simplenlg.framework import CoordinatedPhraseElement, DocumentElement

lexicon = Lexicon.getDefaultLexicon()
nlgFactory = NLGFactory(lexicon)
realiser = Realiser(lexicon)

# function to generate sentence for each row of data
def generate_sentence(row):
    # create phrases for subject, verb, and object
    subject = nlgFactory.createNounPhrase("the", row["product"])
    verb = nlgFactory.createVerbPhrase("costs")
    object = nlgFactory.createNounPhrase(str(row["total_price"]))   #("a", "total price", "of", str(row["total_price"]))
    
    # combine phrases into a sentence
    sentence = nlgFactory.createClause(subject, verb, object)
    return realiser.realiseSentence(sentence)

In [36]:
df = pd.read_csv('data1.csv')
#df

# apply function to each row of data
for index, row in df.iterrows():
    print(generate_sentence(row))

The A costs 822.996255498504.
The B costs 849.314389071416.
The C costs 685.7963590567286.
The D costs 34.33941153743347.
The E costs 53.50064942514465.
The F costs 84.52257182935907.
The G costs 816.3542272857691.
The H costs 914.4270471198862.


In [40]:
from textblob import TextBlob

# function to generate sentence for each row of data
def generate_sentence(row):
    sentence = TextBlob(f"The {row['product']} costs a total price of {row['total_price']}.")
    return sentence #.capitalize()

# apply function to each row of data
for index, row in df.iterrows():
    print(generate_sentence(row))

The A costs a total price of 822.996255498504.
The B costs a total price of 849.314389071416.
The C costs a total price of 685.7963590567286.
The D costs a total price of 34.33941153743347.
The E costs a total price of 53.50064942514465.
The F costs a total price of 84.52257182935907.
The G costs a total price of 816.3542272857691.
The H costs a total price of 914.4270471198862.


# Web Scraping

In [12]:
# import required modules
import requests
 
# get URL
page = requests.get("https://en.wikipedia.org/wiki/Main_Page")
 
# display status code
print(page.status_code)
 
# display scraped data
print(page.content)

200
b'<!DOCTYPE html>\n<html class="client-nojs vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-language-alert-in-sidebar-enabled vector-feature-sticky-header-disabled vector-feature-page-tools-enabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-enabled vector-feature-main-menu-pinned-disabled vector-feature-limited-width-enabled vector-feature-limited-width-content-enabled" lang="en" dir="ltr">\n<head>\n<meta charset="UTF-8"/>\n<title>Wikipedia, the free encyclopedia</title>\n<script>document.documentElement.className="client-js vector-feature-language-in-header-enabled vector-feature-language-in-main-page-header-disabled vector-feature-language-alert-in-sidebar-enabled vector-feature-sticky-header-disabled vector-feature-page-tools-enabled vector-feature-page-tools-pinned-disabled vector-feature-toc-pinned-enabled vector-feature-main-menu-pinned-disabled vector-feature-limited-width-enabled vector-f



In [3]:
!pip install fastai

Collecting fastai

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
darts 0.23.1 requires requests>=2.22.0, but you have requests 2.18.4 which is incompatible.



  Downloading fastai-2.7.11-py3-none-any.whl (232 kB)
     -------------------------------------- 232.8/232.8 kB 1.8 MB/s eta 0:00:00
Collecting fastcore<1.6,>=1.4.5
  Downloading fastcore-1.5.28-py3-none-any.whl (67 kB)
     ---------------------------------------- 67.6/67.6 kB 1.8 MB/s eta 0:00:00
Collecting fastdownload<2,>=0.0.5
  Downloading fastdownload-0.0.7-py3-none-any.whl (12 kB)
Collecting torchvision>=0.8.2
  Downloading torchvision-0.15.1-cp39-cp39-win_amd64.whl (1.2 MB)
     ---------------------------------------- 1.2/1.2 MB 3.4 MB/s eta 0:00:00
Collecting fastprogress>=0.2.4
  Downloading fastprogress-1.0.3-py3-none-any.whl (12 kB)
Collecting spacy<4
  Downloading spacy-3.5.1-cp39-cp39-win_amd64.whl (12.2 MB)
     --------------------------------------- 12.2/12.2 MB 13.1 MB/s eta 0:00:00
Collecting catalogue<2.1.0,>=2.0.6
  Downloading catalogue-2.0.8-py3-none-any.whl (17 kB)
Collecting smart-open<7.0.0,>=5.2.1
  Downloading smart_open-6.3.0-py3-none-any.whl (56 kB)
  

In [14]:
import requests
import urllib
from bs4 import BeautifulSoup
import pandas as pd

# URL to scrape
url = 'https://en.wikipedia.org/wiki/Indian_Premier_League#Teams'

# Send a GET request to the URL
response = requests.get(url)

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

#Extract data and store in list of dictionaries
data_list = []
for item in soup.find_all('div', class_='item'):
    data_dict = {}
    data_dict['title'] = item.find('h2', class_='title').text.strip()
    data_dict['description'] = item.find('p', class_='description').text.strip()
    data_dict['price'] = item.find('span', class_='price').text.strip()
    data_list.append(data_dict)

# Create Pandas DataFrame from list of dictionaries
df = pd.DataFrame(data_list)

# Find all <a> tags and print their text and href attributes
for link in soup.find_all('a'):
    print(link.text, link.get('href'))




Jump to content #bodyContent
Main page /wiki/Main_Page
Contents /wiki/Wikipedia:Contents
Current events /wiki/Portal:Current_events
Random article /wiki/Special:Random
About Wikipedia /wiki/Wikipedia:About
Contact us //en.wikipedia.org/wiki/Wikipedia:Contact_us
Donate https://donate.wikimedia.org/wiki/Special:FundraiserRedirector?utm_source=donate&utm_medium=sidebar&utm_campaign=C13_en.wikipedia.org&uselang=en
Help /wiki/Help:Contents
Learn to edit /wiki/Help:Introduction
Community portal /wiki/Wikipedia:Community_portal
Recent changes /wiki/Special:RecentChanges
Upload file /wiki/Wikipedia:File_upload_wizard






 /wiki/Main_Page

Search
 /wiki/Special:Search
Create account /w/index.php?title=Special:CreateAccount&returnto=Indian+Premier+League
Log in /w/index.php?title=Special:UserLogin&returnto=Indian+Premier+League
 Create account /w/index.php?title=Special:CreateAccount&returnto=Indian+Premier+League
 Log in /w/index.php?title=Special:UserLogin&returnto=Indian+Premier+League
lear

Deccan Chargers /wiki/Deccan_Chargers
Pune Warriors India /wiki/Pune_Warriors_India
Kochi Tuskers Kerala /wiki/Kochi_Tuskers_Kerala
Gujarat Lions /wiki/Gujarat_Lions
Rising Pune Supergiant /wiki/Rising_Pune_Supergiant
Indian Premier League None
Indian Premier League None
AB de Villiers /wiki/AB_de_Villiers
Virat Kohli /wiki/Virat_Kohli
Gujarat Lions /wiki/Gujarat_Lions
AB de Villiers /wiki/AB_de_Villiers
Virat Kohli /wiki/Virat_Kohli
Mumbai Indians /wiki/Mumbai_Indians
Quinton de Kock /wiki/Quinton_de_Kock
K. L. Rahul /wiki/K._L._Rahul
Kolkata Knight Riders /wiki/Kolkata_Knight_Riders
List of Indian Premier League awards /wiki/List_of_Indian_Premier_League_awards
copy editing /wiki/Wikipedia:Basic_copyediting
Learn how and when to remove this template message /wiki/Help:Maintenance_template_removal
[94] #cite_note-96
Jos Buttler /wiki/Jos_Buttler
[95] #cite_note-97
Yuzvendra Chahal /wiki/Yuzvendra_Chahal
citation needed /wiki/Wikipedia:Citation_needed
Jos Buttler /wiki/Jos_Buttler
[96]

Hockey /wiki/Hockey_India_League
Kabaddi /wiki/Pro_Kabaddi_League
Mixed Martial Arts /wiki/Super_Fight_League
Table tennis /wiki/Ultimate_Table_Tennis
Volleyball /wiki/Prime_Volleyball_League
Wrestling /wiki/Pro_Wrestling_League
Syed Mushtaq Ali Trophy /wiki/Syed_Mushtaq_Ali_Trophy
Vijay Hazare Trophy /wiki/Vijay_Hazare_Trophy
Ranji Trophy /wiki/Ranji_Trophy
Football /wiki/Santosh_Trophy
Cricket /wiki/List_of_regional_T20_cricket_leagues_in_India
KPL /wiki/Karnataka_Premier_League
TNPL /wiki/Tamil_Nadu_Premier_League
Football /wiki/Indian_State_Leagues
Arunachal Pradesh /wiki/Indrajit_Namchoom_Arunachal_League
Assam /wiki/Assam_State_Premier_League
Bihar /wiki/Bihar_State_Soccer_League
Men /wiki/Calcutta_Football_League
Women /wiki/Calcutta_Women%27s_Football_League
Chhattisgarh /wiki/Chhattisgarh_State_Men%27s_Football_League_Championship
Men /wiki/FD_Senior_Division
Women /wiki/FD_Women%27s_League
Men /wiki/Goa_Professional_League
Women /wiki/Goa_Women%27s_League
Gujarat /wiki/Gujara

In [6]:
!jupyter notebook list

Currently running servers:
http://localhost:8889/?token=3c8802f2a598d8b79949b13b678d0cf96b40b954e0e31aaa :: C:\Users\prateek.kumar
http://localhost:8888/?token=ce9334f53fe7f311face0a7e0b85c79db315cb1edaf84d51 :: C:\Users\prateek.kumar
http://localhost:8888/?token=f7792908d58424089d680ce161084873951f1c007698a23f :: C:\Users\prateek.kumar
http://localhost:8888/?token=23bb935346e54417834fbe48de338efa38e72421a3e65d2d :: C:\Users\prateek.kumar
http://localhost:8890/?token=31e9a7b27493a1fa5ea482c5da4f91ed7b5a2a18017cf619 :: C:\Users\prateek.kumar
