In [86]:
import dspy
import pandas as pd
import numpy as np
import openai,os,sys
from time import sleep, time
from datetime import date
today = date.today()
from dspy.evaluate import Evaluate
from dspy.teleprompt import MIPRO

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [117]:
# Seed embedded in the train/test file names; it selects which split is loaded.
seed = 944601

# Test split for this seed; the first rows are previewed below.
df = pd.read_excel(
    f'../data/test/FPB-sentiment-analysis-allagree-test-{seed}.xlsx'
)
df.head()

Unnamed: 0,sentence,label
0,"In the third quarter of 2007 , net sales total...",2
1,Revenue grew 1 percent to euro742 .2 million U...,0
2,Operating profit rose to EUR 1.6 mn from EUR 1...,0
3,Finnish dental care group Oral Hammaslaakarit ...,0
4,Finnish textiles and clothing group Marimekko ...,1


In [118]:
# Training split for the current seed.
train = pd.read_excel(f'../data/train/FPB-sentiment-analysis-allagree-train-{seed}.xlsx')

# Stratified subsample: at most 10 rows per label (fewer if a label has fewer rows).
# random_state pins the draw so a Restart & Run All selects the same examples;
# without it every run feeds a different pool to the optimizer.
sample = (
    train.groupby('label', group_keys=False)
    .apply(lambda x: x.sample(min(len(x), 10), random_state=seed))
    .reset_index(drop=True)
)

# Wrap each sampled row as a DSPy example whose only input field is `sentence`;
# `answer` carries the integer label.
training_set = [
    dspy.Example(sentence=row.sentence, answer=row.label).with_inputs("sentence")
    for row in (sample.loc[i] for i in range(len(sample.index)))
]
training_set

[Example({'sentence': 'Both operating profit and net sales for the 12-month period increased , respectively from EUR21 .5 m and EUR196 .1 m , as compared to 2005 .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Finnish security and privacy software solutions developer Stonesoft Oyj said on January 7 , 2008 that the preliminary sales of its StoneGate products grew by 59 pct year-on-year to 3.6 mln euro ( $ 5.3 mln ) for the fourth quarter of 2007 .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': "Raisio 's bid to buy Glisten is a `` win-win '' deal for both companies , the chairman of the UK snacks firm told just-food today 10 February .", 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': "In the second quarter of 2010 , the company 's net profit was EUR1 .7 m compared to a net loss of EUR1 .3 m in April-June 2009 .", 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Cooper SH , the UK distributor for lift equipment manufact

In [119]:
# Convert every row of the test frame into a DSPy example: `sentence` is the
# input field, `answer` holds the integer label.
testing_set = [
    dspy.Example(sentence=row.sentence, answer=row.label).with_inputs("sentence")
    for row in (df.loc[idx] for idx in range(len(df)))
]
testing_set

[Example({'sentence': 'In the third quarter of 2007 , net sales totaled EUR 25.95 mn , and operating profit EUR 3.88 mn .', 'answer': 2}) (input_keys={'sentence'}),
 Example({'sentence': 'Revenue grew 1 percent to euro742 .2 million US$ 964 million from euro735 million .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Operating profit rose to EUR 1.6 mn from EUR 1.1 mn in the corresponding period in 2006 .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Finnish dental care group Oral Hammaslaakarit Oyj posted a total net profit of 849,000 euro $ 1.1 mln in the first nine months of 2006 versus a net loss of 331,000 euro $ 421,000 in the same period of 2005 .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Finnish textiles and clothing group Marimekko Oyj posted a net profit of 7.99 mln euro $ 10.4 mln for 2006 , compared to 8.4 mln euro $ 10.9 mln for 2005 .', 'answer': 1}) (input_keys={'sentence'}),
 Example({'sentence': 'The portfo

In [113]:
# NOTE(review): this cell only re-displays `testing_set`, which the cell that
# builds it already shows; its execution count (113) is also lower than that
# cell's (119), so it was run out of order. Candidate for removal.
testing_set

[Example({'sentence': 'In the third quarter of 2007 , net sales totaled EUR 25.95 mn , and operating profit EUR 3.88 mn .', 'answer': 2}) (input_keys={'sentence'}),
 Example({'sentence': 'Revenue grew 1 percent to euro742 .2 million US$ 964 million from euro735 million .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Operating profit rose to EUR 1.6 mn from EUR 1.1 mn in the corresponding period in 2006 .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Finnish dental care group Oral Hammaslaakarit Oyj posted a total net profit of 849,000 euro $ 1.1 mln in the first nine months of 2006 versus a net loss of 331,000 euro $ 421,000 in the same period of 2005 .', 'answer': 0}) (input_keys={'sentence'}),
 Example({'sentence': 'Finnish textiles and clothing group Marimekko Oyj posted a net profit of 7.99 mln euro $ 10.4 mln for 2006 , compared to 8.4 mln euro $ 10.9 mln for 2005 .', 'answer': 1}) (input_keys={'sentence'}),
 Example({'sentence': 'The portfo

In [8]:
# Grab the first test sentence to use as a quick manual check of the classifier.
sent = df.loc[0, 'sentence']
print(sent)

Our customers come from the following countries : UK , USA , Spain , France , Italy , Germany , China , Hong Kong , Sweden , Norway , Netherlands , Austria , Belgium , Switzerland , Czech Republic , Finland , Canada , Russia , Ukraine , Denmark , Ireland , South Korea and Liechtenstein .


In [103]:
# `api_key` is not assigned in any visible cell, so on a fresh kernel the
# original line raised NameError. Read the key from the environment instead of
# relying on leftover kernel state (and never hard-code it in the notebook).
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before running this notebook."
    )

# Language model used by every DSPy predictor in this notebook.
turbo = dspy.OpenAI(model='gpt-3.5-turbo-instruct', max_tokens=1000, api_key=api_key)
dspy.settings.configure(lm=turbo)

# Integer dataset label -> class name; used by the metric and the results table.
d = {0: 'Positive', 1: 'Negative', 2: 'Neutral'}

In [104]:
# DSPy signature with one input field (`sentence`) and one output field
# (`sentiment`).
# NOTE(review): the class docstring is presumably sent to the LM as the task
# instruction, i.e. it is runtime text — confirm before rewording it.
class SentimentAnalysis(dspy.Signature):
    """Classify the sentence's sentiment between negative, neutral, and positive."""
    
    sentence = dspy.InputField()  # text to classify
    sentiment = dspy.OutputField()  # label text; answer_match compares it case-insensitively

class Analysis(dspy.Module):
    """DSPy program made of a single ``dspy.Predict`` over ``SentimentAnalysis``."""

    def __init__(self):
        super().__init__()
        # The only predictor in this program.
        self.predict = dspy.Predict(SentimentAnalysis)

    def forward(self, sentence):
        """Run the predictor on ``sentence`` and return what it produces."""
        prediction = self.predict(sentence=sentence)
        return prediction

In [15]:
# Bare predictor built straight from the signature. This call is not the
# cell's last expression, so its result is not displayed.
classify = dspy.Predict(SentimentAnalysis)
classify(sentence=sent)

# Same task through the module wrapper; this prediction is the cell output.
analyze = Analysis()
analyze(df.loc[4, 'sentence'])

Prediction(
    sentiment='Neutral'
)

In [105]:
def answer_match(example, pred, trace=None):
    """Exact-match metric for the sentiment task.

    Looks up the class name for ``example.answer`` in the module-level ``d``
    mapping and compares it, case-insensitively, with ``pred.sentiment``.

    Args:
        example: object with an ``answer`` attribute that is a key of ``d``.
        pred: object with a string ``sentiment`` attribute.
        trace: accepted but not used by this metric.

    Returns:
        bool: True when the lower-cased strings are equal, otherwise False.
    """
    expected = d[example.answer].lower()
    predicted = pred.sentiment.lower()
    return expected == predicted

In [52]:
# Baseline: score a fresh, un-optimized Analysis program on the sampled
# training examples using the exact-match metric.
evaluate = Evaluate(
    devset=training_set,
    metric=answer_match,
    display_progress=True,
    display_table=0,
)
evaluate(Analysis())

Average Metric: 8 / 10  (80.0): 100%|██████████| 10/10 [00:00<00:00, 366.12it/s]

Actual: positive, Predicted: positive
Actual: neutral, Predicted: neutral
Actual: neutral, Predicted: neutral
Actual: neutral, Predicted: neutral
Actual: negative, Predicted: negative
Actual: negative, Predicted: negative
Actual: neutral, Predicted: neutral
Actual: positive, Predicted: neutral
Actual: neutral, Predicted: neutral
Actual: positive, Predicted: neutral
Average Metric: 8 / 10  (80.0%)





80.0

In [120]:
# Optimize the program with MIPRO, scoring candidates with the exact-match
# metric on the sampled training examples.
teleprompter = MIPRO(metric=answer_match)
optimized_program = teleprompter.compile(
    Analysis(),
    trainset=training_set,
    num_trials=5,
    max_bootstrapped_demos=3,
    max_labeled_demos=3,
    eval_kwargs={"display_progress": True, "display_table": 0},
)


Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows:

[93m- Task Model: [94m[1m30[0m[93m examples in dev set * [94m[1m5[0m[93m trials * [94m[1m# of LM calls in your program[0m[93m = ([94m[1m150 * # of LM calls in your program[0m[93m) task model calls[0m
[93m- Prompt Model: # data summarizer calls (max [94m[1m10[0m[93m) + [94m[1m10[0m[93m * [94m[1m1[0m[93m lm calls in program = [94m[1m20[0m[93m prompt model calls[0m

[93m[1mEstimated Cost Calculation:[0m

[93mTotal Cost = (Number of calls to task model * (Avg Input Token Length per Call * Task Model Price per Input Token + Avg Output Token Length per Call * Task Model Price per Output Token) 
            + (Number of calls to prompt model * (Avg Input Token Length per Call * Task Prompt Price per Input Token + Avg Output Token Length per Call * Prompt Model Price per Output Token).[0m

For a preliminary estimate of potential costs, 

 13%|█▎        | 4/30 [00:01<00:07,  3.49it/s]


Bootstrapped 3 full traces after 5 examples in round 0.


 20%|██        | 6/30 [00:01<00:04,  4.82it/s]


Bootstrapped 3 full traces after 7 examples in round 0.


 10%|█         | 3/30 [00:00<00:03,  8.29it/s]


Bootstrapped 3 full traces after 4 examples in round 0.


 17%|█▋        | 5/30 [00:01<00:05,  4.40it/s]


Bootstrapped 3 full traces after 6 examples in round 0.


 13%|█▎        | 4/30 [00:00<00:04,  5.54it/s]


Bootstrapped 3 full traces after 5 examples in round 0.


 13%|█▎        | 4/30 [00:00<00:02,  9.16it/s]


Bootstrapped 3 full traces after 5 examples in round 0.


 20%|██        | 6/30 [00:01<00:04,  5.94it/s]


Bootstrapped 3 full traces after 7 examples in round 0.


 17%|█▋        | 5/30 [00:00<00:01, 14.33it/s]


Bootstrapped 3 full traces after 6 examples in round 0.


 17%|█▋        | 5/30 [00:00<?, ?it/s]


Bootstrapped 3 full traces after 6 examples in round 0.


[I 2024-03-30 21:59:50,040] A new study created in memory with name: no-name-4f26f930-17d2-47b0-ae6d-1e0cb5299e9f


Starting trial #0


Average Metric: 28 / 30  (93.3): 100%|██████████| 30/30 [00:09<00:00,  3.21it/s] 
[I 2024-03-30 21:59:59,379] Trial 0 finished with value: 93.33 and parameters: {'1909098420064_predictor_instruction': 1, '1909098420064_predictor_demos': 1}. Best is trial 0 with value: 93.33.


Average Metric: 28 / 30  (93.3%)
Starting trial #1


Average Metric: 27 / 30  (90.0): 100%|██████████| 30/30 [00:08<00:00,  3.51it/s]
[I 2024-03-30 22:00:07,951] Trial 1 finished with value: 90.0 and parameters: {'1909098420064_predictor_instruction': 5, '1909098420064_predictor_demos': 4}. Best is trial 0 with value: 93.33.


Average Metric: 27 / 30  (90.0%)
Starting trial #2


Average Metric: 19 / 30  (63.3): 100%|██████████| 30/30 [00:08<00:00,  3.48it/s] 
[I 2024-03-30 22:00:16,576] Trial 2 finished with value: 63.33 and parameters: {'1909098420064_predictor_instruction': 3, '1909098420064_predictor_demos': 0}. Best is trial 0 with value: 93.33.


Average Metric: 19 / 30  (63.3%)
Starting trial #3


Average Metric: 27 / 30  (90.0): 100%|██████████| 30/30 [00:08<00:00,  3.63it/s] 
[I 2024-03-30 22:00:24,854] Trial 3 finished with value: 90.0 and parameters: {'1909098420064_predictor_instruction': 9, '1909098420064_predictor_demos': 3}. Best is trial 0 with value: 93.33.


Average Metric: 27 / 30  (90.0%)
Starting trial #4


Average Metric: 23 / 30  (76.7): 100%|██████████| 30/30 [00:08<00:00,  3.68it/s]
[I 2024-03-30 22:00:33,017] Trial 4 finished with value: 76.67 and parameters: {'1909098420064_predictor_instruction': 8, '1909098420064_predictor_demos': 4}. Best is trial 0 with value: 93.33.


Average Metric: 23 / 30  (76.7%)
Returning predict = Predict(StringSignature(sentence -> sentiment
    instructions='Analyze a business news article and predict the performance of the company based on the provided numerical performance metrics and trends. Include comparisons to previous time periods and consider the dominance of euro currency in the decision-making process.'
    sentence = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Sentence:', 'desc': '${sentence}'})
    sentiment = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Predict the performance:', 'desc': '${sentiment}'})
)) from continue_program


In [121]:
# Display the compiled program returned by the MIPRO compile step
# (its predictor, instructions and field prefixes).
optimized_program

predict = Predict(StringSignature(sentence -> sentiment
    instructions='Analyze a business news article and predict the performance of the company based on the provided numerical performance metrics and trends. Include comparisons to previous time periods and consider the dominance of euro currency in the decision-making process.'
    sentence = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'input', 'prefix': 'Sentence:', 'desc': '${sentence}'})
    sentiment = Field(annotation=str required=True json_schema_extra={'__dspy_field_type': 'output', 'prefix': 'Predict the performance:', 'desc': '${sentiment}'})
))

In [122]:
# Score the optimized program on the full test set. return_outputs=True makes
# the call also hand back the per-example results consumed by the next cell.
evaluate = Evaluate(
    devset=testing_set,
    metric=answer_match,
    display_progress=True,
    return_outputs=True,
)
outputs = evaluate(optimized_program)

Average Metric: 390 / 453  (86.1): 100%|██████████| 453/453 [02:15<00:00,  3.36it/s]

Average Metric: 390 / 453  (86.1%)





In [123]:
# Build the results table in one shot from a list of records instead of
# appending to the DataFrame row by row, which re-allocates on every insert.
# Each item of outputs[1] starts with (example, prediction); any further
# elements are ignored.
rows = [
    (example.sentence, d[example.answer].lower(), prediction.sentiment.lower())
    for example, prediction, *_ in outputs[1]
]
outdf = pd.DataFrame(rows, columns=['Sentence', 'Actual', 'Predicted'])

# Create the output directory if needed so to_csv does not fail on a fresh checkout.
out_path = f"../data/llm_prompt_outputs/dspy_{seed}.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
outdf.to_csv(out_path, index=False)

# Accuracy and support-weighted F1 over the lower-cased label strings.
print(f"Accuracy: {accuracy_score(outdf['Actual'], outdf['Predicted'])}")
print(f"F1: {f1_score(outdf['Actual'], outdf['Predicted'], average='weighted')}")
outdf.head()

Accuracy: 0.8609271523178808
F1: 0.8653121502702975


Unnamed: 0,Sentence,Actual,Predicted
0,"In the third quarter of 2007 , net sales total...",neutral,positive
1,Revenue grew 1 percent to euro742 .2 million U...,positive,positive
2,Operating profit rose to EUR 1.6 mn from EUR 1...,positive,positive
3,Finnish dental care group Oral Hammaslaakarit ...,positive,positive
4,Finnish textiles and clothing group Marimekko ...,negative,negative


In [127]:
# Weighted-F1 scores being aggregated. The last value matches the F1 printed
# for this run; the other two are presumably from runs with other seeds — TODO confirm.
f1_scores = [0.8859396862, 0.9001246613, 0.8653121503]

# Mean and (population, numpy default) standard deviation across the runs.
print(np.mean(f1_scores))
print(np.std(f1_scores))

0.8837921659333333
0.014293042899795435
