# Summary

## Test the Pipeline code for scoring tweets


## Import libraries

In [1]:
import os, sys
import pandas as pd


# Pipeline module under test.
# Put the scraper source directory at the front of the import search path so
# the `model` module can be imported from this notebook; it provides the
# relevance_model and sentiment_model functions called further down.
# The path is relative, so it is resolved against the current working directory.
sys.path.insert(0, '../scraper')
import model


  from .autonotebook import tqdm as notebook_tqdm


## Read unscored scraper output

In [2]:
# Input: scraper output stored as CSV under the processed-data directory.
# NOTE(review): the file name says "scored", yet the rows loaded from it carry
# no sentiment/score columns — confirm this is the intended unscored input.
scraped_tweets_file = "GVCEH-2023-12-27-tweet-scored.csv"
file_path = '../data/processed/twitter'

# The first CSV column has no header and holds row numbers (presumably an
# index written by an earlier DataFrame.to_csv). Loading it as the index keeps
# a stray "Unnamed: 0" data column out of the frame and out of any file that
# is later saved from it.
final_results = pd.read_csv(
    os.path.join(file_path, scraped_tweets_file),
    index_col=0,
)


In [3]:
# Report how many rows were loaded, then preview the first few of them.
n_loaded = len(final_results)
print(
    'There are {} entries in the pre-scored tweet file\n'.format(n_loaded),
    'A sample of entries',
    sep='\n',
)
final_results.head()


There are 361 entries in the pre-scored tweet file

A sample of entries


Unnamed: 0,text,scrape_time,tweet_id,created_at,reply_count,quote_count,like_count,retweet_count,geo_full_name,geo_id,username,user_location,num_followers,search_keywords,search_neighbourhood
0,THE PARTY HOUSE - 'This will appeal to fans of...,2023-12-27 13:13:29.327168,1740085153366421989,2023-12-27 19:00:04+00:00,0,0,0,0,,,CannesVibe,"Cannes, France",16684,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...
1,"being from the central belt is a crime, there ...",2023-12-27 13:13:29.327202,1740076944387944728,2023-12-27 18:27:27+00:00,4,0,4,0,,,georger__,AB 🏴󠁧󠁢󠁳󠁣󠁴󠁿🇪🇺,1491,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...
2,FOLLOW THE DEAD (Shortlisted for Scottish Crim...,2023-12-27 13:13:29.327212,1739979461691322789,2023-12-27 12:00:05+00:00,1,1,22,28,,,Dunedin_Media,"Edinburgh, Scotland",19048,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...
3,THE PARTY HOUSE - 'This will appeal to fans of...,2023-12-27 13:13:29.327221,1739707696046014532,2023-12-26 18:00:11+00:00,0,0,1,2,,,Mystery_Tomes,On the case,28065,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...
4,THE PARTY HOUSE - 'This will appeal to fans of...,2023-12-27 13:13:29.327230,1739174148440551929,2023-12-25 06:40:03+00:00,1,1,26,34,,,LeftyPleb,Northumberland,4153,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...


## Run the relevancy filter and add sentiment scores


In [4]:
# Step 1 - relevance: run the pipeline's relevance model, which filters out
# irrelevant tweets, and log the row count on either side of the filter.
n_before = len(final_results)
print(f"Pre relevancy filter: {n_before}")
final_results = model.relevance_model(final_results)
n_after = len(final_results)
print(f"Relevancy filter applied: {n_after}")

# Step 2 - sentiment: add the sentiment model's scores to the surviving tweets.
final_results = model.sentiment_model(final_results)



Pre relevancy filter: 361
relevancy filter applied
Relevancy filter applied: 37


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


sentiment model downloaded
sentiment scores added


## View the results

In [5]:
# Report how many rows survived the relevance filter, then preview the first
# few rows of the filtered, scored frame.
n_relevant = len(final_results)
print(
    'There are {} entries in the tweet file after filtering for relevancy\n'.format(n_relevant),
    'A sample of entries',
    sep='\n',
)
final_results.head()



There are 37 entries in the tweet file after filtering for relevancy

A sample of entries


Unnamed: 0,text,scrape_time,tweet_id,created_at,reply_count,quote_count,like_count,retweet_count,geo_full_name,geo_id,username,user_location,num_followers,search_keywords,search_neighbourhood,sentiment,score
21,Come on journos find out what’s happening to t...,2023-12-27 13:13:35.405958,1740082798776537238,2023-12-27 18:50:43+00:00,0,0,6,1,,,Invernessbelle,Inverness,851,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...,negative,0.847158
22,Saanich ought to be a place where people can f...,2023-12-27 13:13:35.406022,1740080536838926785,2023-12-27 18:41:43+00:00,5,0,8,0,,,Dean_Murdock,Lək̓ʷəŋən & W̱SÁNEĆ homelands,2842,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...,neutral,0.579667
93,Hanging out with my local MP @ElizabethMay at ...,2023-12-27 13:13:35.483509,1738270796806828478,2023-12-22 18:50:28+00:00,0,0,2,1,,,TrailheadMrktng,"Portland, Oregon",99,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...,positive,0.954958
99,The framework agreement that allows Victoria P...,2023-12-27 13:13:38.272974,1738014210783936614,2023-12-22 01:50:53+00:00,0,0,2,0,,,CHEK_News,"Victoria, BC",59639,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...,neutral,0.827134
100,"Indigenous families, elders, and people in #So...",2023-12-27 13:13:38.272981,1737862293290180785,2023-12-21 15:47:13+00:00,0,2,3,1,,,makoladevelops,"Victoria, British Columbia",1135,(900-block pandora avenue OR esquimalt OR high...,900-block pandora avenue OR esquimalt OR highl...,neutral,0.754363


## Save filtered and scored results

In [6]:
# Persist the filtered, scored frame under a "_testrun" file name. The name
# carries no directory, so the CSV is written to the current working directory.
scored_tweets_file = "GVCEH-2023-12-27-tweet-scored_testrun.csv"
final_results.to_csv(scored_tweets_file)
