In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from transformers import pipeline

In [5]:
df = pd.read_csv("df_tweets.csv")

In [11]:
# Do not run if pre-processing is not required
!pip install tweet-preprocessor

Collecting tweet-preprocessor
  Downloading tweet_preprocessor-0.6.0-py3-none-any.whl (27 kB)
Installing collected packages: tweet-preprocessor
Successfully installed tweet-preprocessor-0.6.0
[0m

In [12]:
# Do not run if pre-processing is not required
df['text_processed'] = df['text']

In [13]:
# transforming column with tweets to the list
sentences = list(df['text_processed'].values) # text will be the input to the model

In [14]:
# Checking GPU
import torch
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

In [16]:
# Applying classifier to the list of tweets
classifier = pipeline("zero-shot-classification", model = 'facebook/bart-large-mnli')
result = classifier(list(sentences), candidate_labels = ['society','economy','culture','greenwashing'])
result[10]

Downloading config.json:   0%|          | 0.00/1.13k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading vocab.json:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading merges.txt:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.29M [00:00<?, ?B/s]

{'sequence': 'respect the environment practice catch and release and follow local fishing regulations to preserve bass populations and their habitats for future generations conservationmatters sustainablefishing',
 'labels': ['society'],
 'scores': [0.43363097310066223]}

In [17]:
# Transforming list of predictions back to dataframe
df_result = pd.DataFrame(result)

In [18]:
# Checking predictions
df_result.head()

Unnamed: 0,sequence,labels,scores
0,well done bro future us bright and i like your...,[society],[0.149720698595047]
1,the world circular economy forum is an opportu...,[society],[0.686609148979187]
2,flocard igniting future business growth throug...,[society],[0.06200450286269188]
3,celebrating international day for biological d...,[society],[0.014194754883646965]
4,savesoil biodiversity the above sentence empha...,[society],[0.021134324371814728]


In [19]:
# Function to extract score based on label
def extract_score(labels, scores, label_name):
    try:
        index = labels.index(label_name)
        return scores[index]
    except ValueError:
        return None

# Extracting scores based on labels
df_result['greenwashing'] = df_result.apply(lambda row: extract_score(row['labels'], row['scores'], 'Greenwashing'), axis=1)
df_result['culture'] = df_result.apply(lambda row: extract_score(row['labels'], row['scores'], 'Culture'), axis=1)
df_result['nature'] = df_result.apply(lambda row: extract_score(row['labels'], row['scores'], 'Nature'), axis=1)
df_result['society'] = df_result.apply(lambda row: extract_score(row['labels'], row['scores'], 'Society'), axis=1)


In [20]:
# Dropping the original 'label' and 'scores' columns
df_result.drop(['labels', 'scores'], axis=1, inplace=True)

df_result.head(5)

Unnamed: 0,sequence,society
0,well done bro future us bright and i like your...,0.149721
1,the world circular economy forum is an opportu...,0.686609
2,flocard igniting future business growth throug...,0.062005
3,celebrating international day for biological d...,0.014195
4,savesoil biodiversity the above sentence empha...,0.021134


In [21]:
df_result.rename(columns={'sequence': 'text_processed'}, inplace=True)
df_result.head(5)

Unnamed: 0,text_processed,society
0,well done bro future us bright and i like your...,0.149721
1,the world circular economy forum is an opportu...,0.686609
2,flocard igniting future business growth throug...,0.062005
3,celebrating international day for biological d...,0.014195
4,savesoil biodiversity the above sentence empha...,0.021134


In [22]:
merged_df = pd.merge(df, df_result, on='text_processed', how='left')
merged_df

Unnamed: 0,author_id,lang,id,text,created_at,in_reply_to_user_id,Document,Topic,Name,Representation,Representative_Docs,Top_n_words,Probability,Representative_document,Positive future,Environmental_Score,Updated_Environmental_Score,text_processed,society
0,"1,20472E+18",en,"1,66141E+18",@RuttokipKevin @EnviClimateC_Ke @Meshac_Kiplag...,2023-05-24T19:26:33+0300,3129025773,well done bro future us bright and i like your...,183,183_environment future_happy presidentsday_env...,"['environment future', 'happy presidentsday', ...",['woow great job this is really inspiring thos...,environment future - happy presidentsday - env...,1,False,0995941877,0,1,well done bro future us bright and i like your...,0.149721
1,102448827,en,"1,6614E+18",1/2 The World Circular Economy Forum is an opp...,2023-05-24T19:03:22+0300,,the world circular economy forum is an opportu...,178,178_circulareconomy sustainability_circular ec...,"['circulareconomy sustainability', 'circular e...",['unlock the future of the circular bioeconomy...,circulareconomy sustainability - circular econ...,1,False,0990356684,0,1,the world circular economy forum is an opportu...,0.686609
2,"1,37352E+18",en,"1,66126E+18",FloCard: Igniting Future Business Growth throu...,2023-05-24T09:33:41+0300,,flocard igniting future business growth throug...,0,0_environment day_environmentday_worldenvironm...,"['environment day', 'environmentday', 'worlden...",['this world environment day let s come togeth...,environment day - environmentday - worldenviro...,0507634043,False,0957089424,0,2,flocard igniting future business growth throug...,0.062005
3,"9,65331E+17",en,"1,66125E+18",üåçüåø Celebrating International Day for Biologica...,2023-05-24T08:49:16+0300,,celebrating international day for biological d...,0,0_environment day_environmentday_worldenvironm...,"['environment day', 'environmentday', 'worlden...",['this world environment day let s come togeth...,environment day - environmentday - worldenviro...,1,False,0939804971,0,2,celebrating international day for biological d...,0.014195
4,"1,51776E+18",en,"1,6612E+18","#SaveSoil,...biodiversity. The above sentence ...",2023-05-24T05:42:36+0300,,savesoil biodiversity the above sentence empha...,0,0_environment day_environmentday_worldenvironm...,"['environment day', 'environmentday', 'worlden...",['this world environment day let s come togeth...,environment day - environmentday - worldenviro...,0760217395,False,079150331,0,2,savesoil biodiversity the above sentence empha...,0.021134
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2054,"1,50472E+18",en,"1,52906E+18",For the vision of sustainability and well bein...,2022-05-24T14:26:21+0300,,for the vision of sustainability and well bein...,0,0_environment day_environmentday_worldenvironm...,"['environment day', 'environmentday', 'worlden...",['this world environment day let s come togeth...,environment day - environmentday - worldenviro...,1,False,0990749776,0,1,for the vision of sustainability and well bein...,0.436568
2055,"1,50232E+18",en,"1,52905E+18","Thanks to this residency, coordinated by @peac...",2022-05-24T13:40:05+0300,"1,50232E+18",thanks to this residency coordinated by anna c...,101,101_art nature_nature art_artistic vision_art,"['art nature', 'nature art', 'artistic vision'...",['my art works are inspired by the relationshi...,art nature - nature art - artistic vision - ar...,1,False,0922970831,0,0,thanks to this residency coordinated by anna c...,0.009476
2056,15071251,en,"1,52899E+18",Future foods: What you could be eating by 2050...,2022-05-24T09:25:49+0300,,future foods what you could be eating by accor...,0,0_environment day_environmentday_worldenvironm...,"['environment day', 'environmentday', 'worlden...",['this world environment day let s come togeth...,environment day - environmentday - worldenviro...,1,False,0786512434,0,0,future foods what you could be eating by accor...,0.238872
2057,"1,51071E+18",en,"1,5289E+18",Look at these incredible moments caught by Hiv...,2022-05-24T04:00:20+0300,,look at these incredible moments caught by hiv...,60,60_biodiversity future_pollinators_bees future...,"['biodiversity future', 'pollinators', 'bees f...",['the way humanity manages its nature based as...,biodiversity future - pollinators - bees futur...,0691287069,False,0677969873,0,1,look at these incredible moments caught by hiv...,0.001011


In [24]:
# Saving prediction results to csv
merged_df.to_csv('twitter_visions_zero_shot.csv')