### Imports

In [1]:
from sentence_transformers import SentenceTransformer
import json
import numpy as np
import csv
import pickle

  from .autonotebook import tqdm as notebook_tqdm


### Downloading Sentence Transformer Model

In [2]:
# Name of the pretrained sentence-transformers checkpoint used to embed tweets.
MODEL_NAME = 'all-MiniLM-L6-v2'

# Instantiate the encoder once; later cells reuse `model` to embed the tweets.
model = SentenceTransformer(MODEL_NAME)

### Reading Tweet Dataset

In [3]:
# Load the tweet-text column (column index 2) from the CSV export into a list.
#
# - newline='' hands line-ending handling to the csv module, which is required
#   for quoted fields that contain embedded newlines (the tweets do contain "\n").
# - The encoding is explicit so the read does not depend on the platform default;
#   assumes the export is UTF-8 -- TODO confirm against the data source.
all_tweets = []
with open('data_1/trumptweets1205_127.csv', newline='', encoding='utf-8') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    # Skip the header row; the default makes this a no-op on an empty file.
    next(csv_reader, None)
    all_tweets = [row[2] for row in csv_reader]

In [4]:
# Sanity check: how many tweets were read from the CSV (header row excluded).
len(all_tweets)

297

In [5]:
# Preview the first five tweets to confirm the right column was extracted.
all_tweets[:5]

['I am thrilled to nominate Dr. @RealBenCarson as our next Secretary of the US Dept. of Housing and Urban Development… https://t.co/OJKuDFhP3r',
 'If the press would cover me accurately &amp; honorably, I would have far less reason to "tweet." Sadly, I don\'t know if that will ever happen!',
 '#ThankYouTour2016 \n\n12/6- North Carolina\nhttps://t.co/79AHq3NC0v\n\n12/8- Iowa\nhttps://t.co/1IuRTVwMSx\n\n12/9- Michiga… https://t.co/vcQaIJ8qoB',
 'Boeing is building a brand new 747 Air Force One for future presidents, but costs are out of control, more than $4 billion. Cancel order!',
 'Join me tonight in Fayetteville, North Carolina at 7pm! \n#ThankYouTour2016 \nTickets: https://t.co/79AHq3NC0v https://t.co/KoZCE7JeG7']

### Generating an embedding for each tweet

In [6]:
# Encode every tweet with the loaded model in a single call.
# NOTE(review): presumably returns one embedding per input tweet, in input
# order (later cells zip `all_tweets` with `embeddings`) -- confirm the return
# type/shape against the sentence-transformers documentation.
embeddings = model.encode(all_tweets)

### Preparing maps for ID to Tweet Look-up and ID to Embedding Look-up

In [7]:
# Pair each tweet with its embedding; zip stops at the shorter of the two
# inputs, so only complete (tweet, embedding) pairs receive an id.
tweet_embedding_pairs = list(zip(all_tweets, embeddings))

# Sequential integer ids (0, 1, 2, ...) key both lookup tables:
#   sentence_id_mapping:  id -> tweet text
#   id_embedding_mapping: id -> embedding for that tweet
sentence_id_mapping = {
    idx: text for idx, (text, _vector) in enumerate(tweet_embedding_pairs)
}
id_embedding_mapping = {
    idx: vector for idx, (_text, vector) in enumerate(tweet_embedding_pairs)
}

# The next unused id, i.e. the number of entries assigned above.
series_id = len(tweet_embedding_pairs)

### Saving both maps

In [8]:
# Persist the id -> embedding lookup table as a binary pickle in the current
# working directory. Pickle files can execute code when loaded, so only
# unpickle this file from a trusted location.
with open('id_embedding_mapping.pkl', 'wb') as pkl_file:
    pickle.dump(id_embedding_mapping, pkl_file)

In [9]:
# Persist the id -> tweet-text lookup table as JSON in the current working
# directory. JSON object keys are always strings, so the integer ids come back
# as str (e.g. "0") when this file is loaded again.
with open('sentence_id_mapping.json', 'w') as json_file:
    serialized_mapping = json.dumps(sentence_id_mapping)
    json_file.write(serialized_mapping)