### Data Exploration and Cleaning

Importing related libraries and read the file.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('Job_Tweets.csv')
df.head()

Unnamed: 0,ID,Timestamp,User,Text,Hashtag,Retweets,Likes,Replies,Source,Location,Verified_Account,Followers,Following
0,1211798864656769025,2019-12-30 23:58:58+00:00,SipapuNM,Looking for an exciting job where you can ski ...,,0,1,0,"<a href=""http://instagram.com"" rel=""nofollow"">...","20 miles SE of Taos, NM",False,3342,258
1,1211797371853705220,2019-12-30 23:53:02+00:00,LorettaOD1,Hail in Phoenix - no way! New opportunity at ...,['job'],0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...",Arizona,False,63,129
2,1211795775363145728,2019-12-30 23:46:41+00:00,guajardo_celina,Rackspace is hiring! We are looking for Nation...,"['becomearacker', 'Rackspace', 'recruiting', '...",0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...","Texas, USA",False,19,50
3,1211793355060981767,2019-12-30 23:37:04+00:00,SteveEckert_OTD,We are #hiring Administrative Assistant in Nan...,"['hiring', 'jobs', 'Nanuet']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...","Orange County, California",False,966,1569
4,1211792689022349315,2019-12-30 23:34:25+00:00,HireLive,We are #hiring Family Service Counselor in Irv...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251


In [3]:
# list all column names in the dataset
list(df.columns)

['ID',
 'Timestamp',
 'User',
 'Text',
 'Hashtag',
 'Retweets',
 'Likes',
 'Replies',
 'Source',
 'Location',
 'Verified_Account',
 'Followers',
 'Following']

In [4]:
# to check dimensions of the dataset
print("Dimensions of the dataset:")
print("Number of rows:", df.shape[0])
print("Number of columns:", df.shape[1])

Dimensions of the dataset:
Number of rows: 50000
Number of columns: 13


In [5]:
# to get a summary of data types and non-null values
print("Summary of data types and non-null values:")
df.info()

Summary of data types and non-null values:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   ID                50000 non-null  int64 
 1   Timestamp         50000 non-null  object
 2   User              50000 non-null  object
 3   Text              50000 non-null  object
 4   Hashtag           40684 non-null  object
 5   Retweets          50000 non-null  int64 
 6   Likes             50000 non-null  int64 
 7   Replies           50000 non-null  int64 
 8   Source            50000 non-null  object
 9   Location          44011 non-null  object
 10  Verified_Account  50000 non-null  bool  
 11  Followers         50000 non-null  int64 
 12  Following         50000 non-null  int64 
dtypes: bool(1), int64(6), object(6)
memory usage: 4.6+ MB


In [6]:
# convert data type of user ID from int to object
df["ID"] = df["ID"].astype(object)
df.dtypes

ID                  object
Timestamp           object
User                object
Text                object
Hashtag             object
Retweets             int64
Likes                int64
Replies              int64
Source              object
Location            object
Verified_Account      bool
Followers            int64
Following            int64
dtype: object

In [7]:
# to calculate the basic statistics for numerical columns
print("Basic statistics for numerical columns:")
df.describe()

Basic statistics for numerical columns:


Unnamed: 0,Retweets,Likes,Replies,Followers,Following
count,50000.0,50000.0,50000.0,50000.0,50000.0
mean,0.6921,1.34786,0.10892,5511.382,1961.96546
std,7.441484,17.527216,1.276483,67708.25,5847.789338
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,80.0,94.0
50%,0.0,0.0,0.0,298.0,331.0
75%,0.0,0.0,0.0,1258.0,1076.0
max,1016.0,1677.0,175.0,5226473.0,310791.0


In [8]:
# describing categorical data
df.describe(include='object')

Unnamed: 0,ID,Timestamp,User,Text,Hashtag,Source,Location
count,50000,50000,50000,50000,40684,50000,44011
unique,50000,46795,13715,46507,14227,261,5973
top,1636315580528574467,2019-12-19 20:00:27+00:00,cameroncraig,We generally enjoy and are satisfied by being ...,"['hiring', 'jobs', 'Houston']","<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States
freq,1,8,2513,134,792,21888,3504


In [9]:
# to check for any missing values
missing_values = df.isnull().sum()
print("Missing Values:")
print(missing_values)

Missing Values:
ID                     0
Timestamp              0
User                   0
Text                   0
Hashtag             9316
Retweets               0
Likes                  0
Replies                0
Source                 0
Location            5989
Verified_Account       0
Followers              0
Following              0
dtype: int64


In [10]:
missing_values = df.dropna()
df = missing_values
missing_values = df.isnull().sum() # count number of missing values again
print("Missing Values:")
print(missing_values)

Missing Values:
ID                  0
Timestamp           0
User                0
Text                0
Hashtag             0
Retweets            0
Likes               0
Replies             0
Source              0
Location            0
Verified_Account    0
Followers           0
Following           0
dtype: int64


In [11]:
# to identify any duplicated rows in the dataset
duplicate_rows = df.duplicated()
duplicate_rows

1        False
2        False
3        False
4        False
5        False
         ...  
49993    False
49995    False
49996    False
49997    False
49998    False
Length: 36632, dtype: bool

In [12]:
# to count the number of duplicated rows
num_duplicates = sum(duplicate_rows)
print(num_duplicates)

0


In [13]:
# to display unique values in each column to identify categorical variables
for column in df.columns:
    unique_values = df[column].unique()
    print(f"Unique values in {column}:")
    print(unique_values)
    print()

Unique values in ID:
[1211797371853705220 1211795775363145728 1211793355060981767 ...
 1636306593858871297 1636305111726141440 1636304982768066561]

Unique values in Timestamp:
['2019-12-30 23:53:02+00:00' '2019-12-30 23:46:41+00:00'
 '2019-12-30 23:37:04+00:00' ... '2023-03-16 10:01:07+00:00'
 '2023-03-16 09:55:14+00:00' '2023-03-16 09:54:43+00:00']

Unique values in User:
['LorettaOD1' 'guajardo_celina' 'SteveEckert_OTD' ... 'movmn'
 'babelsblessing' 'SSARecruit']

Unique values in Text:
['Hail in Phoenix - no way!  New opportunity at Freedom? Possibly. We are hiring a(n) Reconciliation Specialist II and would like to talk to you!\nhttps://t.co/CFAKKQHeFJ #job'
 'Rackspace is hiring! We are looking for National Partner Manager. Learn more or Jobvite a friend. #becomearacker #Rackspace #recruiting #talentacquisition #jobsearching\nhttps://t.co/XuSMG0eSgs #job'
 'We are #hiring Administrative Assistant in Nanuet, NY https://t.co/3IGxFiCXUI #jobs #Nanuet'
 ...
 "A lovely review from one

<br>

### Data Labeling

Install and import related libraries

In [28]:
#pip install textblob

In [29]:
#pip install vaderSentiment

In [30]:
#pip install flair

In [32]:
#pip install transformers

Installations are successful. Now, let's proceed to do sentiment analysis using TextBlob.

In [25]:
import torch
from tweepy import OAuthHandler
from textblob import TextBlob
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from transformers import pipeline
from flair.models import TextClassifier
from flair.data import Sentence

  from .autonotebook import tqdm as notebook_tqdm


In [26]:
#selecting column 'Text' to do sentiment analysis and display in DataFrame
df_text = pd.DataFrame(df['Text'])
df_text.head()

Unnamed: 0,Text
1,Hail in Phoenix - no way! New opportunity at ...
2,Rackspace is hiring! We are looking for Nation...
3,We are #hiring Administrative Assistant in Nan...
4,We are #hiring Family Service Counselor in Irv...
5,We are #hiring Inside Sales Representative in ...


<br>

In [27]:
# Applying TextBlob
df['text_blob'] = df['Text'].apply(lambda x: TextBlob(x).sentiment)
df.head()

Unnamed: 0,ID,Timestamp,User,Text,Hashtag,Retweets,Likes,Replies,Source,Location,Verified_Account,Followers,Following,text_blob
1,1211797371853705220,2019-12-30 23:53:02+00:00,LorettaOD1,Hail in Phoenix - no way! New opportunity at ...,['job'],0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...",Arizona,False,63,129,"(0.06818181818181818, 0.7272727272727273)"
2,1211795775363145728,2019-12-30 23:46:41+00:00,guajardo_celina,Rackspace is hiring! We are looking for Nation...,"['becomearacker', 'Rackspace', 'recruiting', '...",0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...","Texas, USA",False,19,50,"(0.5, 0.5)"
3,1211793355060981767,2019-12-30 23:37:04+00:00,SteveEckert_OTD,We are #hiring Administrative Assistant in Nan...,"['hiring', 'jobs', 'Nanuet']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...","Orange County, California",False,966,1569,"(0.0, 0.0)"
4,1211792689022349315,2019-12-30 23:34:25+00:00,HireLive,We are #hiring Family Service Counselor in Irv...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)"
5,1211792528388841473,2019-12-30 23:33:47+00:00,HireLive,We are #hiring Inside Sales Representative in ...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)"


In [28]:
df_text_blob = pd.DataFrame(df['text_blob'])
df_text_blob.head()

Unnamed: 0,text_blob
1,"(0.06818181818181818, 0.7272727272727273)"
2,"(0.5, 0.5)"
3,"(0.0, 0.0)"
4,"(0.0, 0.0)"
5,"(0.0, 0.0)"


In [29]:
# Placing 'Text' and text_blob side by side
blob_analysis = df[['Text','text_blob']]
blob_analysis.head()

Unnamed: 0,Text,text_blob
1,Hail in Phoenix - no way! New opportunity at ...,"(0.06818181818181818, 0.7272727272727273)"
2,Rackspace is hiring! We are looking for Nation...,"(0.5, 0.5)"
3,We are #hiring Administrative Assistant in Nan...,"(0.0, 0.0)"
4,We are #hiring Family Service Counselor in Irv...,"(0.0, 0.0)"
5,We are #hiring Inside Sales Representative in ...,"(0.0, 0.0)"


<br> Let's do sentiment analysis using Vader.

In [30]:
# Applying Vader
analyzer = SentimentIntensityAnalyzer()
df['text_vader'] = df['Text'].apply(lambda x: analyzer.polarity_scores(x))
df.head()

Unnamed: 0,ID,Timestamp,User,Text,Hashtag,Retweets,Likes,Replies,Source,Location,Verified_Account,Followers,Following,text_blob,text_vader
1,1211797371853705220,2019-12-30 23:53:02+00:00,LorettaOD1,Hail in Phoenix - no way! New opportunity at ...,['job'],0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...",Arizona,False,63,129,"(0.06818181818181818, 0.7272727272727273)","{'neg': 0.062, 'neu': 0.618, 'pos': 0.32, 'com..."
2,1211795775363145728,2019-12-30 23:46:41+00:00,guajardo_celina,Rackspace is hiring! We are looking for Nation...,"['becomearacker', 'Rackspace', 'recruiting', '...",0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...","Texas, USA",False,19,50,"(0.5, 0.5)","{'neg': 0.0, 'neu': 0.863, 'pos': 0.137, 'comp..."
3,1211793355060981767,2019-12-30 23:37:04+00:00,SteveEckert_OTD,We are #hiring Administrative Assistant in Nan...,"['hiring', 'jobs', 'Nanuet']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...","Orange County, California",False,966,1569,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,1211792689022349315,2019-12-30 23:34:25+00:00,HireLive,We are #hiring Family Service Counselor in Irv...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
5,1211792528388841473,2019-12-30 23:33:47+00:00,HireLive,We are #hiring Inside Sales Representative in ...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."


In [31]:
df_text_vader = pd.DataFrame(df['text_vader'])
df_text_vader.head()

Unnamed: 0,text_vader
1,"{'neg': 0.062, 'neu': 0.618, 'pos': 0.32, 'com..."
2,"{'neg': 0.0, 'neu': 0.863, 'pos': 0.137, 'comp..."
3,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
5,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."


In [32]:
# Placing 'Text' and text_vader side by side
vader_analysis = df[['Text','text_vader']]
vader_analysis.head()

Unnamed: 0,Text,text_vader
1,Hail in Phoenix - no way! New opportunity at ...,"{'neg': 0.062, 'neu': 0.618, 'pos': 0.32, 'com..."
2,Rackspace is hiring! We are looking for Nation...,"{'neg': 0.0, 'neu': 0.863, 'pos': 0.137, 'comp..."
3,We are #hiring Administrative Assistant in Nan...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
4,We are #hiring Family Service Counselor in Irv...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."
5,We are #hiring Inside Sales Representative in ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound..."


Let's do sentiment analysis using flair.

In [33]:
# Applying flair
classifier = TextClassifier.load('en-sentiment')
sentences = [Sentence(text) for text in df['Text']]
classifier.predict(sentences)
df['text_flair'] = [sentence.labels[0].value for sentence in sentences]
df.head()

2024-12-14 16:15:45,316 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to C:\Users\RASYIQ~1\AppData\Local\Temp\tmpejencaa9


100%|██████████| 253M/253M [00:16<00:00, 15.9MB/s] 

2024-12-14 16:16:02,913 copying C:\Users\RASYIQ~1\AppData\Local\Temp\tmpejencaa9 to cache at C:\Users\Rasyiqah Rais\.flair\models\sentiment-en-mix-distillbert_4.pt





2024-12-14 16:16:03,386 removing temp file C:\Users\RASYIQ~1\AppData\Local\Temp\tmpejencaa9


Unnamed: 0,ID,Timestamp,User,Text,Hashtag,Retweets,Likes,Replies,Source,Location,Verified_Account,Followers,Following,text_blob,text_vader,text_flair
1,1211797371853705220,2019-12-30 23:53:02+00:00,LorettaOD1,Hail in Phoenix - no way! New opportunity at ...,['job'],0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...",Arizona,False,63,129,"(0.06818181818181818, 0.7272727272727273)","{'neg': 0.062, 'neu': 0.618, 'pos': 0.32, 'com...",NEGATIVE
2,1211795775363145728,2019-12-30 23:46:41+00:00,guajardo_celina,Rackspace is hiring! We are looking for Nation...,"['becomearacker', 'Rackspace', 'recruiting', '...",0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...","Texas, USA",False,19,50,"(0.5, 0.5)","{'neg': 0.0, 'neu': 0.863, 'pos': 0.137, 'comp...",POSITIVE
3,1211793355060981767,2019-12-30 23:37:04+00:00,SteveEckert_OTD,We are #hiring Administrative Assistant in Nan...,"['hiring', 'jobs', 'Nanuet']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...","Orange County, California",False,966,1569,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE
4,1211792689022349315,2019-12-30 23:34:25+00:00,HireLive,We are #hiring Family Service Counselor in Irv...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE
5,1211792528388841473,2019-12-30 23:33:47+00:00,HireLive,We are #hiring Inside Sales Representative in ...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE


In [34]:
df_text_flair = pd.DataFrame(df['text_flair'])
df_text_flair.head()

Unnamed: 0,text_flair
1,NEGATIVE
2,POSITIVE
3,NEGATIVE
4,NEGATIVE
5,NEGATIVE


In [35]:
# Placing 'Text' and text_flair side by side
flair_analysis = df[['Text','text_flair']]
flair_analysis.head()

Unnamed: 0,Text,text_flair
1,Hail in Phoenix - no way! New opportunity at ...,NEGATIVE
2,Rackspace is hiring! We are looking for Nation...,POSITIVE
3,We are #hiring Administrative Assistant in Nan...,NEGATIVE
4,We are #hiring Family Service Counselor in Irv...,NEGATIVE
5,We are #hiring Inside Sales Representative in ...,NEGATIVE


Let's do sentiment analysis using Transformers.

In [45]:
# Applying Transformers
sentiment_model = pipeline('sentiment-analysis', model='nlptown/bert-base-multilingual-uncased-sentiment')
def analyze_sentiment(text):
    result = sentiment_model(text)
    return result[0]['label']

df['text_bert'] = df['Text'].apply(analyze_sentiment)
df.head()




Unnamed: 0,ID,Timestamp,User,Text,Hashtag,Retweets,Likes,Replies,Source,Location,Verified_Account,Followers,Following,text_blob,text_vader,text_flair,text_bert
1,1211797371853705220,2019-12-30 23:53:02+00:00,LorettaOD1,Hail in Phoenix - no way! New opportunity at ...,['job'],0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...",Arizona,False,63,129,"(0.06818181818181818, 0.7272727272727273)","{'neg': 0.062, 'neu': 0.618, 'pos': 0.32, 'com...",NEGATIVE,1 star
2,1211795775363145728,2019-12-30 23:46:41+00:00,guajardo_celina,Rackspace is hiring! We are looking for Nation...,"['becomearacker', 'Rackspace', 'recruiting', '...",0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...","Texas, USA",False,19,50,"(0.5, 0.5)","{'neg': 0.0, 'neu': 0.863, 'pos': 0.137, 'comp...",POSITIVE,5 stars
3,1211793355060981767,2019-12-30 23:37:04+00:00,SteveEckert_OTD,We are #hiring Administrative Assistant in Nan...,"['hiring', 'jobs', 'Nanuet']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...","Orange County, California",False,966,1569,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE,5 stars
4,1211792689022349315,2019-12-30 23:34:25+00:00,HireLive,We are #hiring Family Service Counselor in Irv...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE,5 stars
5,1211792528388841473,2019-12-30 23:33:47+00:00,HireLive,We are #hiring Inside Sales Representative in ...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE,5 stars


In [46]:
df_text_bert = pd.DataFrame(df['text_bert'])
df_text_bert.head()

Unnamed: 0,text_bert
1,1 star
2,5 stars
3,5 stars
4,5 stars
5,5 stars


In [47]:
# Placing 'Text' and text_transformers side by side
bert_analysis = df[['Text','text_bert']]
bert_analysis.head()

Unnamed: 0,Text,text_bert
1,Hail in Phoenix - no way! New opportunity at ...,1 star
2,Rackspace is hiring! We are looking for Nation...,5 stars
3,We are #hiring Administrative Assistant in Nan...,5 stars
4,We are #hiring Family Service Counselor in Irv...,5 stars
5,We are #hiring Inside Sales Representative in ...,5 stars


Let's do sentiment analysis with DistilBERT

In [48]:
# Applying DistilBERT
classifier = pipeline("sentiment-analysis", model="distilbert-base-uncased", device=-1) 
df['text_distilbert'] = classifier(df['Text'].tolist(), truncation=True, max_length=512)
df['text_distilbert'] = df['text_distilbert'].apply(lambda x: x['label'])
df.head()

Unnamed: 0,ID,Timestamp,User,Text,Hashtag,Retweets,Likes,Replies,Source,Location,Verified_Account,Followers,Following,text_blob,text_vader,text_flair,text_bert,text_distilbert
1,1211797371853705220,2019-12-30 23:53:02+00:00,LorettaOD1,Hail in Phoenix - no way! New opportunity at ...,['job'],0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...",Arizona,False,63,129,"(0.06818181818181818, 0.7272727272727273)","{'neg': 0.062, 'neu': 0.618, 'pos': 0.32, 'com...",NEGATIVE,1 star,LABEL_1
2,1211795775363145728,2019-12-30 23:46:41+00:00,guajardo_celina,Rackspace is hiring! We are looking for Nation...,"['becomearacker', 'Rackspace', 'recruiting', '...",0,0,0,"<a href=""http://jobvite.com"" rel=""nofollow"">Jo...","Texas, USA",False,19,50,"(0.5, 0.5)","{'neg': 0.0, 'neu': 0.863, 'pos': 0.137, 'comp...",POSITIVE,5 stars,LABEL_1
3,1211793355060981767,2019-12-30 23:37:04+00:00,SteveEckert_OTD,We are #hiring Administrative Assistant in Nan...,"['hiring', 'jobs', 'Nanuet']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...","Orange County, California",False,966,1569,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE,5 stars,LABEL_1
4,1211792689022349315,2019-12-30 23:34:25+00:00,HireLive,We are #hiring Family Service Counselor in Irv...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE,5 stars,LABEL_1
5,1211792528388841473,2019-12-30 23:33:47+00:00,HireLive,We are #hiring Inside Sales Representative in ...,"['hiring', 'jobs', 'Irvine']",0,0,0,"<a href=""http://www.ziprecruiter.com"" rel=""nof...",United States,False,983,1251,"(0.0, 0.0)","{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",NEGATIVE,5 stars,LABEL_1


In [49]:
df_text_distilbert = pd.DataFrame(df['text_distilbert'])
df_text_distilbert.head()

Unnamed: 0,text_distilbert
1,LABEL_1
2,LABEL_1
3,LABEL_1
4,LABEL_1
5,LABEL_1


In [50]:
# Placing 'Text' and text_distilbert side by side
distilbert_analysis = df[['Text','text_distilbert']]
distilbert_analysis.head()

Unnamed: 0,Text,text_distilbert
1,Hail in Phoenix - no way! New opportunity at ...,LABEL_1
2,Rackspace is hiring! We are looking for Nation...,LABEL_1
3,We are #hiring Administrative Assistant in Nan...,LABEL_1
4,We are #hiring Family Service Counselor in Irv...,LABEL_1
5,We are #hiring Inside Sales Representative in ...,LABEL_1
